improved series detection
This commit is contained in:
parent
4bb98a6518
commit
469e2984bc
@ -65,7 +65,15 @@ func Scrap(id string) (IMDbEntry, error) {
|
||||
var year int64 = 0
|
||||
foundJSON := false
|
||||
j := IMDbJSON{}
|
||||
c := colly.NewCollector(colly.MaxDepth(1), colly.AllowedDomains("www.imdb.com"))
|
||||
c := colly.NewCollector(
|
||||
colly.MaxDepth(1),
|
||||
colly.AllowedDomains("www.imdb.com"),
|
||||
colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"),
|
||||
)
|
||||
|
||||
c.OnRequest(func(r *colly.Request) {
|
||||
r.Headers.Set("Accept-Language", "de,en-US;q=0.7,en;q=0.3")
|
||||
})
|
||||
|
||||
c.OnHTML("head", func(e *colly.HTMLElement) {
|
||||
e.ForEach("script", func(_ int, script *colly.HTMLElement) {
|
||||
@ -145,26 +153,29 @@ func extractYearFromIMDbTitle(s string, title string) int64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// getSeriesIndicators returns the substrings that mark an IMDb title as
// belonging to a series. The list holds German labels ("Fernsehserie",
// "Miniserie") and English labels ("TV Mini Series", "TV Series").
//
// Every indicator starts with "(" and ends with a space, so it only matches
// the opening of a parenthesised label that is followed by more text.
// A new slice is built on each call, so callers may modify the result
// without affecting other callers.
func getSeriesIndicators() []string {
	return []string{
		"(Fernsehserie ",
		"(Miniserie ",
		"(TV Mini Series ",
		"(TV Series ",
	}
}
|
||||
|
||||
func isIMDbTitleOfSeries(title string) bool {
|
||||
if strings.Contains(title, "(Fernsehserie ") {
|
||||
return true
|
||||
}
|
||||
|
||||
if strings.Contains(title, "(Miniserie ") {
|
||||
return true
|
||||
}
|
||||
|
||||
if strings.Contains(title, "(TV Series ") {
|
||||
return true
|
||||
for _, indicator := range getSeriesIndicators() {
|
||||
if strings.Contains(title, indicator) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func prepareChunckOfIMDbTitleOfSeriesForYearExtraction(s string) string {
|
||||
s = strings.Replace(s, "(Fernsehserie ", "(", -1)
|
||||
s = strings.Replace(s, "(Miniserie ", "(", -1)
|
||||
s = strings.Replace(s, "(TV Series ", "(", -1)
|
||||
for _, indicator := range getSeriesIndicators() {
|
||||
s = strings.Replace(s, indicator, "(", -1)
|
||||
}
|
||||
|
||||
sSplitted := strings.Split(s, "–")
|
||||
if len(sSplitted) != 2 {
|
||||
|
@ -196,7 +196,7 @@ func TestScrap(t *testing.T) {
|
||||
Rating: 9.1,
|
||||
RuntimeInMins: 0,
|
||||
Title: "Rick and Morty",
|
||||
Type: "TVSeries",
|
||||
Type: "Series",
|
||||
Year: 2013,
|
||||
},
|
||||
"tt0092400": {
|
||||
@ -205,9 +205,18 @@ func TestScrap(t *testing.T) {
|
||||
Rating: 8.1,
|
||||
RuntimeInMins: 0,
|
||||
Title: "Married with Children",
|
||||
Type: "TVSeries",
|
||||
Type: "Series",
|
||||
Year: 1987,
|
||||
},
|
||||
"tt7366338": {
|
||||
AlternateName: "",
|
||||
IMDbID: "tt7366338",
|
||||
Rating: 9.3,
|
||||
RuntimeInMins: 0,
|
||||
Title: "Chernobyl",
|
||||
Type: "Series",
|
||||
Year: 2019,
|
||||
},
|
||||
}
|
||||
|
||||
for d, expectedResult := range data {
|
||||
|
Loading…
Reference in New Issue
Block a user