improved series detection
This commit is contained in:
parent
4bb98a6518
commit
469e2984bc
@ -65,7 +65,15 @@ func Scrap(id string) (IMDbEntry, error) {
|
|||||||
var year int64 = 0
|
var year int64 = 0
|
||||||
foundJSON := false
|
foundJSON := false
|
||||||
j := IMDbJSON{}
|
j := IMDbJSON{}
|
||||||
c := colly.NewCollector(colly.MaxDepth(1), colly.AllowedDomains("www.imdb.com"))
|
c := colly.NewCollector(
|
||||||
|
colly.MaxDepth(1),
|
||||||
|
colly.AllowedDomains("www.imdb.com"),
|
||||||
|
colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"),
|
||||||
|
)
|
||||||
|
|
||||||
|
c.OnRequest(func(r *colly.Request) {
|
||||||
|
r.Headers.Set("Accept-Language", "de,en-US;q=0.7,en;q=0.3")
|
||||||
|
})
|
||||||
|
|
||||||
c.OnHTML("head", func(e *colly.HTMLElement) {
|
c.OnHTML("head", func(e *colly.HTMLElement) {
|
||||||
e.ForEach("script", func(_ int, script *colly.HTMLElement) {
|
e.ForEach("script", func(_ int, script *colly.HTMLElement) {
|
||||||
@ -145,26 +153,29 @@ func extractYearFromIMDbTitle(s string, title string) int64 {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSeriesIndicators returns the substrings whose presence in an IMDb page
// title marks the entry as a series. Each indicator starts with an opening
// parenthesis and ends with a space; callers either search for it
// (isIMDbTitleOfSeries) or replace it with a bare "(" before extracting the
// year (prepareChunckOfIMDbTitleOfSeriesForYearExtraction).
//
// A fresh slice is returned on every call, so callers may modify the result
// without affecting later calls.
func getSeriesIndicators() []string {
	return []string{
		"(Fernsehserie ",
		"(Miniserie ",
		"(TV Mini Series ",
		"(TV Series ",
	}
}
|
||||||
|
|
||||||
func isIMDbTitleOfSeries(title string) bool {
|
func isIMDbTitleOfSeries(title string) bool {
|
||||||
if strings.Contains(title, "(Fernsehserie ") {
|
for _, indicator := range getSeriesIndicators() {
|
||||||
return true
|
if strings.Contains(title, indicator) {
|
||||||
}
|
return true
|
||||||
|
}
|
||||||
if strings.Contains(title, "(Miniserie ") {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.Contains(title, "(TV Series ") {
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func prepareChunckOfIMDbTitleOfSeriesForYearExtraction(s string) string {
|
func prepareChunckOfIMDbTitleOfSeriesForYearExtraction(s string) string {
|
||||||
s = strings.Replace(s, "(Fernsehserie ", "(", -1)
|
for _, indicator := range getSeriesIndicators() {
|
||||||
s = strings.Replace(s, "(Miniserie ", "(", -1)
|
s = strings.Replace(s, indicator, "(", -1)
|
||||||
s = strings.Replace(s, "(TV Series ", "(", -1)
|
}
|
||||||
|
|
||||||
sSplitted := strings.Split(s, "–")
|
sSplitted := strings.Split(s, "–")
|
||||||
if len(sSplitted) != 2 {
|
if len(sSplitted) != 2 {
|
||||||
|
@ -196,7 +196,7 @@ func TestScrap(t *testing.T) {
|
|||||||
Rating: 9.1,
|
Rating: 9.1,
|
||||||
RuntimeInMins: 0,
|
RuntimeInMins: 0,
|
||||||
Title: "Rick and Morty",
|
Title: "Rick and Morty",
|
||||||
Type: "TVSeries",
|
Type: "Series",
|
||||||
Year: 2013,
|
Year: 2013,
|
||||||
},
|
},
|
||||||
"tt0092400": {
|
"tt0092400": {
|
||||||
@ -205,9 +205,18 @@ func TestScrap(t *testing.T) {
|
|||||||
Rating: 8.1,
|
Rating: 8.1,
|
||||||
RuntimeInMins: 0,
|
RuntimeInMins: 0,
|
||||||
Title: "Married with Children",
|
Title: "Married with Children",
|
||||||
Type: "TVSeries",
|
Type: "Series",
|
||||||
Year: 1987,
|
Year: 1987,
|
||||||
},
|
},
|
||||||
|
"tt7366338": {
|
||||||
|
AlternateName: "",
|
||||||
|
IMDbID: "tt7366338",
|
||||||
|
Rating: 9.3,
|
||||||
|
RuntimeInMins: 0,
|
||||||
|
Title: "Chernobyl",
|
||||||
|
Type: "Series",
|
||||||
|
Year: 2019,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for d, expectedResult := range data {
|
for d, expectedResult := range data {
|
||||||
|
Loading…
Reference in New Issue
Block a user