// Source file: imdbs/imdb-scraper_test.go

package imdbs
import "testing"
// TestBuildScrapingURL verifies that buildScrapingURL turns an IMDb ID into
// the expected title page URL.
func TestBuildScrapingURL(t *testing.T) {
	// Maps the IMDb ID passed in to the URL expected back.
	cases := map[string]string{
		"tt2911666":  "https://www.imdb.com/title/tt2911666/",
		"tt10366206": "https://www.imdb.com/title/tt10366206/",
	}

	for id, want := range cases {
		if got := buildScrapingURL(id); got != want {
			t.Errorf("\ngot: %q\nwanted: %q\nfor: %q", got, want, id)
		}
	}
}
func TestConvertIMDbRuntimeIntoMinutes(t *testing.T) {
data := map[string]int64{
2024-01-23 13:08:55 +01:00
"PT41M": 41,
"PT2H": 120,
"PT2H1M": 121,
" PT2H1M ": 0,
"PT12H1M ": 0,
"PT121M": 0,
"": 0,
2023-10-01 09:00:33 +02:00
}
for d, expectedResult := range data {
result := convertIMDbRuntimeIntoMinutes(d)
if result != expectedResult {
t.Errorf("\ngot: %d\nwanted: %d\nfor: %q", result, expectedResult, d)
}
}
}
func TestGetIMDbIDFromQuery(t *testing.T) {
data := map[string]string{
"tt2861424": "tt2861424",
2024-01-23 13:08:55 +01:00
" tt2861424 ": "tt2861424",
2023-10-01 09:00:33 +02:00
"https://www.imdb.com/title/tt2861424": "tt2861424",
"https://www.imdb.com/title/tt2861424/": "tt2861424",
"https://www.imdb.com/title/tt2861424/?ref_=vp_vi_tt": "tt2861424",
"https://m.imdb.com/title/tt2861424": "tt2861424",
"https://m.imdb.com/title/tt2861424/": "tt2861424",
"https://m.imdb.com/title/tt2861424/?ref_=vp_vi_tt": "tt2861424",
2024-01-23 13:08:55 +01:00
"https://www.google.com/title/tt2861424/": "tt2861424",
2023-10-01 09:00:33 +02:00
"https://www.imdb.com/": "",
2024-01-23 13:08:55 +01:00
"": "",
2023-10-01 09:00:33 +02:00
}
for d, expectedResult := range data {
result, _ := GetIMDbIDFromQuery(d)
if result != expectedResult {
t.Errorf("\ngot: %q\nwanted: %q\nfor: %q", result, expectedResult, d)
}
}
}
func TestExtractYearFromIMDbTitle(t *testing.T) {
type tResult struct {
Data string
Title string
ExpectedResult int64
}
data := []tResult{
{
Data: "John Wick: Kapitel 4 (2023) - IMDb",
ExpectedResult: 2023,
},
{
Data: "Matrix (1999) - IMDb",
ExpectedResult: 1999,
},
{
Data: "Thurgood (Fernsehfilm 2011) - IMDb",
ExpectedResult: 2011,
},
{
Data: "Pretty/Handsome (Fernsehfilm 2008) - IMDb",
ExpectedResult: 2008,
},
{
Data: "Red Planet: Deleted Scenes (Video 2000) - IMDb",
ExpectedResult: 2000,
},
{
Data: "Last Night in Soho: Deleted Scenes (Video 2022) - IMDb",
ExpectedResult: 2022,
},
{
Data: "Eine schrecklich nette Familie (Fernsehserie 19871997) - IMDb",
ExpectedResult: 1987,
},
{
Data: "Rick and Morty (Fernsehserie 2013 ) - IMDb",
ExpectedResult: 2013,
},
2023-10-23 23:59:20 +02:00
{
Data: "Unser Kosmos: Die Reise geht weiter (Miniserie 2014) - IMDb",
ExpectedResult: 2014,
},
2023-10-01 09:00:33 +02:00
}
for _, r := range data {
2024-01-23 13:08:55 +01:00
result := extractYearFromIMDbTitle(r.Data)
2023-10-01 09:00:33 +02:00
if result != r.ExpectedResult {
t.Errorf("\ngot: %d\nwanted: %d\nfor: %q", result, r.ExpectedResult, r.Data)
}
}
}
func TestIsValidIMDbID(t *testing.T) {
data := map[string]bool{
"tt0000000": true,
2024-01-23 13:08:55 +01:00
"tt99999999": true,
2023-10-01 09:00:33 +02:00
"tt2911666": true,
"tt10366206": true,
"tt0944947": true,
"tt11737520": true,
2024-01-23 13:08:55 +01:00
"tt291166": false, // too short
"tt103662060": false, // too long (i hear that one a lot)
"ttt1036620": false, // invalid characters
"tt2911A66": false, // invalid characters
" tt2911666 ": false, // not trimmed
"https://www.imdb.com/": false, // wtf is this
"": false, // rly?!
2023-10-01 09:00:33 +02:00
}
for d, expectedResult := range data {
result := IsValidIMDbID(d)
if result != expectedResult {
t.Errorf("\ngot: %t\nwanted: %t\nfor: %q", result, expectedResult, d)
}
}
}
func TestScrap(t *testing.T) {
data := map[string]IMDbEntry{
"tt0000000": {},
"tt2911666": {
AlternateName: "",
IMDbID: "tt2911666",
Rating: 7.4,
RuntimeInMins: 101,
Title: "John Wick",
Type: "Movie",
Year: 2014,
},
"tt7798634": {
AlternateName: "Ready or Not - Auf die Plätze, fertig, tot",
IMDbID: "tt7798634",
Rating: 6.9,
RuntimeInMins: 95,
Title: "Ready or Not",
Type: "Movie",
Year: 2019,
},
"tt0087803": {
AlternateName: "1984",
IMDbID: "tt0087803",
Rating: 7.1,
RuntimeInMins: 113,
Title: "Nineteen Eighty-Four",
Type: "Movie",
Year: 1984,
},
"tt8579674": {
AlternateName: "",
IMDbID: "tt8579674",
Rating: 8.2,
RuntimeInMins: 119,
Title: "1917",
Type: "Movie",
Year: 2019,
},
"tt2861424": {
AlternateName: "",
IMDbID: "tt2861424",
Rating: 9.1,
RuntimeInMins: 0,
Title: "Rick and Morty",
2023-10-24 00:39:04 +02:00
Type: "Series",
2023-10-01 09:00:33 +02:00
Year: 2013,
},
"tt0092400": {
AlternateName: "Eine schrecklich nette Familie",
IMDbID: "tt0092400",
Rating: 8.1,
RuntimeInMins: 0,
Title: "Married with Children",
2023-10-24 00:39:04 +02:00
Type: "Series",
2023-10-01 09:00:33 +02:00
Year: 1987,
},
2023-10-24 00:39:04 +02:00
"tt7366338": {
AlternateName: "",
IMDbID: "tt7366338",
Rating: 9.3,
RuntimeInMins: 0,
Title: "Chernobyl",
Type: "Series",
Year: 2019,
},
2023-10-25 19:00:21 +02:00
"tt0366551": {
AlternateName: "Harold & Kumar",
IMDbID: "tt0366551",
Rating: 7.0,
RuntimeInMins: 88,
Title: "Harold & Kumar Go to White Castle",
Type: "Movie",
Year: 2004,
},
2023-10-01 09:00:33 +02:00
}
for d, expectedResult := range data {
result, _ := Scrap(d)
if result != expectedResult {
t.Errorf("\ngot: %+v\nwanted: %+v\nfor: %q", result, expectedResult, d)
}
}
}