2023-10-27 16:03:42 +02:00
|
|
|
package useragents
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"math/rand"
|
2023-10-28 15:42:01 +02:00
|
|
|
"strings"
|
2023-10-27 16:03:42 +02:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/gocolly/colly"
|
|
|
|
)
|
|
|
|
|
2023-10-29 19:22:01 +01:00
|
|
|
// useragentEntry is the decoding target for a single element of the JSON
// array that getFreshUseragents reads from the scraped page.
type useragentEntry struct {
	// UserAgent is populated from the "ua" key.
	UserAgent string `json:"ua"`
	// Percentage is populated from the "pct" key. It is decoded but not read
	// anywhere in this file; presumably it is the usage share reported by the
	// source site (TODO confirm).
	Percentage float64 `json:"pct"`
}
|
|
|
|
|
2024-01-17 22:33:57 +01:00
|
|
|
var defaultUseragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.3"
|
2023-10-29 19:22:01 +01:00
|
|
|
var useragents []string = getFreshUseragents()
|
2023-10-27 16:03:42 +02:00
|
|
|
|
2023-10-29 19:22:01 +01:00
|
|
|
// GetAllUseragents returns all available useragents
|
|
|
|
func GetAllUseragents() []string {
|
|
|
|
copyOfUseragents := make([]string, len(useragents))
|
|
|
|
copy(copyOfUseragents, useragents)
|
|
|
|
|
|
|
|
return copyOfUseragents
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetRandomUseragent returns a random useragent
|
|
|
|
func GetRandomUseragent() string {
|
2023-10-27 16:03:42 +02:00
|
|
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
2023-10-29 19:22:01 +01:00
|
|
|
return useragents[r.Intn(len(useragents))]
|
2023-10-27 16:03:42 +02:00
|
|
|
}
|
|
|
|
|
2024-01-17 22:33:57 +01:00
|
|
|
// RenewUseragents scrapes a fresh list of useragents
|
|
|
|
func RenewUseragents() {
|
|
|
|
useragents = getFreshUseragents()
|
|
|
|
}
|
|
|
|
|
2023-10-29 19:22:01 +01:00
|
|
|
func getFreshUseragents() []string {
|
2023-10-27 16:03:42 +02:00
|
|
|
var freshUserAgents []string = []string{}
|
|
|
|
|
|
|
|
c := colly.NewCollector(
|
|
|
|
colly.MaxDepth(1),
|
|
|
|
colly.AllowedDomains("www.useragents.me"),
|
2023-10-29 19:22:01 +01:00
|
|
|
colly.UserAgent(defaultUseragent),
|
2023-10-27 16:03:42 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
c.OnRequest(func(r *colly.Request) {
|
|
|
|
r.Headers.Set("Accept-Language", "de,en-US;q=0.7,en;q=0.3")
|
|
|
|
})
|
|
|
|
|
|
|
|
c.OnHTML("#most-common-desktop-useragents-json-csv", func(e *colly.HTMLElement) {
|
|
|
|
e.ForEach("textarea", func(n int, textarea *colly.HTMLElement) {
|
|
|
|
if n > 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-10-29 19:22:01 +01:00
|
|
|
var rawData []useragentEntry
|
2023-10-27 16:03:42 +02:00
|
|
|
err := json.Unmarshal([]byte(textarea.Text), &rawData)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, e := range rawData {
|
2023-10-28 15:42:01 +02:00
|
|
|
if !shouldBeValidUseragent(e.UserAgent) {
|
|
|
|
continue
|
2023-10-27 16:03:42 +02:00
|
|
|
}
|
2023-10-28 15:42:01 +02:00
|
|
|
|
|
|
|
freshUserAgents = append(freshUserAgents, e.UserAgent)
|
2023-10-27 16:03:42 +02:00
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
c.Visit("https://www.useragents.me/")
|
|
|
|
|
|
|
|
if len(freshUserAgents) == 0 {
|
2023-10-29 19:22:01 +01:00
|
|
|
return []string{defaultUseragent}
|
2023-10-27 16:03:42 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return freshUserAgents
|
|
|
|
}
|
2023-10-28 15:42:01 +02:00
|
|
|
|
|
|
|
// shouldBeValidUseragent reports whether s looks like a desktop browser
// useragent. All of the following substring checks must succeed:
//
//   - "Mozilla/"
//   - one of "Linux", "Macintosh", "Windows NT"
//   - one of "AppleWebKit", "Gecko"
//   - one of "Firefox", "Chrome", "Safari"
//
// The checks are case-sensitive and position-independent.
func shouldBeValidUseragent(s string) bool {
	// hasAny reports whether s contains at least one of the given markers.
	hasAny := func(markers ...string) bool {
		for _, marker := range markers {
			if strings.Contains(s, marker) {
				return true
			}
		}
		return false
	}

	return hasAny("Mozilla/") &&
		hasAny("Linux", "Macintosh", "Windows NT") &&
		hasAny("AppleWebKit", "Gecko") &&
		hasAny("Firefox", "Chrome", "Safari")
}
|