From 0b5de711c0583c6d5ff988f63c672e3c6cb21755 Mon Sep 17 00:00:00 2001 From: MewX Date: Sun, 11 Aug 2024 12:18:06 +1000 Subject: [PATCH] Upgrade the cookiestxt lib to 1.0.4 which fixed a fatal error --- go.mod | 4 ++-- go.sum | 4 ++-- util/crawlers.go | 10 ++++++---- util/files.go | 6 ++++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index d23a483..4ca02c0 100644 --- a/go.mod +++ b/go.mod @@ -20,8 +20,8 @@ require ( ) require ( + github.com/PuerkitoBio/goquery v1.8.0 github.com/gocolly/colly/v2 v2.1.0 github.com/golang/protobuf v1.5.2 - github.com/mengzhuo/cookiestxt v1.0.3 - github.com/PuerkitoBio/goquery v1.8.0 + github.com/mengzhuo/cookiestxt v1.0.4 ) diff --git a/go.sum b/go.sum index cf093f5..a1012ad 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,8 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= -github.com/mengzhuo/cookiestxt v1.0.3 h1:fdVqvdnxNAtZq5VJp5KuMv1xImmgs/m9bG5zNCy53M4= -github.com/mengzhuo/cookiestxt v1.0.3/go.mod h1:hK5Q6nTJi1tZ0x1Sj3kuxPYpdDPVxF0m+1ebSgBheSs= +github.com/mengzhuo/cookiestxt v1.0.4 h1:86h9CiU88KEM3bB2syZ8cJHABfrkmP4i5g0ytPUHvIs= +github.com/mengzhuo/cookiestxt v1.0.4/go.mod h1:TVR3++zJTvADjIgChwqpw5giKn3BHOATkLSSLGKHJP8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= diff --git a/util/crawlers.go b/util/crawlers.go index b0638af..0b0863a 100644 --- a/util/crawlers.go +++ b/util/crawlers.go @@ -2,14 +2,15 @@ package util import ( "flag" - "github.com/gocolly/colly/v2" - "github.com/gocolly/colly/v2/queue" - "github.com/its-my-data/doubak/proto" "log" "net" "net/http" "strings" "time" + + "github.com/gocolly/colly/v2" + "github.com/gocolly/colly/v2/queue" + "github.com/its-my-data/doubak/proto" ) const RequestTimeout = 5 * time.Minute @@ -39,10 +40,11 @@ func NewColly() *colly.Collector { } cookies = c } + log.Println("Cookies: ", cookies) c := colly.NewCollector( colly.MaxDepth(1), - colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.61"), + colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"), ) c.OnError(func(r *colly.Response, err error) { diff --git a/util/files.go b/util/files.go index e265a99..e5d55d9 100644 --- a/util/files.go +++ b/util/files.go @@ -3,8 +3,6 @@ package util import ( "flag" "fmt" - "github.com/its-my-data/doubak/proto" - "github.com/mengzhuo/cookiestxt" "html" "io/fs" "log" @@ -12,6 +10,9 @@ import ( "os" "path/filepath" "strings" + + "github.com/its-my-data/doubak/proto" + "github.com/mengzhuo/cookiestxt" ) const CollectorPathPrefix = "collector/" @@ -88,6 +89,7 @@ func LoadCookiesFile(filePath string) ([]*http.Cookie, error) { func LoadCookiesFileToString(filePath string) (string, error) { cookies, err := LoadCookiesFile(filePath) if err != nil { + log.Fatal(err) return "", nil }