Skip to content

Commit

Permalink
Merge pull request #630 from ErikOwen/patch/respect-disable-redirects-flag
Browse files (browse the repository at this point in the history)

Patch: update crawling to not follow redirects when `-disable-redirects` is set
  • Loading branch information
Mzack9999 authored Oct 31, 2023
2 parents 201415a + 19db3cf commit 659a1f8
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 1 deletion.
3 changes: 3 additions & 0 deletions pkg/engine/common/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ func (s *Shared) Do(crawlSession *CrawlSession, doRequest DoRequestFunc) error {
if resp.Resp == nil || resp.Reader == nil {
return
}
if s.Options.Options.DisableRedirects && resp.IsRedirect() {
return
}

navigationRequests := parser.ParseResponse(resp)
s.Enqueue(crawlSession.Queue, navigationRequests...)
Expand Down
5 changes: 5 additions & 0 deletions pkg/engine/hybrid/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,11 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
// process the raw response
navigationRequests := parser.ParseResponse(resp)
c.Enqueue(s.Queue, navigationRequests...)

// do not continue following the request if it's a redirect and redirects are disabled
if c.Options.Options.DisableRedirects && resp.IsRedirect() {
return nil
}
return FetchContinueRequest(page, e)
})() //nolint
defer func() {
Expand Down
4 changes: 3 additions & 1 deletion pkg/engine/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ var responseParsers = []responseParser{
// Header based parsers
{headerParser, headerContentLocationParser},
{headerParser, headerLinkParser},
{headerParser, headerLocationParser},
{headerParser, headerRefreshParser},

// Body based parsers
Expand Down Expand Up @@ -84,6 +83,9 @@ func InitWithOptions(options *types.Options) {
responseParsers = append(responseParsers, responseParser{contentParser, scriptJSFileRegexParser})
responseParsers = append(responseParsers, responseParser{contentParser, bodyScrapeEndpointsParser})
}
if !options.DisableRedirects {
responseParsers = append(responseParsers, responseParser{headerParser, headerLocationParser})
}
}

// parseResponse runs the response parsers on the navigation response
Expand Down
4 changes: 4 additions & 0 deletions pkg/navigation/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,7 @@ func (n Response) AbsoluteURL(path string) string {
final := absURL.String()
return final
}

// IsRedirect reports whether the response's status code lies in the 3xx
// class, i.e. between 300 and 399 inclusive. The check is purely numeric,
// so every 3xx code (including 304) is reported as a redirect.
func (n Response) IsRedirect() bool {
	const (
		lowestRedirect  = 300
		highestRedirect = 399
	)
	status := n.StatusCode
	return lowestRedirect <= status && status <= highestRedirect
}

0 comments on commit 659a1f8

Please sign in to comment.