Skip to content

Commit

Permalink
Added some validation to handle bad URLs
Browse files: browse the repository at this point in the history
  • Loading branch information
Gus Ralph authored and Gus Ralph committed Nov 4, 2024
1 parent f8486d4 commit 2f0da40
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions internal/runner/executer.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,21 +23,28 @@ func (r *Runner) ExecuteCrawling() error {

wg := sizedwaitgroup.New(r.options.Parallelism)
for _, input := range inputs {
if input == "" {
gologger.Warning().Msgf("Skipping empty input")
continue
}
if !r.networkpolicy.Validate(input) {
gologger.Info().Msgf("Skipping excluded host %s", input)
continue
}
wg.Add()
input = addSchemeIfNotExists(input)
if r.crawler == nil {
return errorutil.New("crawler is not initialized")
}
go func(input string) {
defer wg.Done()

if err := r.crawler.Crawl(input); err != nil {
gologger.Warning().Msgf("Could not crawl %s: %s", input, err)
}
r.state.InFlightUrls.Delete(input)
}(input)
}
}(input)
}
wg.Wait()
return nil
}
Expand Down

0 comments on commit 2f0da40

Please sign in to comment.