forked from raviqqe/muffet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpage_checker.go
94 lines (72 loc) · 1.61 KB
/
page_checker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package main
import (
"sync"
)
// pageChecker checks the links of a page and of the pages reached through
// them, publishing one pageResult per checked page on its results channel.
type pageChecker struct {
// fetcher fetches each link found on a page (see checkPage).
fetcher *linkFetcher
// linkValidator decides, via Validate, whether a fetched page's URL
// qualifies for being checked in turn.
linkValidator *linkValidator
// daemonManager receives the per-page check jobs (Add) and executes them
// when Check calls Run.
daemonManager *daemonManager
// results carries one pageResult per checked page; Check closes it.
results chan *pageResult
// donePages records page URLs (as strings) handed to addPage so that a
// page is queued at most once.
// NOTE(review): relies on Add reporting whether the URL was already
// present — confirm against concurrentStringSet.
donePages concurrentStringSet
// onePageOnly, when true, prevents fetched pages from being queued for
// checking, so only the page passed to Check is examined.
onePageOnly bool
}
// newPageChecker builds a pageChecker that fetches links with f and uses v to
// decide which fetched pages are checked in turn. When onePageOnly is true,
// fetched pages are never queued.
//
// Both the daemon manager and the buffer of the results channel are sized by
// the package-level concurrency value, which is declared outside this block.
func newPageChecker(f *linkFetcher, v *linkValidator, onePageOnly bool) *pageChecker {
	return &pageChecker{
		fetcher:       f,
		linkValidator: v,
		daemonManager: newDaemonManager(concurrency),
		results:       make(chan *pageResult, concurrency),
		donePages:     newConcurrentStringSet(),
		onePageOnly:   onePageOnly,
	}
}
// Results returns the channel on which a pageResult is delivered for every
// checked page. The channel is handed out receive-only; Check closes it.
func (c *pageChecker) Results() <-chan *pageResult {
	var out <-chan *pageResult = c.results

	return out
}
// Check queues the given page for checking, runs the daemon manager, and
// closes the results channel once Run has returned.
func (c *pageChecker) Check(page *page) {
// Queue the starting page; further pages are queued by checkPage.
c.addPage(page)
// NOTE(review): presumably Run blocks until every queued job, including
// those added while running, has finished — confirm in daemonManager.
c.daemonManager.Run()
// Closing here signals readers of Results() that no more results follow.
close(c.results)
}
// checkPage fetches the links of p concurrently and sends a single pageResult
// for p on c.results.
//
// Links that p.Links() reports together with an error are recorded as
// failures without being fetched. Every other link is fetched in its own
// goroutine. Unless onePageOnly is set, a non-nil page returned by the fetch
// whose URL passes the link validator is handed to addPage.
func (c *pageChecker) checkPage(p *page) {
	links := p.Links()

	// Each channel can buffer one entry per link, so no send below blocks.
	successes := make(chan *successLinkResult, len(links))
	failures := make(chan *errorLinkResult, len(links))

	var pending sync.WaitGroup

	for link, linkErr := range links {
		if linkErr != nil {
			failures <- &errorLinkResult{link, linkErr}
			continue
		}

		pending.Add(1)

		go func(link string) {
			defer pending.Done()

			status, linked, fetchErr := c.fetcher.Fetch(link)

			if fetchErr != nil {
				failures <- &errorLinkResult{link, fetchErr}
			} else {
				successes <- &successLinkResult{link, status}
			}

			// The queueing decision is independent of fetchErr: it only
			// looks at whether a page came back.
			if c.onePageOnly || linked == nil || !c.linkValidator.Validate(linked.URL()) {
				return
			}

			c.addPage(linked)
		}(link)
	}

	// All sends are complete after Wait, so the buffered counts are final.
	pending.Wait()
	close(successes)
	close(failures)

	succeeded := make([]*successLinkResult, len(successes))
	for i := range succeeded {
		succeeded[i] = <-successes
	}

	failed := make([]*errorLinkResult, len(failures))
	for i := range failed {
		failed[i] = <-failures
	}

	c.results <- &pageResult{p.URL().String(), succeeded, failed}
}
// addPage registers p's URL in donePages and, when Add returns false, hands a
// job that runs checkPage on p to the daemon manager.
// NOTE(review): this reads as "Add returns true when the URL was already
// recorded", which makes each page queued at most once — confirm against
// concurrentStringSet.
func (c *pageChecker) addPage(p *page) {
	if c.donePages.Add(p.URL().String()) {
		return
	}

	c.daemonManager.Add(func() {
		c.checkPage(p)
	})
}