This repository has been archived by the owner on Jan 23, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmajestic.go
86 lines (71 loc) · 1.62 KB
/
majestic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
package dor
import (
"bufio"
"log"
"net/http"
"strings"
"time"
)
const (
// majesticTop1M is the URL of the Majestic Million CSV file that Do
// passes to fetch.
// NOTE(review): this is a plain-HTTP URL; consider HTTPS if the host
// serves the same file over TLS — confirm before changing.
majesticTop1M = "http://downloads.majestic.com/majestic_million.csv"
)
// MajesticIngester downloads the Majestic Million CSV and streams its rows
// as *Entry values over the channel returned by Do.
//
// More info: https://blog.majestic.com/development/alexa-top-1-million-sites-retired-heres-majestic-million/
type MajesticIngester struct {
// IngesterConf is embedded; NewMajestic sets its Description and Do
// sets its Timestamp.
IngesterConf
// resp is the HTTP response stored by fetch; process reads its body
// line by line and closes it when done.
resp *http.Response
}
// NewMajestic returns a MajesticIngester whose embedded IngesterConf has
// its Description set to "majestic". No request is made until Do is called.
func NewMajestic() *MajesticIngester {
	conf := IngesterConf{Description: "majestic"}
	return &MajesticIngester{IngesterConf: conf}
}
// majesticStatusError reports a non-200 HTTP response received while
// requesting the Majestic list.
type majesticStatusError struct {
	url    string // URL that was requested
	status string // status line of the response, e.g. "404 Not Found"
}

// Error implements the error interface.
func (e *majesticStatusError) Error() string {
	return "majestic: fetching " + e.url + ": unexpected HTTP status " + e.status
}

// fetch issues a GET request for url and, when the server answers with
// 200 OK, stores the response in in.resp so that process can read its body.
//
// It returns the transport error from http.Get unchanged, or a
// *majesticStatusError when the server replies with any other status.
// On error in.resp is left untouched.
func (in *MajesticIngester) fetch(url string) error {
	r, err := http.Get(url)
	if err != nil {
		return err
	}
	// http.Get reports only transport-level failures; a 4xx/5xx reply still
	// has a nil error, so the status must be checked explicitly or an error
	// page would be parsed as CSV.
	if r.StatusCode != http.StatusOK {
		// process never runs on this path, so the body has to be closed
		// here to release the underlying connection.
		r.Body.Close()
		return &majesticStatusError{url: url, status: r.Status}
	}
	log.Printf("%s downloaded successfully", url)
	in.resp = r
	return nil
}
// process reads the response body stored by fetch line by line and sends
// one *Entry per data row on rc.
//
// The first line is treated as the CSV header and skipped. Every other line
// is split on commas and must yield exactly 12 fields; lines that do not are
// logged and dropped. Field 0 is converted with strToUint into Rank, field 2
// becomes Domain, and the whole unsplit line is kept as RawData.
//
// rc is closed and the response body is closed when process returns. A read
// error from the scanner (for example a truncated download or an over-long
// line) is logged, since process has no way to return it to the caller.
func (in *MajesticIngester) process(rc chan *Entry) {
	// Deferred calls run in reverse order: rc is closed first, then the body.
	defer in.resp.Body.Close()
	defer close(rc)

	scanner := bufio.NewScanner(in.resp.Body)
	header := true
	for scanner.Scan() {
		line := scanner.Text()
		if header {
			// The first line holds column names, not a record.
			header = false
			continue
		}
		parts := strings.Split(line, ",")
		if len(parts) != 12 {
			log.Println("majestic: wrong line in a CSV")
			continue
		}
		rc <- &Entry{
			Rank:    strToUint(parts[0]),
			Domain:  parts[2],
			RawData: line,
		}
	}
	// Scan returns false both at EOF and on failure; without this check a
	// broken stream would look like a complete list to the consumer.
	if err := scanner.Err(); err != nil {
		log.Printf("majestic: reading CSV: %v", err)
	}
}
// Do implements the Ingester interface for the Majestic CSV file.
//
// It stamps in.Timestamp with the current UTC time, requests the list via
// fetch, and on success starts process in a new goroutine. The returned
// unbuffered channel receives the entries sent by process. If fetch fails,
// Do returns a nil channel and the error from fetch.
func (in *MajesticIngester) Do() (chan *Entry, error) {
	in.Timestamp = time.Now().UTC()

	entries := make(chan *Entry)
	err := in.fetch(majesticTop1M)
	if err != nil {
		return nil, err
	}

	go in.process(entries)
	return entries, nil
}