-
-
Notifications
You must be signed in to change notification settings - Fork 102
/
Copy pathsitemap_page_parser_test.go
109 lines (94 loc) · 3.63 KB
/
sitemap_page_parser_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package main
import (
"regexp"
"testing"
"github.com/stretchr/testify/assert"
)
// MIME types handed to the sitemap page parser by the tests in this file.
const (
	// SITEMAP_MIME_TYPE is the MIME type most tests in this file pass to Parse.
	SITEMAP_MIME_TYPE = "application/xml"

	// SITEMAP_MIME_TYPE_ALIAS is the second MIME type exercised against the
	// same sitemap document by the MimeTypeAlias test.
	SITEMAP_MIME_TYPE_ALIAS = "text/xml"
)
// TestSitemapPageParserParsePage parses a <urlset> sitemap containing a single
// <url> entry under SITEMAP_MIME_TYPE and checks the page URL, the collected
// links and the (nil) fragment set of the returned page.
func TestSitemapPageParserParsePage(t *testing.T) {
	p, err := newSitemapPageParser(newTestLinkFilterer()).Parse(parseURL(t, "https://foo.com/sitemap.xml"), SITEMAP_MIME_TYPE, []byte(`
<?xml version="1.0" encoding="UTF-8"?>
<urlset
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
xmlns:xhtml="http://www.w3.org/1999/xhtml"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"
>
<url>
<loc>https://foo.com/</loc>
</url>
</urlset>
`))

	assert.Nil(t, err)
	// A nil error does not imply a non-nil page: the FailToParse tests in this
	// file expect Parse to return (nil, nil). Stop here on a nil page so the
	// test reports a failure instead of panicking on the method calls below.
	if !assert.NotNil(t, p) {
		return
	}

	assert.Equal(t, "https://foo.com/sitemap.xml", p.URL().String())
	assert.Equal(t, map[string]error{"https://foo.com/": nil}, p.Links())
	assert.Equal(t, map[string]struct{}(nil), p.Fragments())
}
// TestSitemapPageParserParsePageMimeTypeAlias parses the same single-entry
// <urlset> sitemap as the plain ParsePage test, but announces it with
// SITEMAP_MIME_TYPE_ALIAS, and expects an identical page to come back.
func TestSitemapPageParserParsePageMimeTypeAlias(t *testing.T) {
	p, err := newSitemapPageParser(newTestLinkFilterer()).Parse(parseURL(t, "https://foo.com/sitemap.xml"), SITEMAP_MIME_TYPE_ALIAS, []byte(`
<?xml version="1.0" encoding="UTF-8"?>
<urlset
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
xmlns:xhtml="http://www.w3.org/1999/xhtml"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"
>
<url>
<loc>https://foo.com/</loc>
</url>
</urlset>
`))

	assert.Nil(t, err)
	// assert.NotNil only records the failure; it does not stop the test. Bail
	// out explicitly so a nil page is reported as a failure rather than
	// causing a panic on the method calls below.
	if !assert.NotNil(t, p) {
		return
	}

	assert.Equal(t, "https://foo.com/sitemap.xml", p.URL().String())
	assert.Equal(t, map[string]error{"https://foo.com/": nil}, p.Links())
	assert.Equal(t, map[string]struct{}(nil), p.Fragments())
}
// TestSitemapPageParserParseIndexPage parses a <sitemapindex> document with a
// single <sitemap> entry and checks that the entry's <loc> is reported as a
// link of the returned page.
func TestSitemapPageParserParseIndexPage(t *testing.T) {
	p, err := newSitemapPageParser(newTestLinkFilterer()).Parse(parseURL(t, "https://foo.com/sitemap.xml"), SITEMAP_MIME_TYPE, []byte(`
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://foo.com/sitemap-0.xml</loc>
</sitemap>
</sitemapindex>
`))

	assert.Nil(t, err)
	// A nil error does not imply a non-nil page: the FailToParse tests in this
	// file expect Parse to return (nil, nil). Stop here on a nil page so the
	// test reports a failure instead of panicking on the method calls below.
	if !assert.NotNil(t, p) {
		return
	}

	assert.Equal(t, "https://foo.com/sitemap.xml", p.URL().String())
	assert.Equal(t, map[string]error{"https://foo.com/sitemap-0.xml": nil}, p.Links())
	assert.Equal(t, map[string]struct{}(nil), p.Fragments())
}
// TestSitemapPageParserExcludeLink builds the parser with a link filterer
// constructed from the pattern "private" and parses a <sitemapindex> listing a
// "private" and a "public" sitemap. Only the public sitemap URL is expected in
// the page's links.
func TestSitemapPageParserExcludeLink(t *testing.T) {
	p, err := newSitemapPageParser(
		newLinkFilterer([]*regexp.Regexp{regexp.MustCompile("private")}, nil),
	).Parse(parseURL(t, "https://foo.com/sitemap.xml"), SITEMAP_MIME_TYPE, []byte(`
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://foo.com/private-sitemap.xml</loc>
</sitemap>
<sitemap>
<loc>https://foo.com/public-sitemap.xml</loc>
</sitemap>
</sitemapindex>
`))

	assert.Nil(t, err)
	// A nil error does not imply a non-nil page: the FailToParse tests in this
	// file expect Parse to return (nil, nil). Stop here on a nil page so the
	// test reports a failure instead of panicking on the method calls below.
	if !assert.NotNil(t, p) {
		return
	}

	assert.Equal(t, "https://foo.com/sitemap.xml", p.URL().String())
	assert.Equal(t, map[string]error{"https://foo.com/public-sitemap.xml": nil}, p.Links())
	assert.Equal(t, map[string]struct{}(nil), p.Fragments())
}
// TestSitemapPageParserFailToParseXML hands the parser an empty body declared
// as SITEMAP_MIME_TYPE and expects neither a page nor an error in return.
func TestSitemapPageParserFailToParseXML(t *testing.T) {
	parser := newSitemapPageParser(newTestLinkFilterer())
	sitemapURL := parseURL(t, "https://foo.com/sitemap.xml")

	page, err := parser.Parse(sitemapURL, SITEMAP_MIME_TYPE, []byte(""))

	assert.Nil(t, err)
	assert.Nil(t, page)
}
// TestSitemapPageParserFailToParseHTML hands the parser an empty body declared
// as "text/html" and expects neither a page nor an error in return.
func TestSitemapPageParserFailToParseHTML(t *testing.T) {
	parser := newSitemapPageParser(newTestLinkFilterer())
	sitemapURL := parseURL(t, "https://foo.com/sitemap.xml")

	page, err := parser.Parse(sitemapURL, "text/html", []byte(""))

	assert.Nil(t, err)
	assert.Nil(t, page)
}