Skip to content

Commit

Permalink
internal/testing/htmlcheck: replace use of cascadia in In
Browse files Browse the repository at this point in the history
This cl implements the :nth-child() and :nth-of-type() pseudoclasses
for the css selector query function, allowing it to be used for
htmlcheck.In.

It also replaces the single use of a a child combinator '>' in a test
with a descendant combinator ' ' so that we don't need to implement
the child combinator. The test should have the same behavior.

For golang/go#61399

Change-Id: I09d9b8fbcd0eafd37aceb5994515687c85244ef8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/542058
kokoro-CI: kokoro <[email protected]>
Reviewed-by: Jonathan Amsterdam <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
  • Loading branch information
matloob committed Nov 15, 2023
1 parent a7a0e8d commit 21ef660
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 24 deletions.
24 changes: 2 additions & 22 deletions internal/testing/htmlcheck/htmlcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"regexp"
"strings"

"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)

Expand All @@ -38,15 +37,9 @@ func Run(reader io.Reader, checker Checker) error {
//
// A nil Checker is valid and always succeeds.
func In(selector string, checkers ...Checker) Checker {
sel := mustParseCascadiaSelector(selector)
sel := mustParseSelector(selector)
return func(n *html.Node) error {
var m *html.Node
// cascadia.Query does not test against its argument node.
if sel.Match(n) {
m = n
} else {
m = cascadia.Query(n, sel)
}
m := query(n, sel)
if m == nil {
return fmt.Errorf("no element matches selector %q", selector)
}
Expand Down Expand Up @@ -84,19 +77,6 @@ func check(n *html.Node, Checkers []Checker) error {
return nil
}

// mustParseCascadiaSelector parses the given CSS selector. An empty string
// is treated as "*" (match everything).
func mustParseCascadiaSelector(s string) cascadia.Sel {
if s == "" {
s = "*"
}
sel, err := cascadia.Parse(s)
if err != nil {
panic(fmt.Sprintf("parsing %q: %v", s, err))
}
return sel
}

// mustParseSelector parses the given CSS selector. An empty string
// is treated as matching everything.
func mustParseSelector(s string) *selector {
Expand Down
123 changes: 122 additions & 1 deletion internal/testing/htmlcheck/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package htmlcheck
import (
"errors"
"fmt"
"strconv"
"strings"

"golang.org/x/net/html"
Expand Down Expand Up @@ -122,13 +123,62 @@ func parse(s string) (*selector, error) {
}
sel.next = next
return sel, nil
case s[0] == ':':
atom, rest, err := parsePseudoClass(s)
if err != nil {
return nil, err
}
sel.atoms = append(sel.atoms, atom)
s = rest
default:
return nil, fmt.Errorf("unexpected character %q in input", s[0])
return nil, fmt.Errorf("unexpected character '%v' in input", string(s[0]))
}
}
return sel, nil
}

// parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and
// number arguments to those functions, not even, odd, or an+b.
func parsePseudoClass(s string) (selectorAtom, string, error) {
if s[0] != ':' {
return nil, "", errors.New("expected ':' at beginning of pseudo class")
}
ident, rest := consumeIdentifier(s[1:])
if len(ident) == 0 {
return nil, "", errors.New("expected identifier after : in pseudo class")
}
if ident != "nth-of-type" && ident != "nth-child" {
return nil, "", errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported")
}
s = rest
if len(s) == 0 || s[0] != '(' {
return nil, "", errors.New("expected '(' after :nth-of-type or nth-child")
}
numstr, rest := consumeNumber(s[1:])
if len(numstr) == 0 {
return nil, "", errors.New("only number arguments are supported for :nth-of-type() or :nth-child()")
}
num, err := strconv.Atoi(numstr)
if err != nil {
// This shouldn't ever happen because consumeNumber should only return valid numbers and we
// check that the length is greater than 0.
panic(fmt.Errorf("unexpected parse error for number: %v", err))
}
s = rest
if len(s) == 0 || s[0] != ')' {
return nil, "", errors.New("expected ')' after number argument to nth-of-type or nth-child")
}
rest = s[1:]
switch ident {
case "nth-of-type":
return &nthOfType{num}, rest, nil
case "nth-child":
return &nthChild{num}, rest, nil
default:
panic("we should only have allowed nth-of-type or nth-child up to this point")
}
}

// parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"]
func parseAttributeSelector(s string) (*attributeSelector, string, error) {
if s[0] != '[' {
Expand Down Expand Up @@ -192,6 +242,15 @@ func consumeIdentifier(s string) (letters, rest string) {
return s[:i], s[i:]
}

// consumeNumber consumes and returns a (0-9)+ number at the beginning
// of the given string, and the rest of the string.
func consumeNumber(s string) (letters, rest string) {
i := 0
for ; i < len(s) && isNumber(s[i]); i++ {
}
return s[:i], s[i:]
}

func isAscii(s string) bool {
for i := 0; i < len(s); i++ {
if s[i] > 127 {
Expand Down Expand Up @@ -268,3 +327,65 @@ func (s *classSelector) match(n *html.Node) bool {
}
return false
}

// nthOfType implements the :nth-of-type() pseudoclass.
type nthOfType struct {
n int
}

func (s *nthOfType) match(n *html.Node) bool {
if n.Type != html.ElementNode || n.Parent == nil {
return false
}
curChild := n.Parent.FirstChild
i := 0
for {
if curChild.Type == html.ElementNode && curChild.Data == n.Data {
i++
if i == s.n {
break
}
}
if curChild.NextSibling == nil {
break
}
curChild = curChild.NextSibling
}
if i != s.n {
// there were fewer than n children of this element type.
return false
}
return curChild == n
}

// nthChild implements the :nth-child() pseudoclass
type nthChild struct {
n int
}

func (s *nthChild) match(n *html.Node) bool {
if n.Type != html.ElementNode || n.Parent == nil {
return false
}
curChild := n.Parent.FirstChild
// Advance to next element node.
for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
curChild = curChild.NextSibling
}
i := 1
for ; i < s.n; i++ {
if curChild.NextSibling == nil {
break
}
curChild = curChild.NextSibling
// Advance to next element node.
for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
curChild = curChild.NextSibling
}
}
if i != s.n {
// there were fewer than n children.
return false
}
return curChild == n
}
57 changes: 57 additions & 0 deletions internal/testing/htmlcheck/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,52 @@ func TestParse(t *testing.T) {
nil,
errors.New("expected ']' at end of attribute selector"),
},
{
`.VulnMain-title:nth-of-type(4)`,
&selector{atoms: []selectorAtom{&classSelector{"VulnMain-title"}, &nthOfType{4}}},
nil,
},
{
`th:nth-child(2)`,
&selector{atoms: []selectorAtom{&elementSelector{"th"}, &nthChild{2}}},
nil,
},
{
`th:(2)`,
nil,
errors.New("expected identifier after : in pseudo class"),
},
{
`th:32(2)`,
nil,
errors.New("expected identifier after : in pseudo class"),
},
{
`th:active`,
nil,
errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported"),
},
{
`th:nth-child`,
nil,
errors.New("expected '(' after :nth-of-type or nth-child"),
},
{
`th:nth-child(odd)`,
nil,
errors.New("only number arguments are supported for :nth-of-type() or :nth-child()"),
},
{
`th:nth-child(14`,
nil,
errors.New("expected ')' after number argument to nth-of-type or nth-child"),
},
// We don't support the child combinator. Make sure it returns a parse error.
{
".Documentation-sinceVersion > .Documentation-sinceVersionVersion",
nil,
errors.New("unexpected character '>' in input"),
},
}
for _, tc := range testCases {
sel, err := parse(tc.text)
Expand Down Expand Up @@ -156,6 +202,17 @@ func TestQuery(t *testing.T) {
{`<div></div><div><div>wrong</div></div><div id="wrong-id"><div class="my-class">also wrong</div></div><div id="my-id"><div class="wrong-class">still wrong</div></div><div id="my-id"><div class="my-class">match</div></div>`, "div#my-id div.my-class", `<div class="my-class">match</div>`},
{`<a></a><div class="UnitMeta-repo"><a href="foo" title="">link body</a></div>`, ".UnitMeta-repo a", `<a href="foo" title="">link body</a>`},
{`<ul class="UnitFiles-fileList"><li><a href="foo">a.go</a></li></ul>`, ".UnitFiles-fileList a", `<a href="foo">a.go</a>`},
{`<ul><li>first child</li><li>second child</li></ul>`, "li:nth-child(2)", `<li>second child</li>`},
{`<ul> <li>first child</li> <li>second child</li> </ul>`, "li:nth-child(2)", `<li>second child</li>`},
{`<div><div>not counted</div><p class="class">first paragraph</p></div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
{`<div><div>not counted</div> <p class="class">first paragraph</p> </div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
{`<div><div class="class">not counted</div><p class="class">first paragraph</p>`, ".class:nth-of-type(2)", ``},
{`<div><div class="class">not counted</div><p class="class">first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
{`<div><div>not counted</div><p>first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p><td>also not counted</td><p>third paragraph</p><p>fourth paragraph</p><p class="class">fifth paragraph</p></div>`, ".class:nth-of-type(5)", `<p class="class">fifth paragraph</p>`},
{`<table class="UnitDirectories-table"><tbody><tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis">Synopsis</th></tr>`, "th:nth-child(1)", "<th>Path</th>"},
{`<table class="UnitDirectories-table"> <tbody> <tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis"> Synopsis </th> </tr>`, "th:nth-child(1)", "<th>Path</th>"},
}
for _, tc := range testCases {
n, err := html.Parse(strings.NewReader(tc.queriedText))
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/integration/frontend_main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func TestFrontendMainPage(t *testing.T) {
in("#S2",
in(".Documentation-sinceVersion", hasText(""))),
in("#String",
in(".Documentation-sinceVersion > .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
in(".Documentation-sinceVersion .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
in("#T",
in(".Documentation-sinceVersion", hasText(""))),
in("#TF",
Expand Down

0 comments on commit 21ef660

Please sign in to comment.