diff --git a/internal/testing/htmlcheck/htmlcheck.go b/internal/testing/htmlcheck/htmlcheck.go index 46cdf87d8..4accdb50f 100644 --- a/internal/testing/htmlcheck/htmlcheck.go +++ b/internal/testing/htmlcheck/htmlcheck.go @@ -12,7 +12,6 @@ import ( "regexp" "strings" - "github.com/andybalholm/cascadia" "golang.org/x/net/html" ) @@ -38,15 +37,9 @@ func Run(reader io.Reader, checker Checker) error { // // A nil Checker is valid and always succeeds. func In(selector string, checkers ...Checker) Checker { - sel := mustParseCascadiaSelector(selector) + sel := mustParseSelector(selector) return func(n *html.Node) error { - var m *html.Node - // cascadia.Query does not test against its argument node. - if sel.Match(n) { - m = n - } else { - m = cascadia.Query(n, sel) - } + m := query(n, sel) if m == nil { return fmt.Errorf("no element matches selector %q", selector) } @@ -84,19 +77,6 @@ func check(n *html.Node, Checkers []Checker) error { return nil } -// mustParseCascadiaSelector parses the given CSS selector. An empty string -// is treated as "*" (match everything). -func mustParseCascadiaSelector(s string) cascadia.Sel { - if s == "" { - s = "*" - } - sel, err := cascadia.Parse(s) - if err != nil { - panic(fmt.Sprintf("parsing %q: %v", s, err)) - } - return sel -} - // mustParseSelector parses the given CSS selector. An empty string // is treated as matching everything. func mustParseSelector(s string) *selector { diff --git a/internal/testing/htmlcheck/query.go b/internal/testing/htmlcheck/query.go index 1e5a67149..75a196ddb 100644 --- a/internal/testing/htmlcheck/query.go +++ b/internal/testing/htmlcheck/query.go @@ -7,6 +7,7 @@ package htmlcheck import ( "errors" "fmt" + "strconv" "strings" "golang.org/x/net/html" @@ -122,13 +123,62 @@ func parse(s string) (*selector, error) { } sel.next = next return sel, nil + case s[0] == ':': + atom, rest, err := parsePseudoClass(s) + if err != nil { + return nil, err + } + sel.atoms = append(sel.atoms, atom) + s = rest default: - return nil, fmt.Errorf("unexpected character %q in input", s[0]) + return nil, fmt.Errorf("unexpected character '%v' in input", string(s[0])) } } return sel, nil } +// parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and +// number arguments to those functions, not even, odd, or an+b. +func parsePseudoClass(s string) (selectorAtom, string, error) { + if s[0] != ':' { + return nil, "", errors.New("expected ':' at beginning of pseudo class") + } + ident, rest := consumeIdentifier(s[1:]) + if len(ident) == 0 { + return nil, "", errors.New("expected identifier after : in pseudo class") + } + if ident != "nth-of-type" && ident != "nth-child" { + return nil, "", errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported") + } + s = rest + if len(s) == 0 || s[0] != '(' { + return nil, "", errors.New("expected '(' after :nth-of-type or nth-child") + } + numstr, rest := consumeNumber(s[1:]) + if len(numstr) == 0 { + return nil, "", errors.New("only number arguments are supported for :nth-of-type() or :nth-child()") + } + num, err := strconv.Atoi(numstr) + if err != nil { + // This shouldn't ever happen because consumeNumber should only return valid numbers and we + // check that the length is greater than 0. + panic(fmt.Errorf("unexpected parse error for number: %v", err)) + } + s = rest + if len(s) == 0 || s[0] != ')' { + return nil, "", errors.New("expected ')' after number argument to nth-of-type or nth-child") + } + rest = s[1:] + switch ident { + case "nth-of-type": + return &nthOfType{num}, rest, nil + case "nth-child": + return &nthChild{num}, rest, nil + default: + panic("we should only have allowed nth-of-type or nth-child up to this point") + } +} + // parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"] func parseAttributeSelector(s string) (*attributeSelector, string, error) { if s[0] != '[' { @@ -192,6 +242,15 @@ func consumeIdentifier(s string) (letters, rest string) { return s[:i], s[i:] } +// consumeNumber consumes and returns a (0-9)+ number at the beginning +// of the given string, and the rest of the string. +func consumeNumber(s string) (letters, rest string) { + i := 0 + for ; i < len(s) && isNumber(s[i]); i++ { + } + return s[:i], s[i:] +} + func isAscii(s string) bool { for i := 0; i < len(s); i++ { if s[i] > 127 { @@ -268,3 +327,65 @@ func (s *classSelector) match(n *html.Node) bool { } return false } + +// nthOfType implements the :nth-of-type() pseudoclass. +type nthOfType struct { + n int +} + +func (s *nthOfType) match(n *html.Node) bool { + if n.Type != html.ElementNode || n.Parent == nil { + return false + } + curChild := n.Parent.FirstChild + i := 0 + for { + if curChild.Type == html.ElementNode && curChild.Data == n.Data { + i++ + if i == s.n { + break + } + } + if curChild.NextSibling == nil { + break + } + curChild = curChild.NextSibling + } + if i != s.n { + // there were fewer than n children of this element type. + return false + } + return curChild == n +} + +// nthChild implements the :nth-child() pseudoclass +type nthChild struct { + n int +} + +func (s *nthChild) match(n *html.Node) bool { + if n.Type != html.ElementNode || n.Parent == nil { + return false + } + curChild := n.Parent.FirstChild + // Advance to next element node. + for curChild.Type != html.ElementNode && curChild.NextSibling != nil { + curChild = curChild.NextSibling + } + i := 1 + for ; i < s.n; i++ { + if curChild.NextSibling == nil { + break + } + curChild = curChild.NextSibling + // Advance to next element node. + for curChild.Type != html.ElementNode && curChild.NextSibling != nil { + curChild = curChild.NextSibling + } + } + if i != s.n { + // there were fewer than n children. + return false + } + return curChild == n +} diff --git a/internal/testing/htmlcheck/query_test.go b/internal/testing/htmlcheck/query_test.go index f1282891d..39081f1a4 100644 --- a/internal/testing/htmlcheck/query_test.go +++ b/internal/testing/htmlcheck/query_test.go @@ -120,6 +120,52 @@ func TestParse(t *testing.T) { nil, errors.New("expected ']' at end of attribute selector"), }, + { + `.VulnMain-title:nth-of-type(4)`, + &selector{atoms: []selectorAtom{&classSelector{"VulnMain-title"}, &nthOfType{4}}}, + nil, + }, + { + `th:nth-child(2)`, + &selector{atoms: []selectorAtom{&elementSelector{"th"}, &nthChild{2}}}, + nil, + }, + { + `th:(2)`, + nil, + errors.New("expected identifier after : in pseudo class"), + }, + { + `th:32(2)`, + nil, + errors.New("expected identifier after : in pseudo class"), + }, + { + `th:active`, + nil, + errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported"), + }, + { + `th:nth-child`, + nil, + errors.New("expected '(' after :nth-of-type or nth-child"), + }, + { + `th:nth-child(odd)`, + nil, + errors.New("only number arguments are supported for :nth-of-type() or :nth-child()"), + }, + { + `th:nth-child(14`, + nil, + errors.New("expected ')' after number argument to nth-of-type or nth-child"), + }, + // We don't support the child combinator. Make sure it returns a parse error. + { + ".Documentation-sinceVersion > .Documentation-sinceVersionVersion", + nil, + errors.New("unexpected character '>' in input"), + }, } for _, tc := range testCases { sel, err := parse(tc.text) @@ -156,6 +202,17 @@ func TestQuery(t *testing.T) { {`
wrong
also wrong
still wrong
match
`, "div#my-id div.my-class", `
match
`}, {`
link body
`, ".UnitMeta-repo a", `link body`}, {``, ".UnitFiles-fileList a", `a.go`}, + {``, "li:nth-child(2)", `
  • second child
  • `}, + {``, "li:nth-child(2)", `
  • second child
  • `}, + {`
    not counted

    first paragraph

    `, ".class:nth-of-type(1)", `

    first paragraph

    `}, + {`
    not counted

    first paragraph

    `, ".class:nth-of-type(1)", `

    first paragraph

    `}, + {`
    not counted

    first paragraph

    `, ".class:nth-of-type(2)", ``}, + {`
    not counted

    first paragraph

    second paragraph

    `, ".class:nth-of-type(2)", `

    second paragraph

    `}, + {`
    not counted

    first paragraph

    second paragraph

    `, ".class:nth-of-type(2)", `

    second paragraph

    `}, + {`
    not counted

    first paragraph

    also not counted

    second paragraph

    `, ".class:nth-of-type(2)", `

    second paragraph

    `}, + {`
    not counted

    first paragraph

    also not counted

    second paragraph

    also not counted

    third paragraph

    fourth paragraph

    fifth paragraph

    `, ".class:nth-of-type(5)", `

    fifth paragraph

    `}, + {``, "th:nth-child(1)", ""}, + {`
    Path Synopsis
    Path
    `, "th:nth-child(1)", ""}, } for _, tc := range testCases { n, err := html.Parse(strings.NewReader(tc.queriedText)) diff --git a/internal/testing/integration/frontend_main_test.go b/internal/testing/integration/frontend_main_test.go index 009fc2b7d..fea314a8d 100644 --- a/internal/testing/integration/frontend_main_test.go +++ b/internal/testing/integration/frontend_main_test.go @@ -51,7 +51,7 @@ func TestFrontendMainPage(t *testing.T) { in("#S2", in(".Documentation-sinceVersion", hasText(""))), in("#String", - in(".Documentation-sinceVersion > .Documentation-sinceVersionVersion", hasText("v1.1.0"))), + in(".Documentation-sinceVersion .Documentation-sinceVersionVersion", hasText("v1.1.0"))), in("#T", in(".Documentation-sinceVersion", hasText(""))), in("#TF",
    Path Synopsis
    Path