Skip to content

Commit

Permalink
fix xalanq#163: parsed new HTML for input codeforces
Browse files Browse the repository at this point in the history
  • Loading branch information
visrut-at-incubyte committed Jul 27, 2023
1 parent 4aff868 commit c83955f
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions client/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,29 @@ import (
)

// findSample extracts the sample tests embedded in a problem page.
//
// It looks for every `<div class="input">` / `<div class="output">` section in
// body and takes the contents of the first <pre> element that follows each
// one. The <pre> tag may carry attributes. Inside the <pre>, the text may be
// separated either by <br>-style tags or by one <div> per line; both forms
// are normalised to newline-separated text.
//
// The i-th element of input corresponds to the i-th element of output. Each
// returned sample has HTML entities unescaped and leading/trailing whitespace
// removed (so there is no trailing newline).
//
// An error is returned when no input section or no output section is found,
// or when the number of input and output sections differ.
func findSample(body []byte) (input [][]byte, output [][]byte, err error) {
	irg := regexp.MustCompile(`<div class="input">[\s\S]*?<pre[^>]*>([\s\S]*?)</pre>`)
	org := regexp.MustCompile(`<div class="output">[\s\S]*?<pre[^>]*>([\s\S]*?)</pre>`)

	a := irg.FindAllSubmatch(body, -1)
	b := org.FindAllSubmatch(body, -1)
	if a == nil || b == nil || len(a) != len(b) {
		return nil, nil, fmt.Errorf("Cannot parse sample with input %v and output %v", len(a), len(b))
	}

	// Matches <br>, <br/>, <br /> and similar short tags made only of
	// whitespace, '/', 'b' and 'r'.
	newline := regexp.MustCompile(`<[\s/br]+?>`)
	// Matches the opening and closing <div> tags that wrap each sample line.
	tagRegex := regexp.MustCompile(`<div[^>]*>|</div>`)

	filter := func(src []byte) []byte {
		src = newline.ReplaceAll(src, []byte("\n"))
		src = tagRegex.ReplaceAll(src, []byte("\n"))
		// A closing </div> directly followed by an opening <div> yields two
		// newlines; fold each such pair back into one line break.
		src = bytes.ReplaceAll(src, []byte("\n\n"), []byte("\n"))
		s := html.UnescapeString(string(src))
		return []byte(strings.TrimSpace(s))
	}

	input = make([][]byte, 0, len(a))
	output = make([][]byte, 0, len(b))
	for i := range a {
		// Submatch index 1 is the captured <pre> body.
		input = append(input, filter(a[i][1]))
		output = append(output, filter(b[i][1]))
	}

	return input, output, nil
}

Expand Down

0 comments on commit c83955f

Please sign in to comment.