Skip to content

Commit

Permalink
Parse % string escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
richardmarshall committed Feb 17, 2024
1 parent a4c66b7 commit 3c3a62f
Show file tree
Hide file tree
Showing 8 changed files with 576 additions and 7 deletions.
12 changes: 10 additions & 2 deletions parser/declaration_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,13 @@ func (p *Parser) parseTableProperty() (*ast.TableProperty, error) {
if !p.expectPeek(token.STRING) {
return nil, errors.WithStack(UnexpectedToken(p.peekToken, "STRING"))
}
key, err := p.parseString()
if err != nil {
return nil, errors.WithStack(err)
}
prop := &ast.TableProperty{
Meta: p.curToken,
Key: p.parseString(),
Key: key,
}
prop.Key.Meta = clearComments(prop.Key.Meta)

Expand All @@ -371,7 +375,11 @@ func (p *Parser) parseTableProperty() (*ast.TableProperty, error) {
case token.IDENT:
prop.Value = p.parseIdent()
case token.STRING:
prop.Value = p.parseString()
var err error
prop.Value, err = p.parseString()
if err != nil {
return nil, errors.WithStack(err)
}
case token.ACL, token.BACKEND:
prop.Value = p.parseIdent()
case token.TRUE, token.FALSE:
Expand Down
7 changes: 7 additions & 0 deletions parser/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,10 @@ func EmptySwitch(m *ast.Meta) *ParseError {
Message: "Switch must have at least one case",
}
}

// InvalidEscape builds a ParseError for a malformed string escape. The error
// carries the token held by m and uses msg verbatim as its message.
func InvalidEscape(m *ast.Meta, msg string) *ParseError {
	e := new(ParseError)
	e.Token = m.Token
	e.Message = msg
	return e
}
2 changes: 1 addition & 1 deletion parser/expression_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func (p *Parser) registerExpressionParsers() {
p.prefixParsers = map[token.TokenType]prefixParser{
token.IDENT: func() (ast.Expression, error) { return p.parseIdent(), nil },
token.STRING: func() (ast.Expression, error) { return p.parseString(), nil },
token.STRING: func() (ast.Expression, error) { return p.parseString() },
token.INT: func() (ast.Expression, error) { return p.parseInteger() },
token.FLOAT: func() (ast.Expression, error) { return p.parseFloat() },
token.RTIME: func() (ast.Expression, error) { return p.parseRTime() },
Expand Down
60 changes: 60 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,66 @@ sub vcl_recv {
}
}

// TestStringLiteralEscapes checks that a %XX escape is decoded inside a
// double-quoted string literal and left untouched inside a {"..."} literal.
func TestStringLiteralEscapes(t *testing.T) {
	// % escapes are only expanded in double-quote strings.
	const input = `
sub vcl_recv {
set req.http.v1 = "foo%20bar";
set req.http.v2 = {"foo%20bar"};
}`

	// set builds the expected node for `set <name> = <string literal>;`.
	set := func(name, value string) *ast.SetStatement {
		return &ast.SetStatement{
			Meta:     ast.New(T, 1),
			Ident:    &ast.Ident{Meta: ast.New(T, 1), Value: name},
			Operator: &ast.Operator{Meta: ast.New(T, 1), Operator: "="},
			Value:    &ast.String{Meta: ast.New(T, 1), Value: value},
		}
	}

	expect := &ast.VCL{
		Statements: []ast.Statement{
			&ast.SubroutineDeclaration{
				Meta: ast.New(T, 0),
				Name: &ast.Ident{Meta: ast.New(T, 0), Value: "vcl_recv"},
				Block: &ast.BlockStatement{
					Meta: ast.New(T, 1),
					Statements: []ast.Statement{
						// Double-quoted: %20 becomes a space.
						set("req.http.v1", "foo bar"),
						// Long-string form: the escape stays as written.
						set("req.http.v2", "foo%20bar"),
					},
				},
			},
		},
	}

	vcl, err := New(lexer.NewFromString(input)).ParseVCL()
	if err != nil {
		t.Errorf("%+v", err)
	}
	assert(t, vcl, expect)
}

func TestCommentInInfixExpression(t *testing.T) {
input := `
sub vcl_recv {
Expand Down
6 changes: 5 additions & 1 deletion parser/statement_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ func (p *Parser) parseIncludeStatement() (ast.Statement, error) {
if !p.expectPeek(token.STRING) {
return nil, errors.WithStack(UnexpectedToken(p.peekToken, "STRING"))
}
i.Module = p.parseString()
var err error
i.Module, err = p.parseString()
if err != nil {
return nil, errors.WithStack(err)
}
i.Meta.Trailing = p.trailing()

// Semicolons are actually not required at the end of include lines
Expand Down
201 changes: 201 additions & 0 deletions parser/string_escape.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
package parser

import (
"bufio"
"fmt"
"io"
"strings"
"unicode"
"unicode/utf8"

"github.com/pkg/errors"
)

// decodeStringEscapes expands the % escapes in s and returns the decoded
// string.
//
// A '%' followed by 'u' is handed to codePointEscape (after the 'u' has been
// consumed); any other '%' is handed to utf8Escape. Every other rune is copied
// through unchanged.
//
// Decoding stops early, without an error, at a literal NUL rune or when a
// helper reports NULLbyte; whatever was decoded up to that point is returned.
// Any other helper error is returned as-is together with an empty string.
func decodeStringEscapes(s string) (string, error) {
	// A builder keeps the decode linear; appending with += would re-copy the
	// accumulated prefix on every rune.
	var out strings.Builder
	out.Grow(len(s))
	r := bufio.NewReader(strings.NewReader(s))

	for {
		c, _, err := r.ReadRune()
		// End of input, or a raw NUL in the literal, terminates the string.
		if err == io.EOF || c == 0 {
			break
		}
		if c != '%' {
			out.WriteRune(c)
			continue
		}

		var decoded string
		if peek(r) == 'u' {
			next(r)
			decoded, err = codePointEscape(r)
		} else {
			decoded, err = utf8Escape(r)
		}
		// stop processing string on a null byte.
		if err == NULLbyte {
			break
		}
		if err != nil {
			return "", err
		}
		out.WriteString(decoded)
	}

	return out.String(), nil
}

// isHex reports whether c is a hexadecimal digit: '0'-'9', or a letter whose
// lower-case form lies in 'a'-'f'.
func isHex(c rune) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	lc := unicode.ToLower(c)
	return lc >= 'a' && lc <= 'f'
}

// digitVal returns the numeric value (0-15) of the hex digit c.
// The result is only meaningful when isHex(c) is true; callers are expected to
// have checked that first.
func digitVal(c rune) int {
	if c < '0' || c > '9' {
		// Letter digit: fold case so upper and lower case map to the same value.
		return int(unicode.ToLower(c) - 'a' + 10)
	}
	return int(c - '0')
}

// readByte reads two hex digits from r and returns the byte they encode.
//
// An error is returned when either character is missing or is not a hex
// digit. Running out of input is reported with the same "incomplete byte"
// error as a non-hex character, so a truncated escape produces a descriptive
// message instead of a bare io.EOF. Any other read error is returned as-is.
func readByte(r *bufio.Reader) (byte, error) {
	var x byte

	for i := 0; i < 2; i++ {
		c, _, err := r.ReadRune()
		if err == io.EOF {
			// The string ended in the middle of the escape.
			return 0, fmt.Errorf("invalid utf-8 escape, incomplete byte")
		}
		if err != nil {
			return 0, err
		}
		if !isHex(c) {
			return 0, fmt.Errorf("invalid utf-8 escape, incomplete byte")
		}
		// Shift the previous nibble up and add the new one.
		x = x*16 + byte(digitVal(c))
	}

	return x, nil
}

// NULLbyte is the sentinel error returned by the escape decoders when an
// escape decodes to a NUL byte or to the zero code point.
// decodeStringEscapes treats it as "stop decoding here" rather than as a
// failure, so it never reaches the caller of that function.
var NULLbyte = errors.New("NULL")

// peek returns the next byte in r, widened to a rune, without consuming it.
// It returns -1 when nothing can be peeked, for example at end of input.
// Only one byte is inspected, so for a multi-byte UTF-8 character the result
// is its first byte rather than the decoded rune.
func peek(r *bufio.Reader) rune {
	if b, err := r.Peek(1); err == nil {
		return rune(b[0])
	}
	return -1
}

// next consumes and returns one rune from r.
// The read error is deliberately dropped: callers are expected to have used
// peek first to confirm that more input is available.
func next(r *bufio.Reader) (c rune) {
	c, _, _ = r.ReadRune() // nolint:errcheck
	return c
}

// codePointEscape decodes a unicode code point escape and returns the
// character it names. The leading "%u" has already been consumed by the
// caller, so r is positioned at one of two forms:
//   - XXXX   exactly four hex digits
//   - {...}  one to six hex digits wrapped in braces
//
// It returns NULLbyte for code point zero, and an error when digits are
// missing, the closing brace is absent, the value exceeds unicode.MaxRune, or
// the value is a surrogate (U+D800 to U+DFFF).
func codePointEscape(r *bufio.Reader) (string, error) {
	// A leading '{' selects the variable-width form.
	braced := peek(r) == '{'
	minDigits, maxDigits := 4, 4
	if braced {
		next(r)
		minDigits, maxDigits = 1, 6
	}

	// Accumulate hex digits until a non-digit is seen or the limit is reached.
	cp, digits := 0, 0
	for digits < maxDigits && isHex(peek(r)) {
		cp = cp*16 + digitVal(next(r))
		digits++
	}
	if digits < minDigits {
		return "", fmt.Errorf("incomplete unicode escape. %d missing digits", minDigits-digits)
	}

	// The braced form must be closed immediately after its digits.
	if braced {
		if next(r) != '}' {
			return "", fmt.Errorf("incomplete %%{xxxx} escape")
		}
	}

	switch {
	case cp == 0:
		// stop processing string on zero code point
		return "", NULLbyte
	case cp > unicode.MaxRune:
		return "", fmt.Errorf("invalid code point U+%x in unicode escape", cp)
	case 0xD800 <= cp && cp <= 0xDFFF:
		// Surrogate code points are not valid
		return "", fmt.Errorf("invalid surrogate code point U+%x in unicode escape", cp)
	}

	return string(rune(cp)), nil
}

// utf8Escape decodes a run of %XX escapes that together spell one UTF-8
// encoded character and returns that character as a string.
//
// The leading '%' has already been consumed by the caller, so r is positioned
// at the first pair of hex digits. The first byte determines how many bytes
// the character occupies (1 to 4); each further byte must be written as its
// own %XX escape immediately after the previous one.
//
// It returns NULLbyte when the escape is %00, and an error when the leading
// byte is not a valid UTF-8 lead byte, when continuation escapes are missing,
// or when the collected bytes are not valid UTF-8.
func utf8Escape(r *bufio.Reader) (string, error) {
	b1, err := readByte(r)
	if err != nil {
		return "", err
	}

	// Identify how many escape bytes need to be read
	var n int
	switch {
	case b1&0x80 == 0: // 1 byte (ASCII)
		if b1 == 0 {
			return "", NULLbyte
		}
		return string(rune(b1)), nil
	case b1&0xe0 == 0xc0: // 2 bytes
		n = 2
	case b1&0xf0 == 0xe0: // 3 bytes
		n = 3
	case b1&0xf8 == 0xf0: // 4 bytes
		n = 4
	default:
		return "", fmt.Errorf("utf-8 escape has invalid leading byte %x", b1)
	}

	// Read the remaining n-1 continuation bytes, each its own %XX escape.
	bs := make([]byte, 1, n)
	bs[0] = b1
	for i := 1; i < n; i++ {
		if peek(r) != '%' {
			return "", fmt.Errorf("incomplete utf-8 escape. %d missing bytes", n-i)
		}
		next(r)
		b, err := readByte(r)
		if err != nil {
			return "", err
		}
		bs = append(bs, b)
	}

	// DecodeRune reports an invalid encoding as (RuneError, 1). A RuneError
	// with a larger size is a correctly encoded U+FFFD (%EF%BF%BD) and must be
	// accepted, so the size has to be checked as well as the rune.
	c, size := utf8.DecodeRune(bs)
	if c == utf8.RuneError && size == 1 {
		return "", fmt.Errorf("invalid utf-8 escape")
	}

	return string(c), nil
}
Loading

0 comments on commit 3c3a62f

Please sign in to comment.