From f96845fcf702ca681c081ac727a45b6d85308590 Mon Sep 17 00:00:00 2001 From: Dmitry Panov Date: Wed, 28 Aug 2024 13:40:09 +0100 Subject: [PATCH] Numeric separator literal (#603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added support for separators in numeric literals. * Improved support for Unicode code point escapes (\u{...}) in regexes. * Improved number parsing compatibility. --------- Co-authored-by: Joan López de la Franca Beltran --- builtin_regexp.go | 4 +-- parser/lexer.go | 20 ++++++++----- parser/lexer_test.go | 5 ++++ parser/regexp.go | 17 ++++++----- parser/regexp_test.go | 14 ++++----- regexp.go | 7 +++-- regexp_test.go | 26 ++++++++++++++++ runtime_test.go | 25 ++++++++++++++++ string_ascii.go | 69 ++++++++++++++++++++++++++++--------------- tc39_test.go | 12 -------- 10 files changed, 137 insertions(+), 62 deletions(-) diff --git a/builtin_regexp.go b/builtin_regexp.go index d0be2de..8344b1c 100644 --- a/builtin_regexp.go +++ b/builtin_regexp.go @@ -241,7 +241,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { patternStr = convertRegexpToUtf16(patternStr) } - re2Str, err1 := parser.TransformRegExp(patternStr, dotAll) + re2Str, err1 := parser.TransformRegExp(patternStr, dotAll, unicode) if err1 == nil { re2flags := "" if multiline { @@ -268,7 +268,7 @@ func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) { err = err1 return } - wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase) + wrapper2, err = compileRegexp2(patternStr, multiline, dotAll, ignoreCase, unicode) if err != nil { err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err) return diff --git a/parser/lexer.go b/parser/lexer.go index 1d7de1b..b7dab72 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -633,9 +633,13 @@ func (self *_parser) skipWhiteSpace() { } } -func (self *_parser) scanMantissa(base int) { - for digitValue(self.chr) < base { +func (self *_parser) scanMantissa(base int, allowSeparator bool) { + for digitValue(self.chr) < base || (allowSeparator && self.chr == '_') { + afterUnderscore := self.chr == '_' self.read() + if afterUnderscore && !isDigit(self.chr, base) { + self.error(self.chrOffset, "Only one underscore is allowed as numeric separator") + } } } @@ -1140,7 +1144,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) if decimalPoint { offset-- - self.scanMantissa(10) + self.scanMantissa(10, true) } else { if self.chr == '0' { self.read() @@ -1156,7 +1160,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) // no-op default: // legacy octal - self.scanMantissa(8) + self.scanMantissa(8, false) goto end } if base > 0 { @@ -1164,15 +1168,15 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) if !isDigit(self.chr, base) { return token.ILLEGAL, self.str[offset:self.chrOffset] } - self.scanMantissa(base) + self.scanMantissa(base, true) goto end } } else { - self.scanMantissa(10) + self.scanMantissa(10, true) } if self.chr == '.' { self.read() - self.scanMantissa(10) + self.scanMantissa(10, true) } } @@ -1183,7 +1187,7 @@ func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) } if isDecimalDigit(self.chr) { self.read() - self.scanMantissa(10) + self.scanMantissa(10, true) } else { return token.ILLEGAL, self.str[offset:self.chrOffset] } diff --git a/parser/lexer_test.go b/parser/lexer_test.go index f388c47..7378df0 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -264,6 +264,11 @@ Second line \ token.NUMBER, "12.3", 5, ) + test("1_000 1_000_000", + token.NUMBER, "1_000", 1, + token.NUMBER, "1_000_000", 7, + ) + test(`1n`, token.NUMBER, "1n", 1, ) diff --git a/parser/regexp.go b/parser/regexp.go index 72bb3c7..0ea9b9d 100644 --- a/parser/regexp.go +++ b/parser/regexp.go @@ -41,7 +41,8 @@ type _RegExp_parser struct { goRegexp strings.Builder passOffset int - dotAll bool // Enable dotAll mode + dotAll bool // Enable dotAll mode + unicode bool } // TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern. @@ -57,16 +58,17 @@ type _RegExp_parser struct { // // If the pattern is invalid (not valid even in JavaScript), then this function // returns an empty string and a generic error. -func TransformRegExp(pattern string, dotAll bool) (transformed string, err error) { +func TransformRegExp(pattern string, dotAll, unicode bool) (transformed string, err error) { if pattern == "" { return "", nil } parser := _RegExp_parser{ - str: pattern, - length: len(pattern), - dotAll: dotAll, + str: pattern, + length: len(pattern), + dotAll: dotAll, + unicode: unicode, } err = parser.parse() if err != nil { @@ -292,7 +294,7 @@ func (self *_RegExp_parser) scanEscape(inClass bool) { case 'u': self.read() - if self.chr == '{' { + if self.chr == '{' && self.unicode { self.read() length, base = 0, 16 } else { @@ -392,7 +394,8 @@ func (self *_RegExp_parser) scanEscape(inClass bool) { digit := uint32(digitValue(self.chr)) if digit >= base { // Not a valid digit - goto skip + self.error(true, "Invalid Unicode escape") + return } self.read() } diff --git a/parser/regexp_test.go b/parser/regexp_test.go index b126769..3be77a3 100644 --- a/parser/regexp_test.go +++ b/parser/regexp_test.go @@ -10,7 +10,7 @@ func TestRegExp(t *testing.T) { { // err test := func(input string, expect interface{}) { - _, err := TransformRegExp(input, false) + _, err := TransformRegExp(input, false, false) _, incompat := err.(RegexpErrorIncompatible) is(incompat, false) is(err, expect) @@ -33,7 +33,7 @@ func TestRegExp(t *testing.T) { { // incompatible test := func(input string, expectErr interface{}) { - _, err := TransformRegExp(input, false) + _, err := TransformRegExp(input, false, false) _, incompat := err.(RegexpErrorIncompatible) is(incompat, true) is(err, expectErr) @@ -54,7 +54,7 @@ func TestRegExp(t *testing.T) { { // err test := func(input string, expect string) { - result, err := TransformRegExp(input, false) + result, err := TransformRegExp(input, false, false) is(err, nil) _, incompat := err.(RegexpErrorIncompatible) is(incompat, false) @@ -151,18 +151,18 @@ func TestRegExp(t *testing.T) { func TestTransformRegExp(t *testing.T) { tt(t, func() { - pattern, err := TransformRegExp(`\s+abc\s+`, false) + pattern, err := TransformRegExp(`\s+abc\s+`, false, false) is(err, nil) is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`) is(regexp.MustCompile(pattern).MatchString("\t abc def"), true) }) tt(t, func() { - pattern, err := TransformRegExp(`\u{1d306}`, false) + pattern, err := TransformRegExp(`\u{1d306}`, false, true) is(err, nil) is(pattern, `\x{1d306}`) }) tt(t, func() { - pattern, err := TransformRegExp(`\u1234`, false) + pattern, err := TransformRegExp(`\u1234`, false, false) is(err, nil) is(pattern, `\x{1234}`) }) @@ -173,7 +173,7 @@ func BenchmarkTransformRegExp(b *testing.B) { b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - _, _ = TransformRegExp(reStr, false) + _, _ = TransformRegExp(reStr, false, false) } } diff --git a/regexp.go b/regexp.go index 3da1fc4..ca28ccc 100644 --- a/regexp.go +++ b/regexp.go @@ -67,7 +67,7 @@ type regexpPattern struct { regexp2Wrapper *regexp2Wrapper } -func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wrapper, error) { +func compileRegexp2(src string, multiline, dotAll, ignoreCase, unicode bool) (*regexp2Wrapper, error) { var opts regexp2.RegexOptions = regexp2.ECMAScript if multiline { opts |= regexp2.Multiline @@ -78,6 +78,9 @@ func compileRegexp2(src string, multiline, dotAll, ignoreCase bool) (*regexp2Wra if ignoreCase { opts |= regexp2.IgnoreCase } + if unicode { + opts |= regexp2.Unicode + } regexp2Pattern, err1 := regexp2.Compile(src, opts) if err1 != nil { return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1) @@ -90,7 +93,7 @@ func (p *regexpPattern) createRegexp2() { if p.regexp2Wrapper != nil { return } - rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase) + rx, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase, p.unicode) if err != nil { // At this point the regexp should have been successfully converted to re2, if it fails now, it's a bug. panic(err) diff --git a/regexp_test.go b/regexp_test.go index 2daaae1..33c826d 100644 --- a/regexp_test.go +++ b/regexp_test.go @@ -721,6 +721,32 @@ func TestRegexpDotAll(t *testing.T) { } +func TestRegexpNumSeparators(t *testing.T) { + const SCRIPT = ` + const re = /(?<=a)\u{65}_/u; + assert(re.test("ae_") && !re.test("e_")); + + assert.throws(SyntaxError, () => { + new RegExp("(?<=a)\\u{6_5}", "u"); + }); + + assert.throws(SyntaxError, () => { + new RegExp("a\\u{6_5}", "u"); + }); + + ` + testScriptWithTestLib(SCRIPT, _undefined, t) +} + +func TestRegexpUnicodeEscape(t *testing.T) { + const SCRIPT = ` + assert.sameValue("u{0_2}".match(/\u{0_2}/)[0], "u{0_2}"); + assert.sameValue("uu\x02".match(/\u{2}/u)[0], '\x02'); + assert.sameValue("uu\x02".match(/\u{2}/)[0], "uu"); + ` + testScriptWithTestLib(SCRIPT, _undefined, t) +} + func BenchmarkRegexpSplitWithBackRef(b *testing.B) { const SCRIPT = ` "aaaaaaaaaaaaaaaaaaaaaaaaa++bbbbbbbbbbbbbbbbbbbbbb+-ccccccccccccccccccccccc".split(/([+-])\1/) diff --git a/runtime_test.go b/runtime_test.go index 7e91e17..0c67ad3 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -2979,6 +2979,31 @@ func TestDestructAssignToSymbol(t *testing.T) { testScriptWithTestLib(SCRIPT, _undefined, t) } +func TestToNumber(t *testing.T) { + const SCRIPT = ` + assert(isNaN(Number("+"))); + assert(isNaN(Number("++"))); + assert(isNaN(Number("-"))); + assert(isNaN(Number("0xfp1"))); + assert(isNaN(Number("0Xfp1"))); + assert(isNaN(Number("+0xfp1"))); + assert(isNaN(Number(" +0xfp1"))); + assert(isNaN(Number(" + 0xfp1"))); + assert(isNaN(Number(" 0xfp1"))); + assert(isNaN(Number("-0xfp1"))); + assert(isNaN(Number("- 0xfp1"))); + assert(isNaN(Number(" - 0xfp1"))); + assert.sameValue(Number("0."), 0); + assert.sameValue(Number(" "), 0); + assert.sameValue(Number(" Infinity"), Infinity); + + let a = [1]; + assert.sameValue(1, a.at("0xfp1")); + assert.sameValue(1, a.at(" 0xfp1")); + ` + testScriptWithTestLib(SCRIPT, _undefined, t) +} + /* func TestArrayConcatSparse(t *testing.T) { function foo(a,b,c) diff --git a/string_ascii.go b/string_ascii.go index f83b3d5..63c92ad 100644 --- a/string_ascii.go +++ b/string_ascii.go @@ -103,8 +103,8 @@ func stringToInt(ss string) (int64, error) { return strconv.ParseInt(ss, 10, 64) } -func (s asciiString) _toInt() (int64, error) { - return stringToInt(strings.TrimSpace(string(s))) +func (s asciiString) _toInt(trimmed string) (int64, error) { + return stringToInt(trimmed) } func isRangeErr(err error) bool { @@ -114,18 +114,36 @@ func isRangeErr(err error) bool { return false } -func (s asciiString) _toFloat() (float64, error) { - ss := strings.ToLower(strings.TrimSpace(string(s))) - if ss == "" { +func (s asciiString) _toFloat(trimmed string) (float64, error) { + if trimmed == "" { return 0, nil } - if ss == "-0" { + if trimmed == "-0" { var f float64 return -f, nil } - f, err := strconv.ParseFloat(ss, 64) + // Go allows underscores in numbers, when parsed as floats, but ECMAScript expect them to be interpreted as NaN. + if strings.ContainsRune(trimmed, '_') { + return 0, strconv.ErrSyntax + } + + // Hexadecimal floats are not supported by ECMAScript. + if len(trimmed) >= 2 { + var prefix string + if trimmed[0] == '-' || trimmed[0] == '+' { + prefix = trimmed[1:] + } else { + prefix = trimmed + } + if len(prefix) >= 2 && prefix[0] == '0' && (prefix[1] == 'x' || prefix[1] == 'X') { + return 0, strconv.ErrSyntax + } + } + + f, err := strconv.ParseFloat(trimmed, 64) if err == nil && math.IsInf(f, 0) { + ss := strings.ToLower(trimmed) if strings.HasPrefix(ss, "inf") || strings.HasPrefix(ss, "-inf") || strings.HasPrefix(ss, "+inf") { // We handle "Infinity" separately, prevent from being parsed as Infinity due to strconv.ParseFloat() permissive syntax return 0, strconv.ErrSyntax @@ -138,18 +156,19 @@ func (s asciiString) _toFloat() (float64, error) { } func (s asciiString) ToInteger() int64 { - if s == "" { + ss := strings.TrimSpace(string(s)) + if ss == "" { return 0 } - if s == "Infinity" || s == "+Infinity" { + if ss == "Infinity" || ss == "+Infinity" { return math.MaxInt64 } - if s == "-Infinity" { + if ss == "-Infinity" { return math.MinInt64 } - i, err := s._toInt() + i, err := s._toInt(ss) if err != nil { - f, err := s._toFloat() + f, err := s._toFloat(ss) if err == nil { return int64(f) } @@ -170,18 +189,19 @@ func (s asciiString) String() string { } func (s asciiString) ToFloat() float64 { - if s == "" { + ss := strings.TrimSpace(string(s)) + if ss == "" { return 0 } - if s == "Infinity" || s == "+Infinity" { + if ss == "Infinity" || ss == "+Infinity" { return math.Inf(1) } - if s == "-Infinity" { + if ss == "-Infinity" { return math.Inf(-1) } - f, err := s._toFloat() + f, err := s._toFloat(ss) if err != nil { - i, err := s._toInt() + i, err := s._toInt(ss) if err == nil { return float64(i) } @@ -195,21 +215,22 @@ func (s asciiString) ToBoolean() bool { } func (s asciiString) ToNumber() Value { - if s == "" { + ss := strings.TrimSpace(string(s)) + if ss == "" { return intToValue(0) } - if s == "Infinity" || s == "+Infinity" { + if ss == "Infinity" || ss == "+Infinity" { return _positiveInf } - if s == "-Infinity" { + if ss == "-Infinity" { return _negativeInf } - if i, err := s._toInt(); err == nil { + if i, err := s._toInt(ss); err == nil { return intToValue(i) } - if f, err := s._toFloat(); err == nil { + if f, err := s._toFloat(ss); err == nil { return floatToValue(f) } @@ -230,7 +251,7 @@ func (s asciiString) Equals(other Value) bool { } if o, ok := other.(valueInt); ok { - if o1, e := s._toInt(); e == nil { + if o1, e := s._toInt(strings.TrimSpace(string(s))); e == nil { return o1 == int64(o) } return false @@ -241,7 +262,7 @@ func (s asciiString) Equals(other Value) bool { } if o, ok := other.(valueBool); ok { - if o1, e := s._toFloat(); e == nil { + if o1, e := s._toFloat(strings.TrimSpace(string(s))); e == nil { return o1 == o.ToFloat() } return false diff --git a/tc39_test.go b/tc39_test.go index 026dfba..3982e2d 100644 --- a/tc39_test.go +++ b/tc39_test.go @@ -168,17 +168,6 @@ var ( "test/language/literals/string/S7.8.4_A4.3_T2.js": true, "test/language/literals/string/S7.8.4_A4.3_T1.js": true, - // integer separators - "test/language/expressions/object/cpn-obj-lit-computed-property-name-from-integer-separators.js": true, - "test/language/expressions/class/cpn-class-expr-accessors-computed-property-name-from-integer-separators.js": true, - "test/language/statements/class/cpn-class-decl-fields-computed-property-name-from-integer-separators.js": true, - "test/language/statements/class/cpn-class-decl-computed-property-name-from-integer-separators.js": true, - "test/language/statements/class/cpn-class-decl-accessors-computed-property-name-from-integer-separators.js": true, - "test/language/statements/class/cpn-class-decl-fields-methods-computed-property-name-from-integer-separators.js": true, - "test/language/expressions/class/cpn-class-expr-fields-computed-property-name-from-integer-separators.js": true, - "test/language/expressions/class/cpn-class-expr-computed-property-name-from-integer-separators.js": true, - "test/language/expressions/class/cpn-class-expr-fields-methods-computed-property-name-from-integer-separators.js": true, - // Regexp "test/language/literals/regexp/invalid-range-negative-lookbehind.js": true, "test/language/literals/regexp/invalid-range-lookbehind.js": true, @@ -237,7 +226,6 @@ var ( "Atomics.pause", "FinalizationRegistry", "WeakRef", - "numeric-separator-literal", "__getter__", "__setter__", "ShadowRealm",