Skip to content

Commit

Permalink
Fix line number tracking for invalid tagged template tokens + minor perf. improvement to template parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
adams85 committed Feb 21, 2024
1 parent 26b0754 commit c0d0668
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 33 deletions.
4 changes: 0 additions & 4 deletions src/Acornima/Parser.Expression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,6 @@ private static bool IsLocalVariableAccess(Expression expr)
case Identifier:
return true;

// Original acornjs implementation doesn't handle the ParenthesizedExpression case.
// TODO: report bug
case ParenthesizedExpression parenthesizedExpression:
expr = parenthesizedExpression.Expression;
continue;
Expand All @@ -504,8 +502,6 @@ private static bool IsPrivateFieldAccess(Expression expr)
expr = chainExpression.Expression;
continue;

// Original acornjs implementation doesn't handle the ParenthesizedExpression case.
// TODO: report bug
case ParenthesizedExpression parenthesizedExpression:
expr = parenthesizedExpression.Expression;
continue;
Expand Down
6 changes: 1 addition & 5 deletions src/Acornima/Parser.Statement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ private bool IsLet(StatementContext context = StatementContext.Default)
// is allowed. However, `let [` is an explicit negative lookahead for
// ExpressionStatement, so special-case it first.

if (nextCh is '[' or '\\') // TODO: Acorn comment says '/' - report bug
if (nextCh is '[' or '\\')
{
return true;
}
Expand Down Expand Up @@ -1965,10 +1965,6 @@ private Expression ParseModuleExportName()

private ArrayList<Statement> ParseDirectivePrologue(bool allowStrictDirective)
{
// NOTE: Original acornjs implementation of strict mode detection is fragile and buggy at the moment
// (e.g.: `() => { 'a'[0]; 'use strict'; 00 }` is rejected while valid).
// TODO: report bug

if (_tokenizerOptions._ecmaVersion < EcmaVersion.ES5)
{
return new ArrayList<Statement>();
Expand Down
75 changes: 51 additions & 24 deletions src/Acornima/Tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ private bool ReadString(int quote)
{
// https://github.com/acornjs/acorn/blob/8.11.3/acorn/src/tokenize.js > `pp.readString = function`

Unsafe.SkipInit(out bool normalizeRaw);
_legacyOctalPosition = -1;
AcquireStringBuilder(out var sb);
try
Expand All @@ -994,7 +995,7 @@ private bool ReadString(int quote)
{
case '\\':
sb.Append(_input, chunkStart, _position - chunkStart);
if (ReadEscapedChar(sb, inTemplate: false) is null)
if (ReadEscapedChar(sb, inTemplate: false, ref normalizeRaw) is null)
{
return false;
}
Expand Down Expand Up @@ -1071,8 +1072,9 @@ private bool TryReadTemplateToken()

_inTemplateElement = true;

var success = ReadTemplateToken(out var invalidTemplate)
&& (!invalidTemplate || ReadInvalidTemplateToken());
var normalizeRaw = false;
var success = ReadTemplateToken(ref normalizeRaw, out var invalidTemplate)
&& (!invalidTemplate || ReadInvalidTemplateToken(ref normalizeRaw));

_inTemplateElement = false;

Expand Down Expand Up @@ -1100,7 +1102,7 @@ private void InvalidStringToken(int pos, string message)
Raise(pos, message);
}

private bool ReadTemplateToken(out bool invalidTemplate)
private bool ReadTemplateToken(ref bool normalizeRaw, out bool invalidTemplate)
{
// https://github.com/acornjs/acorn/blob/8.11.3/acorn/src/tokenize.js > `pp.readTmplToken = function`

Expand Down Expand Up @@ -1138,13 +1140,13 @@ private bool ReadTemplateToken(out bool invalidTemplate)
var templateCooked = DeduplicateString(value, ref _stringPool, NonIdentifierDeduplicationThreshold);

sb.Clear();
var templateRaw = DeduplicateString(ReadTemplateRaw(sb), ref _stringPool, NonIdentifierDeduplicationThreshold);
var templateRaw = DeduplicateString(ReadTemplateRaw(sb, normalizeRaw), ref _stringPool, NonIdentifierDeduplicationThreshold);

return FinishToken(TokenType.Template, new TemplateValue(templateCooked, templateRaw));

case '\\':
sb.Append(_input, chunkStart, _position - chunkStart);
if (ReadEscapedChar(sb, inTemplate: true) is null)
if (ReadEscapedChar(sb, inTemplate: true, ref normalizeRaw) is null)
{
invalidTemplate = true;
return true;
Expand All @@ -1153,6 +1155,8 @@ private bool ReadTemplateToken(out bool invalidTemplate)
break;

case '\r':
normalizeRaw = true;

++_position;
sb.Append(_input, chunkStart, _position - chunkStart);
sb[sb.Length - 1] = '\n';
Expand Down Expand Up @@ -1188,11 +1192,11 @@ private bool ReadTemplateToken(out bool invalidTemplate)
}

// Reads a template token to search for the end, without validating any escape sequences
private bool ReadInvalidTemplateToken()
private bool ReadInvalidTemplateToken(ref bool normalizeRaw)
{
// https://github.com/acornjs/acorn/blob/8.11.3/acorn/src/tokenize.js > `pp.readInvalidTemplateToken = function`

for (int ch; (ch = CharCodeAtPosition()) >= 0; _position++)
for (int ch; (ch = CharCodeAtPosition()) >= 0;)
{
switch (ch)
{
Expand All @@ -1210,45 +1214,67 @@ private bool ReadInvalidTemplateToken()
goto case '`';

case '`':
// Original acornjs implementation doesn't normalize line endings in invalid raw strings.
// TODO: report bug

AcquireStringBuilder(out var sb);
try
{
var templateRaw = DeduplicateString(ReadTemplateRaw(sb), ref _stringPool, NonIdentifierDeduplicationThreshold);
var templateRaw = DeduplicateString(ReadTemplateRaw(sb, normalizeRaw), ref _stringPool, NonIdentifierDeduplicationThreshold);

return FinishToken(TokenType.InvalidTemplate, new TemplateValue(null, templateRaw));
}
finally { ReleaseStringBuilder(ref sb); }

case '\r':
normalizeRaw = true;

if (CharCodeAtPosition(1) == '\n')
{
++_position;
}

goto case '\n';

case '\n':
case '\u2028' or '\u2029':
++_position;
++_currentLine;
_lineStart = _position;
continue;
}

_position++;
}

return Raise<bool>(_start, "Unterminated template");
}

private ReadOnlySpan<char> ReadTemplateRaw(StringBuilder sb)
private ReadOnlySpan<char> ReadTemplateRaw(StringBuilder sb, bool normalizeRaw)
{
var chunkStart = _start;
for (int index; (index = _input.IndexOf('\r', chunkStart, _position - chunkStart)) >= 0;)
if (normalizeRaw)
{
sb.Append(_input, chunkStart, index - chunkStart).Append('\n');
chunkStart = index + 1;
if (_input.CharCodeAt(index + 1) == '\n')
for (int index; (index = _input.IndexOf('\r', chunkStart, _position - chunkStart)) >= 0;)
{
chunkStart++;
sb.Append(_input, chunkStart, index - chunkStart).Append('\n');
chunkStart = index + 1;
if (_input.CharCodeAt(index + 1) == '\n')
{
chunkStart++;
}
}
}

return chunkStart == _start
? _input.SliceBetween(chunkStart, _position)
: sb.Append(_input, chunkStart, _position - chunkStart).ToString().AsSpan();
if (chunkStart != _start)
{
return sb.Append(_input, chunkStart, _position - chunkStart).ToString().AsSpan();
}
}
return _input.SliceBetween(chunkStart, _position);
}

// Used to read escaped characters
private StringBuilder? ReadEscapedChar(StringBuilder sb, bool inTemplate)
private StringBuilder? ReadEscapedChar(StringBuilder sb, bool inTemplate, ref bool normalizeRaw)
{
// https://github.com/acornjs/acorn/blob/8.11.3/acorn/src/tokenize.js > `pp.readEscapedChar = function`

++_position;
var ch = CharCodeAtPosition();
++_position;
Expand All @@ -1264,6 +1290,8 @@ private ReadOnlySpan<char> ReadTemplateRaw(StringBuilder sb)
case 'f': return sb.Append('\f');

case '\r':
normalizeRaw = true;

if (CharCodeAtPosition() == '\n') // '\r\n'
{
++_position;
Expand All @@ -1274,7 +1302,6 @@ private ReadOnlySpan<char> ReadTemplateRaw(StringBuilder sb)
case '\n':
// Unicode new line characters after \ get removed from output in both
// template literals and strings
// TODO: looks like LineStart and CurrentLine update is missing from Acorn - report bug
case '\u2028' or '\u2029':
++_currentLine;
_lineStart = _position;
Expand Down

0 comments on commit c0d0668

Please sign in to comment.