diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ea615a3d8..d66939831 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,8 +13,10 @@ Semantic Versioning. * VS Code: You can now make quick-lint-js messages fun and insulting with the `quick-lint-js.snarky` setting (disabled by default). (Implemented by [vegerot][].) -* Using Greek question mark (;, U+037E) instead of a semicolon (;, U+003B) now - reports [E0457][] ("this is a Greek Question Mark, not a semicolon (';')"). +* Using Greek question mark (`;`, U+037E) instead of a semicolon (`;`, U+003B) + now reports [E0457][] ("this is a Greek Question Mark, not a semicolon + (';')"). This diagnostic is also reported for similar-looking characters like + `ǃ` (which should be `!`) and `﴾` (which should be `(`). * TypeScript: Decorators on abstract classes are now parsed. ([#1194][]) ### Fixed diff --git a/src/quick-lint-js/fe/expression.h b/src/quick-lint-js/fe/expression.h index 14486be95..bc7fddcf3 100644 --- a/src/quick-lint-js/fe/expression.h +++ b/src/quick-lint-js/fe/expression.h @@ -615,23 +615,19 @@ class Expression::Call final : public Expression { Source_Code_Span left_paren_span, const Char8 *span_end, std::optional optional_chaining_operator) : Expression(kind), - call_left_paren_begin_(left_paren_span.begin()), + call_left_paren_(left_paren_span), span_end_(span_end), children_(children), optional_chaining_operator_begin_( optional_chaining_operator.has_value() ? 
optional_chaining_operator->begin() : nullptr) { - QLJS_ASSERT(left_paren_span.size() == 1); if (optional_chaining_operator.has_value()) { QLJS_ASSERT(optional_chaining_operator->size() == 2); } } - Source_Code_Span left_paren_span() const { - return Source_Code_Span(this->call_left_paren_begin_, - this->call_left_paren_begin_ + 1); - } + Source_Code_Span left_paren_span() const { return this->call_left_paren_; } std::optional optional_chaining_operator_span() const { if (this->optional_chaining_operator_begin_ == nullptr) { @@ -641,7 +637,7 @@ class Expression::Call final : public Expression { this->optional_chaining_operator_begin_ + 2); } - const Char8 *call_left_paren_begin_; + Source_Code_Span call_left_paren_; const Char8 *span_end_; Expression_Arena::Array_Ptr children_; const Char8 *optional_chaining_operator_begin_ = nullptr; @@ -857,15 +853,11 @@ class Expression::Non_Null_Assertion final : public Expression { static constexpr Expression_Kind kind = Expression_Kind::Non_Null_Assertion; explicit Non_Null_Assertion(Expression *child, Source_Code_Span bang_span) - : Expression(kind), bang_end_(bang_span.end()), child_(child) { - QLJS_ASSERT(same_pointers(this->bang_span(), bang_span)); - } + : Expression(kind), bang_(bang_span), child_(child) {} - Source_Code_Span bang_span() const { - return Source_Code_Span(this->bang_end_ - 1, this->bang_end_); - } + Source_Code_Span bang_span() const { return this->bang_; } - const Char8 *bang_end_; + Source_Code_Span bang_; Expression *child_; }; static_assert(Expression_Arena::is_allocatable); @@ -895,16 +887,12 @@ class Expression::Optional final : public Expression { static constexpr Expression_Kind kind = Expression_Kind::Optional; explicit Optional(Expression *child, Source_Code_Span question_span) - : Expression(kind), child_(child), question_end_(question_span.end()) { - QLJS_ASSERT(question_span.end() - question_span.begin() == 1); - } + : Expression(kind), child_(child), question_(question_span) {} - 
Source_Code_Span question_span() const { - return Source_Code_Span(this->question_end_ - 1, this->question_end_); - } + Source_Code_Span question_span() const { return this->question_; } Expression *child_; - const Char8 *question_end_; + Source_Code_Span question_; }; static_assert(Expression_Arena::is_allocatable); @@ -924,25 +912,26 @@ class Expression::Paren_Empty final : public Expression { public: static constexpr Expression_Kind kind = Expression_Kind::Paren_Empty; - explicit Paren_Empty(Source_Code_Span span) : Expression(kind), span_(span) {} + explicit Paren_Empty(Source_Code_Span left_paren_span, + Source_Code_Span right_paren_span) + : Expression(kind), + left_paren_(left_paren_span), + right_paren_(right_paren_span) {} - Source_Code_Span left_paren_span() const { - return Source_Code_Span(this->span_.begin(), this->span_.begin() + 1); - } + Source_Code_Span left_paren_span() const { return this->left_paren_; } - Source_Code_Span right_paren_span() const { - return Source_Code_Span(this->span_.end() - 1, this->span_.end()); - } + Source_Code_Span right_paren_span() const { return this->right_paren_; } void report_missing_expression_error(Diag_Reporter *reporter) { reporter->report(Diag_Missing_Expression_Between_Parentheses{ - .left_paren_to_right_paren = this->span_, + .left_paren_to_right_paren = this->span(), .left_paren = this->left_paren_span(), .right_paren = this->right_paren_span(), }); } - Source_Code_Span span_; + Source_Code_Span left_paren_; + Source_Code_Span right_paren_; }; static_assert(Expression_Arena::is_allocatable); @@ -1072,16 +1061,12 @@ class Expression::Trailing_Comma final : public Expression { explicit Trailing_Comma(Expression_Arena::Array_Ptr children, Source_Code_Span comma_span) - : Expression(kind), children_(children), comma_end_(comma_span.end()) { - QLJS_ASSERT(comma_span.end() == comma_span.begin() + 1); - } + : Expression(kind), children_(children), comma_(comma_span) {} - Source_Code_Span comma_span() const { - 
return Source_Code_Span(this->comma_end_ - 1, this->comma_end_); - } + Source_Code_Span comma_span() const { return this->comma_; } Expression_Arena::Array_Ptr children_; - const Char8 *comma_end_; + Source_Code_Span comma_; }; class Expression::Type_Annotated final : public Expression { @@ -1093,23 +1078,18 @@ class Expression::Type_Annotated final : public Expression { const Char8 *span_end) : Expression(kind), child_(child), - colon_(colon_span.begin()), + colon_(colon_span), type_visits_(std::move(type_visits)), - span_end_(span_end) { - QLJS_ASSERT(*colon_span.begin() == u8':'); - QLJS_ASSERT(colon_span.size() == 1); - } + span_end_(span_end) {} - Source_Code_Span colon_span() const { - return Source_Code_Span(this->colon_, this->colon_ + 1); - } + Source_Code_Span colon_span() const { return this->colon_; } void visit_type_annotation(Parse_Visitor_Base &v) { std::move(this->type_visits_).move_into(v); } Expression *child_; - const Char8 *colon_; + Source_Code_Span colon_; Buffering_Visitor type_visits_{nullptr}; const Char8 *span_end_; }; @@ -1451,19 +1431,22 @@ inline Source_Code_Span Expression::span() const { case Expression_Kind::Non_Null_Assertion: { auto *assertion = expression_cast(this); return Source_Code_Span(assertion->child_->span().begin(), - assertion->bang_end_); + assertion->bang_.end()); } case Expression_Kind::Object: return expression_cast(this)->span_; case Expression_Kind::Optional: { auto *optional = expression_cast(this); return Source_Code_Span(optional->child_->span().begin(), - optional->question_end_); + optional->question_.end()); } case Expression_Kind::Paren: return expression_cast(this)->span_; - case Expression_Kind::Paren_Empty: - return expression_cast(this)->span_; + case Expression_Kind::Paren_Empty: { + auto *paren_empty = expression_cast(this); + return Source_Code_Span(paren_empty->left_paren_.begin(), + paren_empty->right_paren_.end()); + } case Expression_Kind::Private_Variable: return expression_cast(this) 
->variable_identifier_.span(); @@ -1489,7 +1472,7 @@ inline Source_Code_Span Expression::span() const { case Expression_Kind::Trailing_Comma: { auto *comma = expression_cast(this); return Source_Code_Span(comma->children_.front()->span().begin(), - comma->comma_end_); + comma->comma_.end()); } case Expression_Kind::Type_Annotated: { auto *annotated = expression_cast(this); diff --git a/src/quick-lint-js/fe/lex.cpp b/src/quick-lint-js/fe/lex.cpp index bc24d7a90..95799d41c 100644 --- a/src/quick-lint-js/fe/lex.cpp +++ b/src/quick-lint-js/fe/lex.cpp @@ -93,16 +93,113 @@ constexpr char32_t right_double_quote = U'\u201d'; struct Confusable_Symbol { char32_t confusable; - Char8 confusable_name[20]; + Char8 confusable_name[51]; Char8 symbol; - Char8 symbol_name[20]; + Char8 symbol_name[21]; Token_Type symbol_token_type; }; Confusable_Symbol confusable_symbols[] = { - {0x037e, u8"Greek Question Mark", u8';', u8"semicolon", - Token_Type::semicolon}, - // TODO(strager): Add more. + // clang-format off + { 0x037e, u8"Greek Question Mark", u8';', u8"semicolon", Token_Type::semicolon}, + + { 0x02d0, u8"Modifier Letter Triangular Colon", u8':', u8"colon", Token_Type::colon}, + { 0x02f8, u8"Modifier Letter Raised Colon", u8':', u8"colon", Token_Type::colon}, + { 0x0589, u8"Armenian Full Stop", u8':', u8"colon", Token_Type::colon}, + { 0x05c3, u8"Hebrew Punctuation Sof Pasuq", u8':', u8"colon", Token_Type::colon}, + { 0x0703, u8"Syriac Supralinear Colon", u8':', u8"colon", Token_Type::colon}, + { 0x0704, u8"Syriac Sublinear Colon", u8':', u8"colon", Token_Type::colon}, + { 0x0903, u8"Devanagari Sign Visarga", u8':', u8"colon", Token_Type::colon}, + { 0x0a83, u8"Gujarati Sign Visarga", u8':', u8"colon", Token_Type::colon}, + { 0x16ec, u8"Runic Multiple Punctuation", u8':', u8"colon", Token_Type::colon}, + { 0x1803, u8"Mongolian Full Stop", u8':', u8"colon", Token_Type::colon}, + { 0x1809, u8"Mongolian Manchu Full Stop", u8':', u8"colon", Token_Type::colon}, + { 0x205a, u8"Two Dot 
Punctuation", u8':', u8"colon", Token_Type::colon}, + { 0x2236, u8"Ratio", u8':', u8"colon", Token_Type::colon}, + { 0xa4fd, u8"Lisu Letter Tone Mya Jeu", u8':', u8"colon", Token_Type::colon}, + { 0xa789, u8"Modifier Letter Colon", u8':', u8"colon", Token_Type::colon}, + { 0xfe30, u8"Presentation Form For Vertical Two Dot Leader", u8':', u8"colon", Token_Type::colon}, + { 0xff1a, u8"Fullwidth Colon", u8':', u8"colon", Token_Type::colon}, + + { 0x00b8, u8"Cedilla", u8',', u8"comma", Token_Type::comma}, + { 0x060d, u8"Arabic Date Separator", u8',', u8"comma", Token_Type::comma}, + { 0x066b, u8"Arabic Decimal Separator", u8',', u8"comma", Token_Type::comma}, + { 0x201a, u8"Single Low-9 Quotation Mark", u8',', u8"comma", Token_Type::comma}, + { 0xa4f9, u8"Lisu Letter Tone Na Po", u8',', u8"comma", Token_Type::comma}, + + { 0x01c3, u8"Latin Letter Retroflex Click", u8'!', u8"exclamation mark", Token_Type::bang}, + { 0x2d51, u8"Tifinagh Letter Tuareg Yang", u8'!', u8"exclamation mark", Token_Type::bang}, + { 0xff01, u8"Fullwidth Exclamation Mark", u8'!', u8"exclamation mark", Token_Type::bang}, + + // TODO(strager): Also match symbols like "․․․". + { 0x0660, u8"Arabic-Indic Digit Zero", u8'.', u8"dot", Token_Type::dot}, + { 0x06f0, u8"Extended Arabic-Indic Digit Zero", u8'.', u8"dot", Token_Type::dot}, + { 0x0701, u8"Syriac Supralinear Full Stop", u8'.', u8"dot", Token_Type::dot}, + { 0x0702, u8"Syriac Sublinear Full Stop", u8'.', u8"dot", Token_Type::dot}, + { 0x2024, u8"One Dot Leader", u8'.', u8"dot", Token_Type::dot}, + { 0xa4f8, u8"Lisu Letter Tone Mya Ti", u8'.', u8"dot", Token_Type::dot}, + { 0xa60e, u8"Vai Full Stop", u8'.', u8"dot", Token_Type::dot}, + {0x10a50, u8"Kharoshthi Punctuation Dot", u8'.', u8"dot", Token_Type::dot}, + {0x1d16d, u8"Musical Symbol Combining Augmentation Dot", u8'.', u8"dot", Token_Type::dot}, + + // NOTE(strager): We diverge from Unicode here. Unicode considers a few of these as parentheses. 
+    // Square-bracket lookalikes. The replacement symbol must agree with the
+    // symbol name and token type in the same row, so these map to '[' and ']'
+    // (not '(' and ')').
+    { 0x2772, u8"Light Left Tortoise Shell Bracket Ornament",          u8'[', u8"left square bracket",  Token_Type::left_square},
+    { 0x2773, u8"Light Right Tortoise Shell Bracket Ornament",         u8']', u8"right square bracket", Token_Type::right_square},
+    { 0x3014, u8"Left Tortoise Shell Bracket",                         u8'[', u8"left square bracket",  Token_Type::left_square},
+    { 0x3015, u8"Right Tortoise Shell Bracket",                        u8']', u8"right square bracket", Token_Type::right_square},
+    { 0xff3b, u8"Fullwidth Left Square Bracket",                       u8'[', u8"left square bracket",  Token_Type::left_square},
+    { 0xff3d, u8"Fullwidth Right Square Bracket",                      u8']', u8"right square bracket", Token_Type::right_square},
+
+    // Parenthesis lookalikes.
+    { 0x2768, u8"Medium Left Parenthesis Ornament",                    u8'(', u8"left parenthesis",     Token_Type::left_paren},
+    { 0x2769, u8"Medium Right Parenthesis Ornament",                   u8')', u8"right parenthesis",    Token_Type::right_paren},
+    { 0xfd3e, u8"Ornate Left Parenthesis",                             u8'(', u8"left parenthesis",     Token_Type::left_paren},
+    { 0xfd3f, u8"Ornate Right Parenthesis",                            u8')', u8"right parenthesis",    Token_Type::right_paren},
+
+    // Curly-bracket lookalikes.
+    { 0x2774, u8"Medium Left Curly Bracket Ornament",                  u8'{', u8"left curly bracket",   Token_Type::left_curly},
+    { 0x2775, u8"Medium Right Curly Bracket Ornament",                 u8'}', u8"right curly bracket",  Token_Type::right_curly},
+    {0x1d114, u8"Musical Symbol Brace",                                u8'{', u8"left curly bracket",   Token_Type::left_curly},
+
+    // TODO(strager): Also match symbols like "ꝸ=" and "᐀᐀".
+    // NOTE(strager): 0x0294 is legal in identifiers.
+ { 0x0294, u8"Latin Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question}, + { 0x0241, u8"Latin Capital Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question}, + { 0x097d, u8"Devanagari Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question}, + { 0x13ae, u8"Cherokee Letter He", u8'?', u8"question mark", Token_Type::question}, + { 0xa6eb, u8"Bamum Letter Ntuu", u8'?', u8"question mark", Token_Type::question}, + + { 0xa778, u8"Latin Small Letter Um", u8'&', u8"ampersand", Token_Type::ampersand}, + + { 0x066d, u8"Arabic Five Pointed Star", u8'*', u8"asterisk", Token_Type::star}, + { 0x204e, u8"Low Asterisk", u8'*', u8"asterisk", Token_Type::star}, + { 0x2217, u8"Asterisk Operator", u8'*', u8"asterisk", Token_Type::star}, + {0x1031f, u8"Old Italic Letter Ess", u8'*', u8"asterisk", Token_Type::star}, + + { 0x02c4, u8"Modifier Letter Up Arrowhead", u8'^', u8"circumflex", Token_Type::circumflex}, + { 0x02c6, u8"Modifier Letter Circumflex Accent", u8'^', u8"circumflex", Token_Type::circumflex}, + + { 0x02c2, u8"Modifier Letter Left Arrowhead", u8'<', u8"less than", Token_Type::less}, + { 0x1438, u8"Canadian Syllabics Pa", u8'<', u8"less than", Token_Type::less}, + { 0x16b2, u8"Runic Letter Kauna", u8'<', u8"less than", Token_Type::less}, + { 0x2039, u8"Single Left-Pointing Angle Quotation Mark", u8'<', u8"less than", Token_Type::less}, + { 0x276e, u8"Heavy Left-Pointing Angle Quotation Mark Ornament", u8'<', u8"less than", Token_Type::less}, + {0x1d236, u8"Greek Instrumental Notation Symbol-40", u8'<', u8"less than", Token_Type::less}, + + { 0x02c3, u8"Modifier Letter Right Arrowhead", u8'>', u8"greater than", Token_Type::greater}, + { 0x1433, u8"Canadian Syllabics Po", u8'>', u8"greater than", Token_Type::greater}, + { 0x203a, u8"Single Right-Pointing Angle Quotation Mark", u8'>', u8"greater than", Token_Type::greater}, + { 0x276f, u8"Heavy Right-Pointing Angle Quotation Mark Ornament", u8'>', u8"greater than", 
Token_Type::greater}, + {0x16f3f, u8"Miao Letter Archaic Zza", u8'>', u8"greater than", Token_Type::greater}, + {0x1d237, u8"Greek Instrumental Notation Symbol-42", u8'>', u8"greater than", Token_Type::greater}, + + { 0x02dc, u8"Small Tilde", u8'~', u8"tilde", Token_Type::tilde}, + { 0x1fc0, u8"Greek Perispomeni", u8'~', u8"tilde", Token_Type::tilde}, + { 0x2053, u8"Swung Dash", u8'~', u8"tilde", Token_Type::tilde}, + { 0x223c, u8"Tilde Operator", u8'~', u8"tilde", Token_Type::tilde}, + + { 0x1400, u8"Canadian Syllabics Hyphen", u8'=', u8"equals", Token_Type::equal}, + { 0x2e40, u8"Double Hyphen", u8'=', u8"equals", Token_Type::equal}, + { 0x30a0, u8"Katakana-Hiragana Double Hyphen", u8'=', u8"equals", Token_Type::equal}, + { 0xa4ff, u8"Lisu Punctuation Full Stop", u8'=', u8"equals", Token_Type::equal}, }; bool look_up_in_unicode_table(const std::uint8_t* table, std::size_t table_size, diff --git a/src/quick-lint-js/fe/parse-expression.cpp b/src/quick-lint-js/fe/parse-expression.cpp index da00833c2..07f595fba 100644 --- a/src/quick-lint-js/fe/parse-expression.cpp +++ b/src/quick-lint-js/fe/parse-expression.cpp @@ -563,8 +563,8 @@ Expression* Parser::parse_primary_expression(Parse_Visitor_Base& v, // () => {} Source_Code_Span right_paren_span = this->peek().span(); this->skip(); - return this->make_expression( - Source_Code_Span(left_paren_span.begin(), right_paren_span.end())); + return this->make_expression(left_paren_span, + right_paren_span); } // (x) => {} @@ -2581,7 +2581,7 @@ Expression* Parser::parse_arrow_function_expression_remainder( paren_empty->report_missing_expression_error(this->diag_reporter_); } else { // () => {} - parameter_list_begin = paren_empty->span_.begin(); + parameter_list_begin = paren_empty->left_paren_.begin(); } break; } diff --git a/test/test-parse-expression.cpp b/test/test-parse-expression.cpp index 4710e1cd4..4abaddfcb 100644 --- a/test/test-parse-expression.cpp +++ b/test/test-parse-expression.cpp @@ -3834,6 +3834,69 @@ 
TEST_F(Test_Parse_Expression, precedence) { } } } + +TEST_F(Test_Parse_Expression, confusable_symbols) { + // In a previous version of quick-lint-js, confusable symbols would cause + // assertion failures during parsing. + + { + Test_Parser p(u8"f﴾)"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->kind(), Expression_Kind::Call); + p.assert_offsets(static_cast(ast)->left_paren_span(), + u8"f"_sv.size(), u8"f﴾"_sv.size()); + } + + { + Test_Parser p(u8"(foo ʔ)"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->without_paren()->kind(), Expression_Kind::Optional); + p.assert_offsets(static_cast(ast->without_paren()) + ->question_span(), + u8"(foo "_sv.size(), u8"(foo ʔ"_sv.size()); + } + + { + Test_Parser p(u8"(x ։ y)"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->without_paren()->kind(), Expression_Kind::Type_Annotated); + p.assert_offsets( + static_cast(ast->without_paren()) + ->colon_span(), + u8"(x "_sv.size(), u8"(x ։"_sv.size()); + } + + { + Test_Parser p(u8"foo ǃ"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->kind(), Expression_Kind::Non_Null_Assertion); + p.assert_offsets( + static_cast(ast)->bang_span(), + u8"foo "_sv.size(), u8"foo ǃ"_sv.size()); + } + + { + Test_Parser p(u8"(foo ‚)"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->without_paren()->kind(), Expression_Kind::Trailing_Comma); + p.assert_offsets( + static_cast(ast->without_paren()) + ->comma_span(), + u8"(foo "_sv.size(), u8"(foo ‚"_sv.size()); + } + + { + Test_Parser p(u8"﴾﴿"_sv, capture_diags); + Expression* ast = p.parse_expression(); + EXPECT_EQ(ast->kind(), Expression_Kind::Paren_Empty); + p.assert_offsets( + static_cast(ast)->left_paren_span(), + u8""_sv.size(), u8"﴾"_sv.size()); + p.assert_offsets( + static_cast(ast)->right_paren_span(), + u8"﴾"_sv.size(), u8"﴾﴿"_sv.size()); + } +} } }