diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index aa29186689..54747b0878 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,7 @@ Semantic Versioning. ### Added +* Unicode 15.1 is now supported, including new CJK code points in identifiers. * Missing `break;` (or `return;` or `throw ...;` or `// fallthrough`) after a clause in `switch` statement now reports [E0427][] ("missing 'break;' or '// fallthrough' comment between statement and 'case'"). (Implemented by [Yash diff --git a/src/quick-lint-js/fe/lex-unicode-generated.cpp b/src/quick-lint-js/fe/lex-unicode-generated.cpp index a4f0bff99c..b826a9a7cc 100644 --- a/src/quick-lint-js/fe/lex-unicode-generated.cpp +++ b/src/quick-lint-js/fe/lex-unicode-generated.cpp @@ -468,7 +468,11 @@ const std::uint8_t Lexer::unicode_tables_chunks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -584,7 +588,7 @@ const std::uint8_t Lexer::unicode_tables_chunks[] = { 0xe0, 0x00, 0x00, 0x00, 0xfe, 0xff, 0x3e, 0x1f, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xfe, 0xfe, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf7, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0xff, 0xff, 0xff, 0x0f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -614,7 +618,7 @@ const std::uint8_t Lexer::unicode_tables_chunks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0xff, 0x03, 0xfe, 0xff, 0xff, 0x87, - 0xfe, 0xff, 0xff, 0x07, 0xc0, 0xff, 0xff, 0xff, + 0xfe, 0xff, 0xff, 0x07, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xfc, 0xfc, 0xfc, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -877,25 +881,25 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_start_chunk_indexe 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x71, 0x1f, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x01, 0x71, 0x01, 0x01, 0x72, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x01, 0x01, 0x72, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x73, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x73, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x74, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x74, + 0x01, 0x01, 0x01, 0x75, }; static_assert(Lexer::identifier_start_chunk_indexes_size == sizeof(Lexer::identifier_start_chunk_indexes)); const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes[] = { - 0x75, 0x01, 0x02, 0x76, 0x77, 0x78, 0x79, 0x7a, - 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, - 0x83, 0x01, 0x11, 0x84, 0x13, 0x01, 0x14, 0x85, - 0x86, 0x87, 0x88, 0x89, 0x8a, 0x01, 0x01, 0x1c, - 0x8b, 0x1e, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x8c, 0x8d, 0x1f, 0x1f, - 0x8e, 0x23, 0x1f, 0x1f, 0x01, 0x01, 0x01, 0x01, + 0x76, 0x01, 0x02, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x01, 0x11, 0x85, 0x13, 0x01, 0x14, 0x86, + 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x01, 0x01, 0x1c, + 0x8c, 0x1e, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x8d, 0x8e, 0x1f, 0x1f, + 0x8f, 0x23, 0x1f, 0x1f, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x1b, 0x01, 0x01, @@ -909,8 +913,8 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x24, 0x01, 0x8f, 0x26, - 0x90, 0x91, 0x92, 0x93, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x24, 0x01, 0x90, 0x26, + 0x91, 0x92, 0x93, 0x94, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, @@ -920,21 +924,21 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x01, 0x2c, 0x94, 0x01, 0x2e, 0x95, 0x96, - 0x31, 0x97, 0x98, 0x99, 0x9a, 0x36, 0x01, 0x37, - 0x38, 0x39, 0x9b, 0x3b, 0x3c, 0x9c, 0x9d, 0x9e, - 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, - 0xa7, 0xa8, 0xa9, 0x1f, 0xaa, 0xab, 0xac, 0xad, + 0x1f, 0x01, 0x2c, 0x95, 0x01, 0x2e, 0x96, 0x97, + 0x31, 0x98, 0x99, 0x9a, 0x9b, 0x36, 0x01, 0x37, + 0x38, 0x39, 0x9c, 0x3b, 0x3c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0x1f, 0xab, 0xac, 0xad, 0xae, 0x01, 0x01, 0x01, 0x4f, 0x50, 0x51, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x52, - 0x01, 0x01, 0x01, 0x01, 0xae, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x01, 0x01, 0xaf, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x01, 0x01, 0x54, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x01, 0x01, 0xaf, 0xb0, 0x1f, 0x1f, 0x57, 0xb1, + 0x01, 0x01, 0xb0, 0xb1, 0x1f, 0x1f, 0x57, 0xb2, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x59, @@ -944,15 +948,15 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x5c, 0x01, 0x5d, 0x5e, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x1f, 0x1f, 0x1f, 0xb2, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0xb3, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0xb3, - 0x1f, 0xb4, 0xb5, 0x1f, 0x60, 0x61, 0x62, 0xb6, - 0x1f, 0x1f, 0xb7, 0x1f, 0x1f, 0x1f, 0x1f, 0x64, - 0xb8, 0xb9, 0xba, 0x1f, 0xbb, 0x1f, 0x1f, 0x69, - 0xbc, 0xbd, 0x1f, 0x1f, 0x1f, 0x1f, 0x6c, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0xb4, + 0x1f, 0xb5, 0xb6, 0x1f, 0x60, 0x61, 0x62, 0xb7, + 0x1f, 0x1f, 0xb8, 0x1f, 0x1f, 0x1f, 0x1f, 0x64, + 0xb9, 0xba, 0xbb, 0x1f, 0xbc, 0x1f, 0x1f, 0x69, + 0xbd, 0xbe, 0x1f, 0x1f, 0x1f, 0x1f, 0x6c, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x1f, 0x1f, 0xbe, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0xbf, 0x1f, 0x1f, 0x1f, 0x1f, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, @@ -982,14 +986,14 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x71, 0x1f, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x01, 0x71, 0x01, 0x01, 0x72, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x01, 0x01, 0x72, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x73, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x73, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x74, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x74, 0x1f, 0x1f, 0x1f, 0x1f, + 0x01, 0x01, 0x01, 0x75, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, @@ -1337,7 +1341,7 @@ const Lexer::Unicode_Table_Chunk_Index_Type Lexer::identifier_part_chunk_indexes 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0xbf, + 0x1f, 0xc0, }; static_assert(Lexer::identifier_part_chunk_indexes_size == sizeof(Lexer::identifier_part_chunk_indexes)); } diff --git a/test/test-lex-unicode.cpp b/test/test-lex-unicode.cpp index 76d68f3752..9f149b1994 100644 --- a/test/test-lex-unicode.cpp +++ b/test/test-lex-unicode.cpp @@ -24,11 +24,14 @@ std::string pretty(char32_t c) { } bool icu_data_is_valid() { - std::uint8_t minimum_unicode_version = 15; + std::uint8_t minimum_unicode_major_version = 15; + std::uint8_t minimum_unicode_minor_version = 1; UVersionInfo version; ::u_getUnicodeVersion(version); - if (version[0] >= minimum_unicode_version) { + if (version[0] > minimum_unicode_major_version || + (version[0] == minimum_unicode_major_version && + version[1] >= minimum_unicode_minor_version)) { return true; } @@ -37,9 +40,9 @@ bool icu_data_is_valid() { std::fprintf(stderr, "warning: The ICU library has data for Unicode version " "%u.%u.%u.%u, which is too old. Upgrade ICU to Unicode " - "version %u or newer. Skipping tests...\n", + "version %u.%u or newer. Skipping tests...\n", version[0], version[1], version[2], version[3], - minimum_unicode_version); + minimum_unicode_major_version, minimum_unicode_minor_version); did_log_warning = true; } return false; diff --git a/tools/generate-lex-unicode/index.js b/tools/generate-lex-unicode/index.js index f75ee38158..73280251d9 100755 --- a/tools/generate-lex-unicode/index.js +++ b/tools/generate-lex-unicode/index.js @@ -169,34 +169,34 @@ function isIDContinue(codePoint) { } let ID_START_CODE_POINTS = new Set( - require("@unicode/unicode-15.0.0/Binary_Property/ID_Start/code-points.js") + require("@unicode/unicode-15.1.0/Binary_Property/ID_Start/code-points.js") ); let ID_CONTINUE_CODE_POINTS = new Set( - require("@unicode/unicode-15.0.0/Binary_Property/ID_Continue/code-points.js") + require("@unicode/unicode-15.1.0/Binary_Property/ID_Continue/code-points.js") ); // Pattern_White_Space -// https://www.unicode.org/Public/11.0.0/ucd/PropList.txt +// https://www.unicode.org/Public/15.1.0/ucd/PropList.txt let PATTERN_WHITE_SPACE_CODE_POINTS = new Set([ 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x0020, 0x0085, 0x200e, 0x200f, 0x2028, 0x2029, ]); // Other_ID_Start -// https://www.unicode.org/Public/11.0.0/ucd/PropList.txt +// https://www.unicode.org/Public/15.1.0/ucd/PropList.txt let OTHER_ID_START_CODE_POINTS = new Set([ 0x1885, 0x1886, 0x2118, 0x212e, 0x309b, 0x309c, ]); // Other_ID_Continue -// https://www.unicode.org/Public/11.0.0/ucd/PropList.txt +// https://www.unicode.org/Public/15.1.0/ucd/PropList.txt let OTHER_ID_CONTINUE_CODE_POINTS = new Set([ 0x00b7, 0x0387, 0x1369, 0x136a, 0x136b, 0x136c, 0x136d, 0x136e, 0x136f, - 0x1370, 0x1371, 0x19da, + 0x1370, 0x1371, 0x19da, 0x200c, 0x200d, 0x30fb, 0xff65, ]); // Pattern_Syntax -// https://www.unicode.org/Public/11.0.0/ucd/PropList.txt +// https://www.unicode.org/Public/15.1.0/ucd/PropList.txt let PATTERN_SYNTAX_CODE_POINTS = new Set([ 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a, 0x003b, 0x003c, @@ -537,6 +537,8 @@ function testIsIDStart() { assert.ok(isIDStart(0x309b)); // KATAKANA-HIRAGANA VOICED SOUND MARK assert.ok(isIDStart(0x309c)); // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + assert.ok(isIDStart(0x2ee5d)); + // Pattern_Syntax (disallowed) for (let codePoint of PATTERN_SYNTAX_CODE_POINTS) { assert.ok(!isIDStart(codePoint), codePoint.toString(16)); @@ -618,8 +620,6 @@ function testIsIDContinue() { assert.ok(!isIDContinue(0x20a0)); // EURO-CURRENCY SIGN (Sc) assert.ok(!isIDContinue(0x0024)); // DOLLAR SIGN (Sc) assert.ok(!isIDContinue(0x20dd)); // COMBINING ENCLOSING CIRCLE (Me) - assert.ok(!isIDContinue(0x200c)); // ZERO WIDTH NON-JOINER (Cf) - assert.ok(!isIDContinue(0x200d)); // ZERO WIDTH JOINER (Cf) assert.ok(!isIDContinue(0x202c)); // POP DIRECTIONAL FORMATTING (Cf) } diff --git a/tools/generate-lex-unicode/package.json b/tools/generate-lex-unicode/package.json index 639cb10e48..e2737cdd28 100644 --- a/tools/generate-lex-unicode/package.json +++ b/tools/generate-lex-unicode/package.json @@ -3,7 +3,8 @@ "fmt": "prettier --write '*.js' '*.json'" }, "dependencies": { - "@unicode/unicode-15.0.0": "*" + "@unicode/unicode-15.0.0": "*", + "@unicode/unicode-15.1.0": "*" }, "devDependencies": { "prettier": "^2.8.4" diff --git a/tools/generate-lex-unicode/yarn.lock b/tools/generate-lex-unicode/yarn.lock index 4be648b2ae..417416b1ce 100644 --- a/tools/generate-lex-unicode/yarn.lock +++ b/tools/generate-lex-unicode/yarn.lock @@ -3,9 +3,14 @@ "@unicode/unicode-15.0.0@*": - version "1.3.1" - resolved "https://registry.yarnpkg.com/@unicode/unicode-15.0.0/-/unicode-15.0.0-1.3.1.tgz#6d2d9b4ac73a5227122ede626a7977556decf81d" - integrity sha512-zxm5Cx0v9vGxFOM8tVuArWHxxJTk+stiLA+ZHKt2mJO3HHmM6uN8OFcDGuvcix3MqguQ75am0XvpUgEz4P4vFw== + version "1.5.2" + resolved "https://registry.yarnpkg.com/@unicode/unicode-15.0.0/-/unicode-15.0.0-1.5.2.tgz#5350ad022050ca5e165f8352d9f971d418c99deb" + integrity sha512-PepMvMxf9j4sp4bZn7W9JJoMxynHk66ZPCsx6n3v47T5vmM+qfIy0z1MMU+EDmRZr2cvs1aT9ZwUEMRPVXR23g== + +"@unicode/unicode-15.1.0@*": + version "1.5.2" + resolved "https://registry.yarnpkg.com/@unicode/unicode-15.1.0/-/unicode-15.1.0-1.5.2.tgz#0358d05ab99a7d05c12c68cc2123cf124c34beb9" + integrity sha512-7PAgnShDr8ziK6XeHB/TUVFboDFEhaQKKyrw55/Kx9o6AQDy1s7dJ9KRpRerW9nrR5qMGUQvOqTXOAek6ZIXkg== prettier@^2.8.4: version "2.8.4"