diff --git a/abb/abbLexer.g4 b/abb/abbLexer.g4 index 7885c064c2..05a31370a5 100644 --- a/abb/abbLexer.g4 +++ b/abb/abbLexer.g4 @@ -1,107 +1,257 @@ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false + + // $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine + + // $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true lexer grammar abbLexer; -options { - caseInsensitive = true; -} - -MODULE : 'module'; -ENDMODULE : 'endmodule'; -PROC : 'PROC'; -ENDPROC : 'ENDPROC'; -LOCAL : 'LOCAL'; -CONST : 'CONST'; -PERS : 'PERS'; -VAR : 'VAR'; -TOOLDATA : 'TOOLDATA'; -WOBJDATA : 'WOBJDATA'; -SPEEDDATA : 'SPEEDDATA'; -ZONEDATA : 'ZONEDATA'; -CLOCK : 'CLOCK'; -BOOL : 'BOOL'; -ON_CALL : '\\ON'; -OFF_CALL : '\\OFF'; - -SLASH : '/'; -EQUALS : ':='; -COMMA : ','; -CURLY_OPEN : '{'; -CURLY_CLOSE : '}'; -COLON : ':'; -SEMICOLON : ';'; -BRACKET_OPEN : '('; -BRACKET_CLOSE : ')'; -SQUARE_OPEN : '['; -SQUARE_CLOSE : ']'; -DOT : '.'; -DOUBLEDOT : '..'; -REL_BIGGER : '>'; -REL_BIGGER_OR_EQUAL : '>='; -REL_SMALLER : '<'; -REL_SMALLER_OR_EQUAL : '<='; -REL_EQUAL : '=='; -REL_NOTEQUAL : '<>'; -PLUS : '+'; -MINUS : '-'; -MULTIPLY : '*'; -PERCENT : '%'; -HASH : '#'; - -WS: (' ' | '\t' | '\u000C') -> skip; - -NEWLINE: '\r'? '\n'; - -LINE_COMMENT: '!' ~ ('\n' | '\r')* -> skip; - -BOOLLITERAL: 'FALSE' | 'TRUE'; - -CHARLITERAL: '\'' (EscapeSequence | ~ ('\'' | '\\' | '\r' | '\n')) '\''; - -STRINGLITERAL: '"' (EscapeSequence | ~ ('\\' | '"' | '\r' | '\n'))* '"'; - -fragment EscapeSequence: - '\\' ( - 'b' - | 't' - | 'n' - | 'f' - | 'r' - | '"' - | '\'' - | '\\' - | '0' .. '3' '0' .. '7' '0' .. '7' - | '0' .. '7' '0' .. '7' - | '0' .. '7' - ) -; - -FLOATLITERAL: - ('0' .. '9')+ '.' ('0' .. '9')* Exponent? - | '.' ('0' .. '9')+ Exponent? - | ('0' .. 
'9')+ Exponent -; - -fragment Exponent: 'E' ('+' | '-')? ('0' .. '9')+; - -INTLITERAL: ('0' .. '9')+ | HexPrefix HexDigit+ HexSuffix | BinPrefix BinDigit+ BinSuffix; - -fragment HexPrefix: '\'' 'H'; - -fragment HexDigit: '0' .. '9' | 'A' .. 'F'; - -fragment HexSuffix: '\''; - -fragment BinPrefix: '\'' 'B'; - -fragment BinDigit: '0' | '1'; - -fragment BinSuffix: '\''; - -IDENTIFIER: IdentifierStart IdentifierPart*; - -fragment IdentifierStart: 'A' .. 'Z' | '_'; - -fragment IdentifierPart: IdentifierStart | '0' .. '9'; \ No newline at end of file + +options { caseInsensitive = true; } +MODULE + : 'module' + ; + +ENDMODULE + : 'endmodule' + ; + +PROC + : 'PROC' + ; + +ENDPROC + : 'ENDPROC' + ; + +LOCAL + : 'LOCAL' + ; + +CONST + : 'CONST' + ; + +PERS + : 'PERS' + ; + +VAR + : 'VAR' + ; + +TOOLDATA + : 'TOOLDATA' + ; + +WOBJDATA + : 'WOBJDATA' + ; + +SPEEDDATA + : 'SPEEDDATA' + ; + +ZONEDATA + : 'ZONEDATA' + ; + +CLOCK + : 'CLOCK' + ; + +BOOL + : 'BOOL' + ; + +ON_CALL + : '\\ON' + ; + +OFF_CALL + : '\\OFF' + ; + +SLASH + : '/' + ; + +EQUALS + : ':=' + ; + +COMMA + : ',' + ; + +CURLY_OPEN + : '{' + ; + +CURLY_CLOSE + : '}' + ; + +COLON + : ':' + ; + +SEMICOLON + : ';' + ; + +BRACKET_OPEN + : '(' + ; + +BRACKET_CLOSE + : ')' + ; + +SQUARE_OPEN + : '[' + ; + +SQUARE_CLOSE + : ']' + ; + +DOT + : '.' + ; + +DOUBLEDOT + : '..' + ; + +REL_BIGGER + : '>' + ; + +REL_BIGGER_OR_EQUAL + : '>=' + ; + +REL_SMALLER + : '<' + ; + +REL_SMALLER_OR_EQUAL + : '<=' + ; + +REL_EQUAL + : '==' + ; + +REL_NOTEQUAL + : '<>' + ; + +PLUS + : '+' + ; + +MINUS + : '-' + ; + +MULTIPLY + : '*' + ; + +PERCENT + : '%' + ; + +HASH + : '#' + ; + +WS + : (' ' | '\t' | '\u000C') -> skip + ; + +NEWLINE + : '\r'? '\n' + ; + +LINE_COMMENT + : '!' 
~ ('\n' | '\r')* -> skip + ; + +BOOLLITERAL + : 'FALSE' + | 'TRUE' + ; + +CHARLITERAL + : '\'' (EscapeSequence | ~ ('\'' | '\\' | '\r' | '\n')) '\'' + ; + +STRINGLITERAL + : '"' (EscapeSequence | ~ ('\\' | '"' | '\r' | '\n'))* '"' + ; + +fragment EscapeSequence + : '\\' ('b' | 't' | 'n' | 'f' | 'r' | '"' | '\'' | '\\' | '0' .. '3' '0' .. '7' '0' .. '7' | '0' .. '7' '0' .. '7' | '0' .. '7') + ; + +FLOATLITERAL + : ('0' .. '9')+ '.' ('0' .. '9')* Exponent? + | '.' ('0' .. '9')+ Exponent? + | ('0' .. '9')+ Exponent + ; + +fragment Exponent + : 'E' ('+' | '-')? ('0' .. '9')+ + ; + +INTLITERAL + : ('0' .. '9')+ + | HexPrefix HexDigit+ HexSuffix + | BinPrefix BinDigit+ BinSuffix + ; + +fragment HexPrefix + : '\'' 'H' + ; + +fragment HexDigit + : '0' .. '9' + | 'A' .. 'F' + ; + +fragment HexSuffix + : '\'' + ; + +fragment BinPrefix + : '\'' 'B' + ; + +fragment BinDigit + : '0' + | '1' + ; + +fragment BinSuffix + : '\'' + ; + +IDENTIFIER + : IdentifierStart IdentifierPart* + ; + +fragment IdentifierStart + : 'A' .. 'Z' + | '_' + ; + +fragment IdentifierPart + : IdentifierStart + | '0' .. 
'9' + ; + diff --git a/antlr/antlr2/ANTLRv2Lexer.g4 b/antlr/antlr2/ANTLRv2Lexer.g4 index 1f63cd1099..4b04be6ecf 100644 --- a/antlr/antlr2/ANTLRv2Lexer.g4 +++ b/antlr/antlr2/ANTLRv2Lexer.g4 @@ -27,405 +27,777 @@ */ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false -// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true - -lexer grammar ANTLRv2Lexer; - -options { - superClass = LexerAdaptor; -} - -channels { - OFF_CHANNEL -} - -tokens { - DOC_COMMENT, - PARSER, - LEXER, - RULE, - BLOCK, - OPTIONAL, - CLOSURE, - POSITIVE_CLOSURE, - SYNPRED, - RANGE, - CHAR_RANGE, - EPSILON, - ALT, - EOR, - EOB, - EOA, // end of alt - ID, - ARG, - ARGLIST, - RET, - LEXER_GRAMMAR, - PARSER_GRAMMAR, - TREE_GRAMMAR, - COMBINED_GRAMMAR, - INITACTION, - LABEL, // $x used in rewrite rules - TEMPLATE, - SCOPE, - SEMPRED, - GATED_SEMPRED, // {p}? => - SYN_SEMPRED, // (...) => it's a manually-specified synpred converted to sempred - BACKTRACK_SEMPRED, // auto backtracking mode syn pred converted to sempred - FRAGMENT, - TREE_BEGIN, - ROOT, - BANG, - RANGE, - REWRITE, - ACTION_CONTENT -} - -DOC_COMMENT: '/**' .*? ('*/' | EOF) -> channel(OFF_CHANNEL); - -SL_COMMENT: '//' ~ [\r\n]* -> channel(OFF_CHANNEL); - -ML_COMMENT: '/*' .*? '*/' -> channel(OFF_CHANNEL); - -INT: '0' .. '9'+; - -CHAR_LITERAL: '\'' LITERAL_CHAR '\''; - -fragment LITERAL_CHAR: ESC | ~ ('\'' | '\\'); - -STRING_LITERAL: '"' LIT_STR* '"'; - -fragment LIT_STR: ESC | ~ ('\\' | '"'); - -fragment ESC: - '\\' ( - 'n' - | 'r' - | 't' - | 'b' - | 'f' - | '"' - | '\'' - | '\\' - | '>' - | 'u' XDIGIT XDIGIT XDIGIT XDIGIT - | OctDigit (OctDigit OctDigit?)? - | . - ) -; - -fragment XDIGIT: '0' .. '9' | 'a' .. 'f' | 'A' .. 
'F'; - -BEGIN_ARGUMENT: LBrack { this.handleBeginArgument(); }; - -BEGIN_ACTION: LBrace -> pushMode (Actionx); - -OPTIONS: 'options' -> pushMode (Options); - -TOKENS: 'tokens' -> pushMode (Tokens); - -HEADER : 'header'; -CLASS : 'class'; -EXTENDS : 'extends'; -LEXCLASS : 'lexclass'; -TREEPARSER : 'treeparser'; -EXCEPTION : 'exception'; -CATCH : 'catch'; -FINALLY : 'finally'; -FRAGMENT : 'fragment'; -GRAMMAR : 'grammar'; -LEXER : 'Lexer'; -PARSER : 'Parser'; -PRIVATE : 'private'; -PROTECTED : 'protected'; -PUBLIC : 'public'; -RETURNS : 'returns'; -SCOPE : 'scope'; -THROWS : 'throws'; -TREE : 'tree'; -fragment WS_LOOP : (WS | SL_COMMENT | ML_COMMENT)*; -OPEN_ELEMENT_OPTION : Lt; -CLOSE_ELEMENT_OPTION : Gt; -AT : At; -BANG : '!'; -COLON : Colon; -COLONCOLON : DColon; -COMMA : Comma; -DOT : Dot; -EQUAL : Equal; -LBRACE : LBrace; -LBRACK : LBrack; -LPAREN : LParen; -OR : Pipe; -PLUS : Plus; -QM : Question; -RANGE : Range; -RBRACE : RBrace; -RBRACK : RBrack; -REWRITE : RArrow; -ROOT : '^'; -RPAREN : RParen; -SEMI : Semi; -SEMPREDOP : '=>'; -STAR : Star; -TREE_BEGIN : '^('; -DOLLAR : Dollar; -PEQ : PlusAssign; -NOT : Tilde; - -WS: (' ' | '\t' | '\r'? '\n')+ -> channel(OFF_CHANNEL); - -TOKEN_REF: 'A' .. 'Z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')*; - -RULE_REF: 'a' .. 'z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')*; - -// ====================================================== -// Lexer fragments -// -// ----------------------------------- -// Whitespace & Comments - -fragment Ws: Hws | Vws; - -fragment Hws: [ \t]; - -fragment Vws: [\r\n\f]; - -fragment BlockComment: '/*' .*? ('*/' | EOF); - -fragment DocComment: '/**' .*? ('*/' | EOF); - -fragment LineComment: '//' ~ [\r\n]*; - -// ----------------------------------- -// Escapes -// Any kind of escaped character that we can embed within ANTLR literal strings. - -fragment EscSeq: Esc ([btnfr"'\\] | UnicodeEsc | OctEsc | . 
| EOF); - -fragment EscAny: Esc .; - -fragment UnicodeEsc: 'u' (HexDigit (HexDigit (HexDigit HexDigit?)?)?)?; - -fragment OctEsc: OctDigit (OctDigit OctDigit?)?; - -// ----------------------------------- -// Numerals - -fragment DecimalNumeral: '0' | [1-9] DecDigit*; -// ----------------------------------- -// Digits - -fragment HexDigit: [0-9a-fA-F]; - -fragment DecDigit: [0-9]; - -fragment OctDigit: [0-7]; - -// ----------------------------------- -// Literals - -fragment BoolLiteral: 'true' | 'false'; - -fragment CharLiteral: SQuote (EscSeq | ~ ['\r\n\\]) SQuote; - -fragment SQuoteLiteral: SQuote (EscSeq | ~ ['\r\n\\])* SQuote; - -fragment DQuoteLiteral: DQuote (EscSeq | ~ ["\r\n\\])* DQuote; - -fragment USQuoteLiteral: SQuote (EscSeq | ~ ['\r\n\\])*; -// ----------------------------------- -// Character ranges - -fragment NameChar: - NameStartChar - | '0' .. '9' - | Underscore - | '\u00B7' - | '\u0300' .. '\u036F' - | '\u203F' .. '\u2040' -; - -fragment NameStartChar: - 'A' .. 'Z' - | 'a' .. 'z' - | '\u00C0' .. '\u00D6' - | '\u00D8' .. '\u00F6' - | '\u00F8' .. '\u02FF' - | '\u0370' .. '\u037D' - | '\u037F' .. '\u1FFF' - | '\u200C' .. '\u200D' - | '\u2070' .. '\u218F' - | '\u2C00' .. '\u2FEF' - | '\u3001' .. '\uD7FF' - | '\uF900' .. '\uFDCF' - | '\uFDF0' .. 
'\uFFFD' -; - -// ignores | ['\u10000-'\uEFFFF] ; - -// ----------------------------------- -// Types - -fragment Int: 'int'; - -// ----------------------------------- -// Symbols -fragment Esc : '\\'; -fragment Colon : ':'; -fragment DColon : '::'; -fragment SQuote : '\''; -fragment DQuote : '"'; -fragment LParen : '('; -fragment RParen : ')'; -fragment LBrace : '{'; -fragment RBrace : '}'; -fragment LBrack : '['; -fragment RBrack : ']'; -fragment RArrow : '->'; -fragment Lt : '<'; -fragment Gt : '>'; -fragment Equal : '='; -fragment Question : '?'; -fragment Star : '*'; -fragment Plus : '+'; -fragment PlusAssign : '+='; -fragment Underscore : '_'; -fragment Pipe : '|'; -fragment Dollar : '$'; -fragment Comma : ','; -fragment Semi : ';'; -fragment Dot : '.'; -fragment Range : '..'; -fragment At : '@'; -fragment Pound : '#'; -fragment Tilde : '~'; - -// ====================================================== -// Lexer modes -// ------------------------- -// Arguments -mode Argument; -// E.g., [int x, List a[]] -NESTED_ARGUMENT: LBrack -> type (ARGUMENT_CONTENT), pushMode (Argument); - -ARGUMENT_ESCAPE: EscAny -> type (ARGUMENT_CONTENT); - -ARGUMENT_STRING_LITERAL: DQuoteLiteral -> type (ARGUMENT_CONTENT); +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -ARGUMENT_CHAR_LITERAL: SQuoteLiteral -> type (ARGUMENT_CONTENT); -END_ARGUMENT: RBrack { this.handleEndArgument(); }; -// added this to return non-EOF token type here. EOF does something weird +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true -UNTERMINATED_ARGUMENT: EOF -> popMode; +lexer grammar ANTLRv2Lexer; -ARGUMENT_CONTENT: .; -// ------------------------- -// Actions -// -// Many language targets use {} as block delimiters and so we -// must recursively match {} delimited blocks to balance the -// braces. 
Additionally, we must make some assumptions about -// literal string representation in the target language. We assume -// that they are delimited by ' or " and so consume these -// in their own alts so as not to inadvertantly match {}. +options { superClass = LexerAdaptor; } +channels { OFF_CHANNEL } +tokens { DOC_COMMENT , PARSER , LEXER , RULE , BLOCK , OPTIONAL , CLOSURE , POSITIVE_CLOSURE , SYNPRED , RANGE , CHAR_RANGE , EPSILON , ALT , EOR , EOB , EOA , // end of alt +ID , ARG , ARGLIST , RET , LEXER_GRAMMAR , PARSER_GRAMMAR , TREE_GRAMMAR , COMBINED_GRAMMAR , INITACTION , LABEL , // $x used in rewrite rules +TEMPLATE , SCOPE , SEMPRED , GATED_SEMPRED , // {p}? => +SYN_SEMPRED , // (...) => it's a manually-specified synpred converted to sempred +BACKTRACK_SEMPRED , // auto backtracking mode syn pred converted to sempred +FRAGMENT , TREE_BEGIN , ROOT , BANG , RANGE , REWRITE , ACTION_CONTENT } +DOC_COMMENT + : '/**' .*? ('*/' | EOF) -> channel (OFF_CHANNEL) + ; + +SL_COMMENT + : '//' ~ [\r\n]* -> channel (OFF_CHANNEL) + ; + +ML_COMMENT + : '/*' .*? '*/' -> channel (OFF_CHANNEL) + ; + +INT + : '0' .. '9'+ + ; + +CHAR_LITERAL + : '\'' LITERAL_CHAR '\'' + ; + +fragment LITERAL_CHAR + : ESC + | ~ ('\'' | '\\') + ; + +STRING_LITERAL + : '"' LIT_STR* '"' + ; + +fragment LIT_STR + : ESC + | ~ ('\\' | '"') + ; + +fragment ESC + : '\\' ('n' | 'r' | 't' | 'b' | 'f' | '"' | '\'' | '\\' | '>' | 'u' XDIGIT XDIGIT XDIGIT XDIGIT | OctDigit (OctDigit OctDigit?)? | .) + ; + +fragment XDIGIT + : '0' .. '9' + | 'a' .. 'f' + | 'A' .. 
'F' + ; + +BEGIN_ARGUMENT + : LBrack + { this.handleBeginArgument(); } + ; + +BEGIN_ACTION + : LBrace -> pushMode (Actionx) + ; + +OPTIONS + : 'options' -> pushMode (Options) + ; + +TOKENS + : 'tokens' -> pushMode (Tokens) + ; + +HEADER + : 'header' + ; + +CLASS + : 'class' + ; + +EXTENDS + : 'extends' + ; + +LEXCLASS + : 'lexclass' + ; + +TREEPARSER + : 'treeparser' + ; + +EXCEPTION + : 'exception' + ; + +CATCH + : 'catch' + ; + +FINALLY + : 'finally' + ; + +FRAGMENT + : 'fragment' + ; + +GRAMMAR + : 'grammar' + ; + +LEXER + : 'Lexer' + ; + +PARSER + : 'Parser' + ; + +PRIVATE + : 'private' + ; + +PROTECTED + : 'protected' + ; + +PUBLIC + : 'public' + ; + +RETURNS + : 'returns' + ; -mode Actionx; -NESTED_ACTION: LBrace -> type (ACTION_CONTENT), pushMode (Actionx); +SCOPE + : 'scope' + ; -ACTION_ESCAPE: EscAny -> type (ACTION_CONTENT); +THROWS + : 'throws' + ; -ACTION_STRING_LITERAL: DQuoteLiteral -> type (ACTION_CONTENT); +TREE + : 'tree' + ; -ACTION_CHAR_LITERAL: SQuoteLiteral -> type (ACTION_CONTENT); +fragment WS_LOOP + : (WS | SL_COMMENT | ML_COMMENT)* + ; -ACTION_DOC_COMMENT: DocComment -> type (ACTION_CONTENT); +OPEN_ELEMENT_OPTION + : Lt + ; -ACTION_BLOCK_COMMENT: BlockComment -> type (ACTION_CONTENT); +CLOSE_ELEMENT_OPTION + : Gt + ; -ACTION_LINE_COMMENT: LineComment -> type (ACTION_CONTENT); +AT + : At + ; -END_ACTION: RBrace { this.handleEndAction(); }; +BANG + : '!' 
+ ; -UNTERMINATED_ACTION: EOF -> popMode; +COLON + : Colon + ; -ACTION_CONTENT: .; +COLONCOLON + : DColon + ; -// ------------------------- +COMMA + : Comma + ; + +DOT + : Dot + ; + +EQUAL + : Equal + ; + +LBRACE + : LBrace + ; + +LBRACK + : LBrack + ; + +LPAREN + : LParen + ; + +OR + : Pipe + ; + +PLUS + : Plus + ; + +QM + : Question + ; + +RANGE + : Range + ; + +RBRACE + : RBrace + ; + +RBRACK + : RBrack + ; + +REWRITE + : RArrow + ; + +ROOT + : '^' + ; + +RPAREN + : RParen + ; + +SEMI + : Semi + ; + +SEMPREDOP + : '=>' + ; + +STAR + : Star + ; + +TREE_BEGIN + : '^(' + ; + +DOLLAR + : Dollar + ; + +PEQ + : PlusAssign + ; + +NOT + : Tilde + ; + +WS + : (' ' | '\t' | '\r'? '\n')+ -> channel (OFF_CHANNEL) + ; + +TOKEN_REF + : 'A' .. 'Z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')* + ; + +RULE_REF + : 'a' .. 'z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')* + ; + // ====================================================== + + // Lexer fragments + + // + + // ----------------------------------- + + // Whitespace & Comments + +fragment Ws + : Hws + | Vws + ; + +fragment Hws + : [ \t] + ; + +fragment Vws + : [\r\n\f] + ; + +fragment BlockComment + : '/*' .*? ('*/' | EOF) + ; + +fragment DocComment + : '/**' .*? ('*/' | EOF) + ; + +fragment LineComment + : '//' ~ [\r\n]* + ; + // ----------------------------------- + + // Escapes + + // Any kind of escaped character that we can embed within ANTLR literal strings. + +fragment EscSeq + : Esc ([btnfr"'\\] | UnicodeEsc | OctEsc | . | EOF) + ; + +fragment EscAny + : Esc . + ; + +fragment UnicodeEsc + : 'u' (HexDigit (HexDigit (HexDigit HexDigit?)?)?)? + ; + +fragment OctEsc + : OctDigit (OctDigit OctDigit?)? 
+ ; + // ----------------------------------- + + // Numerals + +fragment DecimalNumeral + : '0' + | [1-9] DecDigit* + ; + // ----------------------------------- + + // Digits + +fragment HexDigit + : [0-9a-fA-F] + ; + +fragment DecDigit + : [0-9] + ; + +fragment OctDigit + : [0-7] + ; + // ----------------------------------- + + // Literals + +fragment BoolLiteral + : 'true' + | 'false' + ; + +fragment CharLiteral + : SQuote (EscSeq | ~ ['\r\n\\]) SQuote + ; + +fragment SQuoteLiteral + : SQuote (EscSeq | ~ ['\r\n\\])* SQuote + ; + +fragment DQuoteLiteral + : DQuote (EscSeq | ~ ["\r\n\\])* DQuote + ; + +fragment USQuoteLiteral + : SQuote (EscSeq | ~ ['\r\n\\])* + ; + // ----------------------------------- + + // Character ranges + +fragment NameChar + : NameStartChar + | '0' .. '9' + | Underscore + | '\u00B7' + | '\u0300' .. '\u036F' + | '\u203F' .. '\u2040' + ; + +fragment NameStartChar + : 'A' .. 'Z' + | 'a' .. 'z' + | '\u00C0' .. '\u00D6' + | '\u00D8' .. '\u00F6' + | '\u00F8' .. '\u02FF' + | '\u0370' .. '\u037D' + | '\u037F' .. '\u1FFF' + | '\u200C' .. '\u200D' + | '\u2070' .. '\u218F' + | '\u2C00' .. '\u2FEF' + | '\u3001' .. '\uD7FF' + | '\uF900' .. '\uFDCF' + | '\uFDF0' .. '\uFFFD' + ; + // ignores | ['\u10000-'\uEFFFF] ; + + // ----------------------------------- + + // Types + +fragment Int + : 'int' + ; + // ----------------------------------- + + // Symbols + +fragment Esc + : '\\' + ; + +fragment Colon + : ':' + ; + +fragment DColon + : '::' + ; + +fragment SQuote + : '\'' + ; + +fragment DQuote + : '"' + ; + +fragment LParen + : '(' + ; + +fragment RParen + : ')' + ; + +fragment LBrace + : '{' + ; + +fragment RBrace + : '}' + ; + +fragment LBrack + : '[' + ; + +fragment RBrack + : ']' + ; + +fragment RArrow + : '->' + ; + +fragment Lt + : '<' + ; + +fragment Gt + : '>' + ; + +fragment Equal + : '=' + ; + +fragment Question + : '?' 
+ ; + +fragment Star + : '*' + ; + +fragment Plus + : '+' + ; + +fragment PlusAssign + : '+=' + ; + +fragment Underscore + : '_' + ; + +fragment Pipe + : '|' + ; + +fragment Dollar + : '$' + ; + +fragment Comma + : ',' + ; + +fragment Semi + : ';' + ; + +fragment Dot + : '.' + ; + +fragment Range + : '..' + ; + +fragment At + : '@' + ; + +fragment Pound + : '#' + ; + +fragment Tilde + : '~' + ; + // ====================================================== + + // Lexer modes + + // ------------------------- + + // Arguments + +mode Argument; +// E.g., [int x, List a[]] +NESTED_ARGUMENT + : LBrack -> type (ARGUMENT_CONTENT) , pushMode (Argument) + ; + +ARGUMENT_ESCAPE + : EscAny -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_STRING_LITERAL + : DQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_CHAR_LITERAL + : SQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +END_ARGUMENT + : RBrack + { this.handleEndArgument(); } + ; + // added this to return non-EOF token type here. EOF does something weird + +UNTERMINATED_ARGUMENT + : EOF -> popMode + ; + +ARGUMENT_CONTENT + : . + ; + // ------------------------- + + // Actions + + // + + // Many language targets use {} as block delimiters and so we + + // must recursively match {} delimited blocks to balance the + + // braces. Additionally, we must make some assumptions about + + // literal string representation in the target language. We assume + + // that they are delimited by ' or " and so consume these + + // in their own alts so as not to inadvertantly match {}. 
+ +mode Actionx; +NESTED_ACTION + : LBrace -> type (ACTION_CONTENT) , pushMode (Actionx) + ; + +ACTION_ESCAPE + : EscAny -> type (ACTION_CONTENT) + ; + +ACTION_STRING_LITERAL + : DQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_CHAR_LITERAL + : SQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_DOC_COMMENT + : DocComment -> type (ACTION_CONTENT) + ; + +ACTION_BLOCK_COMMENT + : BlockComment -> type (ACTION_CONTENT) + ; + +ACTION_LINE_COMMENT + : LineComment -> type (ACTION_CONTENT) + ; + +END_ACTION + : RBrace + { this.handleEndAction(); } + ; + +UNTERMINATED_ACTION + : EOF -> popMode + ; + +ACTION_CONTENT + : . + ; + // ------------------------- + mode Options; -OPT_DOC_COMMENT: DocComment -> type (DOC_COMMENT), channel (OFF_CHANNEL); - -OPT_BLOCK_COMMENT: BlockComment -> type (ML_COMMENT), channel (OFF_CHANNEL); - -OPT_LINE_COMMENT: LineComment -> type (SL_COMMENT), channel (OFF_CHANNEL); - -OPT_LBRACE: LBrace { this.handleOptionsLBrace(); }; - -OPT_RBRACE: RBrace -> type (RBRACE), popMode; - -OPT_ID: Id -> type (ID); - -OPT_DOT: Dot -> type (DOT); - -OPT_ASSIGN: Equal -> type (EQUAL); - -OPT_STRING_LITERAL: SQuoteLiteral -> type (CHAR_LITERAL); - -OPT_STRING_LITERAL2: DQuoteLiteral -> type (STRING_LITERAL); - -OPT_RANGE: Range -> type(RANGE); - -OPT_INT: DecimalNumeral -> type (INT); - -OPT_STAR: Star -> type (STAR); - -OPT_SEMI: Semi -> type (SEMI); - -OPT_WS: Ws+ -> type (WS), channel (OFF_CHANNEL); - -// ------------------------- - +OPT_DOC_COMMENT + : DocComment -> type (DOC_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_BLOCK_COMMENT + : BlockComment -> type (ML_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_LINE_COMMENT + : LineComment -> type (SL_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_LBRACE + : LBrace + { this.handleOptionsLBrace(); } + ; + +OPT_RBRACE + : RBrace -> type (RBRACE) , popMode + ; + +OPT_ID + : Id -> type (ID) + ; + +OPT_DOT + : Dot -> type (DOT) + ; + +OPT_ASSIGN + : Equal -> type (EQUAL) + ; + +OPT_STRING_LITERAL + : SQuoteLiteral -> 
type (CHAR_LITERAL) + ; + +OPT_STRING_LITERAL2 + : DQuoteLiteral -> type (STRING_LITERAL) + ; + +OPT_RANGE + : Range -> type (RANGE) + ; + +OPT_INT + : DecimalNumeral -> type (INT) + ; + +OPT_STAR + : Star -> type (STAR) + ; + +OPT_SEMI + : Semi -> type (SEMI) + ; + +OPT_WS + : Ws+ -> type (WS) , channel (OFF_CHANNEL) + ; + // ------------------------- + mode Tokens; -TOK_DOC_COMMENT: DocComment -> type (DOC_COMMENT), channel (OFF_CHANNEL); - -TOK_BLOCK_COMMENT: BlockComment -> type (ML_COMMENT), channel (OFF_CHANNEL); - -TOK_LINE_COMMENT: LineComment -> type (SL_COMMENT), channel (OFF_CHANNEL); - -TOK_LBRACE: LBrace -> type (LBRACE); - -TOK_RBRACE: RBrace -> type (RBRACE), popMode; - -TOK_ID: Id -> type (TOKEN_REF); - -TOK_EQ: Equal -> type (EQUAL); - -TOK_CL: '\'' LITERAL_CHAR '\'' -> type(CHAR_LITERAL); - -TOK_SL: '"' LIT_STR* '"' -> type(STRING_LITERAL); - -TOK_SEMI: Semi -> type (SEMI); - -TOK_RANGE: Range -> type(RANGE); - -TOK_WS: Ws+ -> type (WS), channel (OFF_CHANNEL); - -// ------------------------- - +TOK_DOC_COMMENT + : DocComment -> type (DOC_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_BLOCK_COMMENT + : BlockComment -> type (ML_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_LINE_COMMENT + : LineComment -> type (SL_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_LBRACE + : LBrace -> type (LBRACE) + ; + +TOK_RBRACE + : RBrace -> type (RBRACE) , popMode + ; + +TOK_ID + : Id -> type (TOKEN_REF) + ; + +TOK_EQ + : Equal -> type (EQUAL) + ; + +TOK_CL + : '\'' LITERAL_CHAR '\'' -> type (CHAR_LITERAL) + ; + +TOK_SL + : '"' LIT_STR* '"' -> type (STRING_LITERAL) + ; + +TOK_SEMI + : Semi -> type (SEMI) + ; + +TOK_RANGE + : Range -> type (RANGE) + ; + +TOK_WS + : Ws+ -> type (WS) , channel (OFF_CHANNEL) + ; + // ------------------------- + mode LexerCharSet; -LEXER_CHAR_SET_BODY: (~ [\]\\] | EscAny)+ -> more; - -LEXER_CHAR_SET: RBrack -> popMode; - -UNTERMINATED_CHAR_SET: EOF -> popMode; - -// ------------------------------------------------------------------------------ 
-// Grammar specific Keywords, Punctuation, etc. +LEXER_CHAR_SET_BODY + : (~ [\]\\] | EscAny)+ -> more + ; + +LEXER_CHAR_SET + : RBrack -> popMode + ; + +UNTERMINATED_CHAR_SET + : EOF -> popMode + ; + // ------------------------------------------------------------------------------ + + // Grammar specific Keywords, Punctuation, etc. + +fragment Id + : NameStartChar NameChar* + ; -fragment Id: NameStartChar NameChar*; \ No newline at end of file diff --git a/antlr/antlr3/ANTLRv3Lexer.g4 b/antlr/antlr3/ANTLRv3Lexer.g4 index 5571c2e0dd..f654f0a5e9 100644 --- a/antlr/antlr3/ANTLRv3Lexer.g4 +++ b/antlr/antlr3/ANTLRv3Lexer.g4 @@ -27,418 +27,756 @@ */ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false -// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true - -lexer grammar ANTLRv3Lexer; - -options { - superClass = LexerAdaptor; -} - -channels { - OFF_CHANNEL -} - -tokens { - DOC_COMMENT, - PARSER, - LEXER, - RULE, - BLOCK, - OPTIONAL, - CLOSURE, - POSITIVE_CLOSURE, - SYNPRED, - RANGE, - CHAR_RANGE, - EPSILON, - ALT, - EOR, - EOB, - EOA, // end of alt - ID, - ARG, - ARGLIST, - RET, - LEXER_GRAMMAR, - PARSER_GRAMMAR, - TREE_GRAMMAR, - COMBINED_GRAMMAR, - INITACTION, - LABEL, // $x used in rewrite rules - TEMPLATE, - SCOPE, - SEMPRED, - GATED_SEMPRED, // {p}? => - SYN_SEMPRED, // (...) => it's a manually-specified synpred converted to sempred - BACKTRACK_SEMPRED, // auto backtracking mode syn pred converted to sempred - FRAGMENT, - TREE_BEGIN, - ROOT, - BANG, - RANGE, - REWRITE, - ACTION_CONTENT -} - -DOC_COMMENT: '/**' .*? ('*/' | EOF) -> channel(OFF_CHANNEL); - -SL_COMMENT: '//' ~ [\r\n]* -> channel(OFF_CHANNEL); - -ML_COMMENT: '/*' .*? '*/' -> channel(OFF_CHANNEL); - -INT: '0' .. 
'9'+; - -CHAR_LITERAL: '\'' LITERAL_CHAR '\''; - -STRING_LITERAL: '\'' LITERAL_CHAR LITERAL_CHAR* '\''; - -fragment LITERAL_CHAR: ESC | ~ ('\'' | '\\'); - -// This seems to be available in Antlr3. - -DOUBLE_QUOTE_STRING_LITERAL: '"' (ESC | ~ ('\\' | '"'))* '"'; - -// This seems to be available in Antlr3. - -DOUBLE_ANGLE_STRING_LITERAL: '<<' .*? '>>'; - -fragment ESC: - '\\' ( - 'n' - | 'r' - | 't' - | 'b' - | 'f' - | '"' - | '\'' - | '\\' - | '>' - | 'u' XDIGIT XDIGIT XDIGIT XDIGIT - | . - ) -; - -fragment XDIGIT: '0' .. '9' | 'a' .. 'f' | 'A' .. 'F'; - -// ------------------------- -// Arguments -// -// Certain argument lists, such as those specifying call parameters -// to a rule invocation, or input parameters to a rule specification -// are contained within square brackets. - -BEGIN_ARGUMENT: LBrack { this.handleBeginArgument(); }; - -// ------------------------- -// Actions - -BEGIN_ACTION: LBrace -> pushMode (Actionx); - -// ------------------------- -// Keywords -// -// Keywords may not be used as labels for rules or in any other context where -// they would be ambiguous with the keyword vs some other identifier. OPTIONS, -// TOKENS, & CHANNELS blocks are handled idiomatically in dedicated lexical modes. 
- -OPTIONS: 'options' -> pushMode (Options); - -TOKENS: 'tokens' -> pushMode (Tokens); - -CATCH : 'catch'; -FINALLY : 'finally'; -FRAGMENT : 'fragment'; -GRAMMAR : 'grammar'; -LEXER : 'lexer'; -PARSER : 'parser'; -PRIVATE : 'private'; -PROTECTED : 'protected'; -PUBLIC : 'public'; -RETURNS : 'returns'; -SCOPE : 'scope'; -THROWS : 'throws'; -TREE : 'tree'; - -fragment WS_LOOP: (WS | SL_COMMENT | ML_COMMENT)*; - -//// ================================= - -AT : At; -BANG : '!'; -COLON : Colon; -COLONCOLON : DColon; -COMMA : Comma; -DOT : Dot; -EQUAL : Equal; -LBRACE : LBrace; -LBRACK : LBrack; -LPAREN : LParen; -OR : Pipe; -PLUS : Plus; -QM : Question; -RANGE : '..'; -RBRACE : RBrace; -RBRACK : RBrack; -REWRITE : RArrow; -ROOT : '^'; -RPAREN : RParen; -SEMI : Semi; -SEMPREDOP : '=>'; -STAR : Star; -TREE_BEGIN : '^('; -DOLLAR : Dollar; -PEQ : PlusAssign; -NOT : Tilde; - -WS: (' ' | '\t' | '\r'? '\n')+ -> channel(OFF_CHANNEL); - -TOKEN_REF: 'A' .. 'Z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')*; - -RULE_REF: 'a' .. 'z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')*; - -// ====================================================== -// Lexer fragments -// -// ----------------------------------- -// Whitespace & Comments - -fragment Ws: Hws | Vws; - -fragment Hws: [ \t]; - -fragment Vws: [\r\n\f]; - -fragment BlockComment: '/*' .*? ('*/' | EOF); - -fragment DocComment: '/**' .*? ('*/' | EOF); - -fragment LineComment: '//' ~ [\r\n]*; -// ----------------------------------- -// Escapes -// Any kind of escaped character that we can embed within ANTLR literal strings. - -fragment EscSeq: Esc ([btnfr"'\\] | UnicodeEsc | . 
| EOF); - -fragment EscAny: Esc .; - -fragment UnicodeEsc: 'u' (HexDigit (HexDigit (HexDigit HexDigit?)?)?)?; -// ----------------------------------- -// Numerals - -fragment DecimalNumeral: '0' | [1-9] DecDigit*; -// ----------------------------------- -// Digits - -fragment HexDigit: [0-9a-fA-F]; - -fragment DecDigit: [0-9]; -// ----------------------------------- -// Literals - -fragment BoolLiteral: 'true' | 'false'; - -fragment CharLiteral: SQuote (EscSeq | ~ ['\r\n\\]) SQuote; - -fragment SQuoteLiteral: SQuote (EscSeq | ~ ['\r\n\\])* SQuote; - -fragment DQuoteLiteral: DQuote (EscSeq | ~ ["\r\n\\])* DQuote; - -fragment USQuoteLiteral: SQuote (EscSeq | ~ ['\r\n\\])*; -// ----------------------------------- -// Character ranges - -fragment NameChar: - NameStartChar - | '0' .. '9' - | Underscore - | '\u00B7' - | '\u0300' .. '\u036F' - | '\u203F' .. '\u2040' -; - -fragment NameStartChar: - 'A' .. 'Z' - | 'a' .. 'z' - | '\u00C0' .. '\u00D6' - | '\u00D8' .. '\u00F6' - | '\u00F8' .. '\u02FF' - | '\u0370' .. '\u037D' - | '\u037F' .. '\u1FFF' - | '\u200C' .. '\u200D' - | '\u2070' .. '\u218F' - | '\u2C00' .. '\u2FEF' - | '\u3001' .. '\uD7FF' - | '\uF900' .. '\uFDCF' - | '\uFDF0' .. 
'\uFFFD' -; - -// ignores | ['\u10000-'\uEFFFF] ; - -// ----------------------------------- -// Types -fragment Int: 'int'; -// ----------------------------------- -// Symbols - -fragment Esc : '\\'; -fragment Colon : ':'; -fragment DColon : '::'; - -fragment SQuote: '\''; - -fragment DQuote: '"'; - -fragment LParen : '('; -fragment RParen : ')'; -fragment LBrace : '{'; -fragment RBrace : '}'; -fragment LBrack : '['; -fragment RBrack : ']'; -fragment RArrow : '->'; - -fragment Lt: '<'; - -fragment Gt: '>'; +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -fragment Equal : '='; -fragment Question : '?'; -fragment Star : '*'; -fragment Plus : '+'; -fragment PlusAssign : '+='; -fragment Underscore : '_'; -fragment Pipe : '|'; -fragment Dollar : '$'; -fragment Comma : ','; -fragment Semi : ';'; -fragment Dot : '.'; -fragment Range : '..'; -fragment At : '@'; -fragment Pound: '#'; +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true -fragment Tilde: '~'; +lexer grammar ANTLRv3Lexer; -// ====================================================== -// Lexer modes -// ------------------------- -// Arguments +options { superClass = LexerAdaptor; } +channels { OFF_CHANNEL } +tokens { DOC_COMMENT , PARSER , LEXER , RULE , BLOCK , OPTIONAL , CLOSURE , POSITIVE_CLOSURE , SYNPRED , RANGE , CHAR_RANGE , EPSILON , ALT , EOR , EOB , EOA , // end of alt +ID , ARG , ARGLIST , RET , LEXER_GRAMMAR , PARSER_GRAMMAR , TREE_GRAMMAR , COMBINED_GRAMMAR , INITACTION , LABEL , // $x used in rewrite rules +TEMPLATE , SCOPE , SEMPRED , GATED_SEMPRED , // {p}? => +SYN_SEMPRED , // (...) => it's a manually-specified synpred converted to sempred +BACKTRACK_SEMPRED , // auto backtracking mode syn pred converted to sempred +FRAGMENT , TREE_BEGIN , ROOT , BANG , RANGE , REWRITE , ACTION_CONTENT } +DOC_COMMENT + : '/**' .*? 
('*/' | EOF) -> channel (OFF_CHANNEL) + ; + +SL_COMMENT + : '//' ~ [\r\n]* -> channel (OFF_CHANNEL) + ; + +ML_COMMENT + : '/*' .*? '*/' -> channel (OFF_CHANNEL) + ; + +INT + : '0' .. '9'+ + ; + +CHAR_LITERAL + : '\'' LITERAL_CHAR '\'' + ; + +STRING_LITERAL + : '\'' LITERAL_CHAR LITERAL_CHAR* '\'' + ; + +fragment LITERAL_CHAR + : ESC + | ~ ('\'' | '\\') + ; + // This seems to be available in Antlr3. + +DOUBLE_QUOTE_STRING_LITERAL + : '"' (ESC | ~ ('\\' | '"'))* '"' + ; + // This seems to be available in Antlr3. + +DOUBLE_ANGLE_STRING_LITERAL + : '<<' .*? '>>' + ; + +fragment ESC + : '\\' ('n' | 'r' | 't' | 'b' | 'f' | '"' | '\'' | '\\' | '>' | 'u' XDIGIT XDIGIT XDIGIT XDIGIT | .) + ; + +fragment XDIGIT + : '0' .. '9' + | 'a' .. 'f' + | 'A' .. 'F' + ; + // ------------------------- + + // Arguments + + // + + // Certain argument lists, such as those specifying call parameters + + // to a rule invocation, or input parameters to a rule specification + + // are contained within square brackets. + +BEGIN_ARGUMENT + : LBrack + { this.handleBeginArgument(); } + ; + // ------------------------- + + // Actions + +BEGIN_ACTION + : LBrace -> pushMode (Actionx) + ; + // ------------------------- + + // Keywords + + // + + // Keywords may not be used as labels for rules or in any other context where + + // they would be ambiguous with the keyword vs some other identifier. OPTIONS, + + // TOKENS, & CHANNELS blocks are handled idiomatically in dedicated lexical modes. 
+ +OPTIONS + : 'options' -> pushMode (Options) + ; + +TOKENS + : 'tokens' -> pushMode (Tokens) + ; + +CATCH + : 'catch' + ; + +FINALLY + : 'finally' + ; + +FRAGMENT + : 'fragment' + ; + +GRAMMAR + : 'grammar' + ; + +LEXER + : 'lexer' + ; + +PARSER + : 'parser' + ; + +PRIVATE + : 'private' + ; + +PROTECTED + : 'protected' + ; + +PUBLIC + : 'public' + ; + +RETURNS + : 'returns' + ; + +SCOPE + : 'scope' + ; + +THROWS + : 'throws' + ; + +TREE + : 'tree' + ; + +fragment WS_LOOP + : (WS | SL_COMMENT | ML_COMMENT)* + ; + //// ================================= + +AT + : At + ; + +BANG + : '!' + ; + +COLON + : Colon + ; + +COLONCOLON + : DColon + ; + +COMMA + : Comma + ; + +DOT + : Dot + ; + +EQUAL + : Equal + ; + +LBRACE + : LBrace + ; + +LBRACK + : LBrack + ; + +LPAREN + : LParen + ; + +OR + : Pipe + ; + +PLUS + : Plus + ; + +QM + : Question + ; + +RANGE + : '..' + ; + +RBRACE + : RBrace + ; + +RBRACK + : RBrack + ; + +REWRITE + : RArrow + ; + +ROOT + : '^' + ; + +RPAREN + : RParen + ; + +SEMI + : Semi + ; + +SEMPREDOP + : '=>' + ; + +STAR + : Star + ; + +TREE_BEGIN + : '^(' + ; + +DOLLAR + : Dollar + ; + +PEQ + : PlusAssign + ; + +NOT + : Tilde + ; + +WS + : (' ' | '\t' | '\r'? '\n')+ -> channel (OFF_CHANNEL) + ; + +TOKEN_REF + : 'A' .. 'Z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')* + ; + +RULE_REF + : 'a' .. 'z' ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9')* + ; + // ====================================================== + + // Lexer fragments + + // + + // ----------------------------------- + + // Whitespace & Comments + +fragment Ws + : Hws + | Vws + ; + +fragment Hws + : [ \t] + ; + +fragment Vws + : [\r\n\f] + ; + +fragment BlockComment + : '/*' .*? ('*/' | EOF) + ; + +fragment DocComment + : '/**' .*? ('*/' | EOF) + ; + +fragment LineComment + : '//' ~ [\r\n]* + ; + // ----------------------------------- + + // Escapes + + // Any kind of escaped character that we can embed within ANTLR literal strings. + +fragment EscSeq + : Esc ([btnfr"'\\] | UnicodeEsc | . 
| EOF) + ; + +fragment EscAny + : Esc . + ; + +fragment UnicodeEsc + : 'u' (HexDigit (HexDigit (HexDigit HexDigit?)?)?)? + ; + // ----------------------------------- + + // Numerals + +fragment DecimalNumeral + : '0' + | [1-9] DecDigit* + ; + // ----------------------------------- + + // Digits + +fragment HexDigit + : [0-9a-fA-F] + ; + +fragment DecDigit + : [0-9] + ; + // ----------------------------------- + + // Literals + +fragment BoolLiteral + : 'true' + | 'false' + ; + +fragment CharLiteral + : SQuote (EscSeq | ~ ['\r\n\\]) SQuote + ; + +fragment SQuoteLiteral + : SQuote (EscSeq | ~ ['\r\n\\])* SQuote + ; + +fragment DQuoteLiteral + : DQuote (EscSeq | ~ ["\r\n\\])* DQuote + ; + +fragment USQuoteLiteral + : SQuote (EscSeq | ~ ['\r\n\\])* + ; + // ----------------------------------- + + // Character ranges + +fragment NameChar + : NameStartChar + | '0' .. '9' + | Underscore + | '\u00B7' + | '\u0300' .. '\u036F' + | '\u203F' .. '\u2040' + ; + +fragment NameStartChar + : 'A' .. 'Z' + | 'a' .. 'z' + | '\u00C0' .. '\u00D6' + | '\u00D8' .. '\u00F6' + | '\u00F8' .. '\u02FF' + | '\u0370' .. '\u037D' + | '\u037F' .. '\u1FFF' + | '\u200C' .. '\u200D' + | '\u2070' .. '\u218F' + | '\u2C00' .. '\u2FEF' + | '\u3001' .. '\uD7FF' + | '\uF900' .. '\uFDCF' + | '\uFDF0' .. '\uFFFD' + ; + // ignores | ['\u10000-'\uEFFFF] ; + + // ----------------------------------- + + // Types + +fragment Int + : 'int' + ; + // ----------------------------------- + + // Symbols + +fragment Esc + : '\\' + ; + +fragment Colon + : ':' + ; + +fragment DColon + : '::' + ; + +fragment SQuote + : '\'' + ; + +fragment DQuote + : '"' + ; + +fragment LParen + : '(' + ; + +fragment RParen + : ')' + ; + +fragment LBrace + : '{' + ; + +fragment RBrace + : '}' + ; + +fragment LBrack + : '[' + ; + +fragment RBrack + : ']' + ; + +fragment RArrow + : '->' + ; + +fragment Lt + : '<' + ; + +fragment Gt + : '>' + ; + +fragment Equal + : '=' + ; + +fragment Question + : '?' 
+ ; + +fragment Star + : '*' + ; + +fragment Plus + : '+' + ; + +fragment PlusAssign + : '+=' + ; + +fragment Underscore + : '_' + ; + +fragment Pipe + : '|' + ; + +fragment Dollar + : '$' + ; + +fragment Comma + : ',' + ; + +fragment Semi + : ';' + ; + +fragment Dot + : '.' + ; + +fragment Range + : '..' + ; + +fragment At + : '@' + ; + +fragment Pound + : '#' + ; + +fragment Tilde + : '~' + ; + // ====================================================== + + // Lexer modes + + // ------------------------- + + // Arguments + mode Argument; // E.g., [int x, List a[]] -NESTED_ARGUMENT: LBrack -> type (ARGUMENT_CONTENT), pushMode (Argument); - -ARGUMENT_ESCAPE: EscAny -> type (ARGUMENT_CONTENT); - -ARGUMENT_STRING_LITERAL: DQuoteLiteral -> type (ARGUMENT_CONTENT); - -ARGUMENT_CHAR_LITERAL: SQuoteLiteral -> type (ARGUMENT_CONTENT); - -END_ARGUMENT: RBrack { this.handleEndArgument(); }; -// added this to return non-EOF token type here. EOF does something weird - -UNTERMINATED_ARGUMENT: EOF -> popMode; - -ARGUMENT_CONTENT: .; - -// ------------------------- -// Actions -// -// Many language targets use {} as block delimiters and so we -// must recursively match {} delimited blocks to balance the -// braces. Additionally, we must make some assumptions about -// literal string representation in the target language. We assume -// that they are delimited by ' or " and so consume these -// in their own alts so as not to inadvertantly match {}. +NESTED_ARGUMENT + : LBrack -> type (ARGUMENT_CONTENT) , pushMode (Argument) + ; + +ARGUMENT_ESCAPE + : EscAny -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_STRING_LITERAL + : DQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_CHAR_LITERAL + : SQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +END_ARGUMENT + : RBrack + { this.handleEndArgument(); } + ; + // added this to return non-EOF token type here. EOF does something weird + +UNTERMINATED_ARGUMENT + : EOF -> popMode + ; + +ARGUMENT_CONTENT + : . 
+ ; + // ------------------------- + + // Actions + + // + + // Many language targets use {} as block delimiters and so we + + // must recursively match {} delimited blocks to balance the + + // braces. Additionally, we must make some assumptions about + + // literal string representation in the target language. We assume + + // that they are delimited by ' or " and so consume these + + // in their own alts so as not to inadvertantly match {}. + mode Actionx; -NESTED_ACTION: LBrace -> type (ACTION_CONTENT), pushMode (Actionx); - -ACTION_ESCAPE: EscAny -> type (ACTION_CONTENT); - -ACTION_STRING_LITERAL: DQuoteLiteral -> type (ACTION_CONTENT); - -ACTION_CHAR_LITERAL: SQuoteLiteral -> type (ACTION_CONTENT); - -ACTION_DOC_COMMENT: DocComment -> type (ACTION_CONTENT); - -ACTION_BLOCK_COMMENT: BlockComment -> type (ACTION_CONTENT); - -ACTION_LINE_COMMENT: LineComment -> type (ACTION_CONTENT); - -END_ACTION: RBrace { this.handleEndAction(); }; - -UNTERMINATED_ACTION: EOF -> popMode; - -ACTION_CONTENT: .; - -// ------------------------- - +NESTED_ACTION + : LBrace -> type (ACTION_CONTENT) , pushMode (Actionx) + ; + +ACTION_ESCAPE + : EscAny -> type (ACTION_CONTENT) + ; + +ACTION_STRING_LITERAL + : DQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_CHAR_LITERAL + : SQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_DOC_COMMENT + : DocComment -> type (ACTION_CONTENT) + ; + +ACTION_BLOCK_COMMENT + : BlockComment -> type (ACTION_CONTENT) + ; + +ACTION_LINE_COMMENT + : LineComment -> type (ACTION_CONTENT) + ; + +END_ACTION + : RBrace + { this.handleEndAction(); } + ; + +UNTERMINATED_ACTION + : EOF -> popMode + ; + +ACTION_CONTENT + : . 
+ ; + // ------------------------- + mode Options; -OPT_DOC_COMMENT: DocComment -> type (DOC_COMMENT), channel (OFF_CHANNEL); - -OPT_BLOCK_COMMENT: BlockComment -> type (ML_COMMENT), channel (OFF_CHANNEL); - -OPT_LINE_COMMENT: LineComment -> type (SL_COMMENT), channel (OFF_CHANNEL); - -OPT_LBRACE: LBrace { this.handleOptionsLBrace(); }; - -OPT_RBRACE: RBrace -> type (RBRACE), popMode; - -OPT_ID: Id -> type (ID); - -OPT_DOT: Dot -> type (DOT); - -OPT_ASSIGN: Equal -> type (EQUAL); - -OPT_STRING_LITERAL: SQuoteLiteral -> type (STRING_LITERAL); - -OPT_INT: DecimalNumeral -> type (INT); - -OPT_STAR: Star -> type (STAR); - -OPT_SEMI: Semi -> type (SEMI); - -OPT_WS: Ws+ -> type (WS), channel (OFF_CHANNEL); - -// ------------------------- - +OPT_DOC_COMMENT + : DocComment -> type (DOC_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_BLOCK_COMMENT + : BlockComment -> type (ML_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_LINE_COMMENT + : LineComment -> type (SL_COMMENT) , channel (OFF_CHANNEL) + ; + +OPT_LBRACE + : LBrace + { this.handleOptionsLBrace(); } + ; + +OPT_RBRACE + : RBrace -> type (RBRACE) , popMode + ; + +OPT_ID + : Id -> type (ID) + ; + +OPT_DOT + : Dot -> type (DOT) + ; + +OPT_ASSIGN + : Equal -> type (EQUAL) + ; + +OPT_STRING_LITERAL + : SQuoteLiteral -> type (STRING_LITERAL) + ; + +OPT_INT + : DecimalNumeral -> type (INT) + ; + +OPT_STAR + : Star -> type (STAR) + ; + +OPT_SEMI + : Semi -> type (SEMI) + ; + +OPT_WS + : Ws+ -> type (WS) , channel (OFF_CHANNEL) + ; + // ------------------------- + mode Tokens; -TOK_DOC_COMMENT: DocComment -> type (DOC_COMMENT), channel (OFF_CHANNEL); - -TOK_BLOCK_COMMENT: BlockComment -> type (ML_COMMENT), channel (OFF_CHANNEL); - -TOK_LINE_COMMENT: LineComment -> type (SL_COMMENT), channel (OFF_CHANNEL); - -TOK_LBRACE: LBrace -> type (LBRACE); - -TOK_RBRACE: RBrace -> type (RBRACE), popMode; - -TOK_ID: Id -> type (TOKEN_REF); - -TOK_EQ: Equal -> type (EQUAL); - -TOK_CL: '\'' LITERAL_CHAR '\'' -> type(CHAR_LITERAL); - -TOK_SL: '\'' 
LITERAL_CHAR LITERAL_CHAR* '\'' -> type(STRING_LITERAL); - -TOK_SEMI: Semi -> type (SEMI); - -TOK_WS: Ws+ -> type (WS), channel (OFF_CHANNEL); - -// ------------------------- - +TOK_DOC_COMMENT + : DocComment -> type (DOC_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_BLOCK_COMMENT + : BlockComment -> type (ML_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_LINE_COMMENT + : LineComment -> type (SL_COMMENT) , channel (OFF_CHANNEL) + ; + +TOK_LBRACE + : LBrace -> type (LBRACE) + ; + +TOK_RBRACE + : RBrace -> type (RBRACE) , popMode + ; + +TOK_ID + : Id -> type (TOKEN_REF) + ; + +TOK_EQ + : Equal -> type (EQUAL) + ; + +TOK_CL + : '\'' LITERAL_CHAR '\'' -> type (CHAR_LITERAL) + ; + +TOK_SL + : '\'' LITERAL_CHAR LITERAL_CHAR* '\'' -> type (STRING_LITERAL) + ; + +TOK_SEMI + : Semi -> type (SEMI) + ; + +TOK_WS + : Ws+ -> type (WS) , channel (OFF_CHANNEL) + ; + // ------------------------- + mode LexerCharSet; -LEXER_CHAR_SET_BODY: (~ [\]\\] | EscAny)+ -> more; - -LEXER_CHAR_SET: RBrack -> popMode; - -UNTERMINATED_CHAR_SET: EOF -> popMode; - -// ------------------------------------------------------------------------------ -// Grammar specific Keywords, Punctuation, etc. +LEXER_CHAR_SET_BODY + : (~ [\]\\] | EscAny)+ -> more + ; + +LEXER_CHAR_SET + : RBrack -> popMode + ; + +UNTERMINATED_CHAR_SET + : EOF -> popMode + ; + // ------------------------------------------------------------------------------ + + // Grammar specific Keywords, Punctuation, etc. 
+ +fragment Id + : NameStartChar NameChar* + ; -fragment Id: NameStartChar NameChar*; \ No newline at end of file diff --git a/html/HTMLLexer.g4 b/html/HTMLLexer.g4 index 99fde39b3a..83a6a3a8a9 100644 --- a/html/HTMLLexer.g4 +++ b/html/HTMLLexer.g4 @@ -27,108 +27,183 @@ */ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false -// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true - -lexer grammar HTMLLexer; - -HTML_COMMENT: ''; - -HTML_CONDITIONAL_COMMENT: ''; - -XML: ''; - -CDATA: ''; - -DTD: ''; -SCRIPTLET: '' | '<%' .*? '%>'; -SEA_WS: (' ' | '\t' | '\r'? '\n')+; - -SCRIPT_OPEN: '' -> pushMode(SCRIPT); - -STYLE_OPEN: '' -> pushMode(STYLE); - -TAG_OPEN: '<' -> pushMode(TAG); - -HTML_TEXT: ~'<'+; - -// tag declarations +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine -mode TAG; -TAG_CLOSE: '>' -> popMode; +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true -TAG_SLASH_CLOSE: '/>' -> popMode; +lexer grammar HTMLLexer; -TAG_SLASH: '/'; +HTML_COMMENT + : '' + ; -// lexing mode for attribute values +HTML_CONDITIONAL_COMMENT + : '' + ; -TAG_EQUALS: '=' -> pushMode(ATTVALUE); +XML + : '' + ; -TAG_NAME: TAG_NameStartChar TAG_NameChar*; +CDATA + : '' + ; -TAG_WHITESPACE: [ \t\r\n] -> channel(HIDDEN); +DTD + : '' + ; -fragment HEXDIGIT: [a-fA-F0-9]; +SCRIPTLET + : '' + | '<%' .*? '%>' + ; -fragment DIGIT: [0-9]; +SEA_WS + : (' ' | '\t' | '\r'? '\n')+ + ; -fragment TAG_NameChar: - TAG_NameStartChar - | '-' - | '_' - | '.' 
- | DIGIT - | '\u00B7' - | '\u0300' ..'\u036F' - | '\u203F' ..'\u2040' -; +SCRIPT_OPEN + : '' -> pushMode (SCRIPT) + ; -fragment TAG_NameStartChar: - [:a-zA-Z] - | '\u2070' ..'\u218F' - | '\u2C00' ..'\u2FEF' - | '\u3001' ..'\uD7FF' - | '\uF900' ..'\uFDCF' - | '\uFDF0' ..'\uFFFD' -; +STYLE_OPEN + : '' -> pushMode (STYLE) + ; -// +TAG_OPEN + : '<' -> pushMode (TAG) + ; +HTML_TEXT + : ~ '<'+ + ; + // tag declarations + +mode TAG; +TAG_CLOSE + : '>' -> popMode + ; + +TAG_SLASH_CLOSE + : '/>' -> popMode + ; + +TAG_SLASH + : '/' + ; + // lexing mode for attribute values + +TAG_EQUALS + : '=' -> pushMode (ATTVALUE) + ; + +TAG_NAME + : TAG_NameStartChar TAG_NameChar* + ; + +TAG_WHITESPACE + : [ \t\r\n] -> channel (HIDDEN) + ; + +fragment HEXDIGIT + : [a-fA-F0-9] + ; + +fragment DIGIT + : [0-9] + ; + +fragment TAG_NameChar + : TAG_NameStartChar + | '-' + | '_' + | '.' + | DIGIT + | '\u00B7' + | '\u0300' .. '\u036F' + | '\u203F' .. '\u2040' + ; + +fragment TAG_NameStartChar + : [:a-zA-Z] + | '\u2070' .. '\u218F' + | '\u2C00' .. '\u2FEF' + | '\u3001' .. '\uD7FF' + | '\uF900' .. '\uFDCF' + | '\uFDF0' .. '\uFFFD' + ; + // + mode SCRIPT; - -SCRIPT_BODY: .*? '' -> popMode; - -SCRIPT_SHORT_BODY: .*? '' -> popMode; - -// - +SCRIPT_BODY + : .*? '' -> popMode + ; + +SCRIPT_SHORT_BODY + : .*? '' -> popMode + ; + // + mode STYLE; - -STYLE_BODY: .*? '' -> popMode; - -STYLE_SHORT_BODY: .*? '' -> popMode; - -// attribute values - +STYLE_BODY + : .*? '' -> popMode + ; + +STYLE_SHORT_BODY + : .*? '' -> popMode + ; + // attribute values + mode ATTVALUE; - // an attribute value may have spaces b/t the '=' and the value -ATTVALUE_VALUE: ' '* ATTRIBUTE -> popMode; - -ATTRIBUTE: DOUBLE_QUOTE_STRING | SINGLE_QUOTE_STRING | ATTCHARS | HEXCHARS | DECCHARS; - -fragment ATTCHARS: ATTCHAR+ ' '?; - -fragment ATTCHAR: '-' | '_' | '.' | '/' | '+' | ',' | '?' 
| '=' | ':' | ';' | '#' | [0-9a-zA-Z]; - -fragment HEXCHARS: '#' [0-9a-fA-F]+; - -fragment DECCHARS: [0-9]+ '%'?; -fragment DOUBLE_QUOTE_STRING: '"' ~[<"]* '"'; +ATTVALUE_VALUE + : ' '* ATTRIBUTE -> popMode + ; + +ATTRIBUTE + : DOUBLE_QUOTE_STRING + | SINGLE_QUOTE_STRING + | ATTCHARS + | HEXCHARS + | DECCHARS + ; + +fragment ATTCHARS + : ATTCHAR+ ' '? + ; + +fragment ATTCHAR + : '-' + | '_' + | '.' + | '/' + | '+' + | ',' + | '?' + | '=' + | ':' + | ';' + | '#' + | [0-9a-zA-Z] + ; + +fragment HEXCHARS + : '#' [0-9a-fA-F]+ + ; + +fragment DECCHARS + : [0-9]+ '%'? + ; + +fragment DOUBLE_QUOTE_STRING + : '"' ~ [<"]* '"' + ; + +fragment SINGLE_QUOTE_STRING + : '\'' ~ [<']* '\'' + ; -fragment SINGLE_QUOTE_STRING: '\'' ~[<']* '\''; \ No newline at end of file diff --git a/html/desc.xml b/html/desc.xml index c76647fab4..e7da74e727 100644 --- a/html/desc.xml +++ b/html/desc.xml @@ -1,4 +1,3 @@ - - - + + \ No newline at end of file diff --git a/python/python3_12_1/PythonLexer.g4 b/python/python3_12_1/PythonLexer.g4 index e3a5ed3ecb..f7dbb67dfc 100644 --- a/python/python3_12_1/PythonLexer.g4 +++ b/python/python3_12_1/PythonLexer.g4 @@ -20,1350 +20,1809 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - /* +/* * Project : an ANTLR4 lexer grammar for Python 3 * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com */ - lexer grammar PythonLexer; -options { superClass=PythonLexerBase; } -tokens { - INDENT, DEDENT // https://docs.python.org/3.12/reference/lexical_analysis.html#indentation - , FSTRING_START, FSTRING_MIDDLE, FSTRING_END // https://peps.python.org/pep-0701/#specification +options { superClass = PythonLexerBase; } +tokens { INDENT , DEDENT // https://docs.python.org/3.12/reference/lexical_analysis.html#indentation +, FSTRING_START , FSTRING_MIDDLE , FSTRING_END // https://peps.python.org/pep-0701/#specification } - - // https://docs.python.org/3.12/reference/lexical_analysis.html /* * default lexer mode */ + // https://docs.python.org/3.12/reference/lexical_analysis.html#keywords -FALSE : 'False'; -AWAIT : 'await'; -ELSE : 'else'; -IMPORT : 'import'; -PASS : 'pass'; -NONE : 'None'; -BREAK : 'break'; -EXCEPT : 'except'; -IN : 'in'; -RAISE : 'raise'; -TRUE : 'True'; -CLASS : 'class'; -FINALLY : 'finally'; -IS : 'is'; -RETURN : 'return'; -AND : 'and'; -CONTINUE : 'continue'; -FOR : 'for'; -LAMBDA : 'lambda'; -TRY : 'try'; -AS : 'as'; -DEF : 'def'; -FROM : 'from'; -NONLOCAL : 'nonlocal'; -WHILE : 'while'; -ASSERT : 'assert'; -DEL : 'del'; -GLOBAL : 'global'; -NOT : 'not'; -WITH : 'with'; -ASYNC : 'async'; -ELIF : 'elif'; -IF : 'if'; -OR : 'or'; -YIELD : 'yield'; - -// https://docs.python.org/3.12/library/token.html#module-token -LPAR : '('; // OPEN_PAREN -LSQB : '['; // OPEN_BRACK -LBRACE : '{'; // OPEN_BRACE -RPAR : ')'; // CLOSE_PAREN -RSQB : ']'; // CLOSE_BRACK -RBRACE : '}'; // CLOSE_BRACE -DOT : '.'; -COLON : ':'; -COMMA : ','; -SEMI : ';'; -PLUS : '+'; -MINUS : '-'; -STAR : '*'; -SLASH : '/'; -VBAR : '|'; -AMPER : '&'; -LESS : '<'; -GREATER : '>'; -EQUAL : '='; -PERCENT : '%'; -EQEQUAL : '=='; -NOTEQUAL : '!='; -LESSEQUAL : '<='; -GREATEREQUAL : '>='; -TILDE : '~'; -CIRCUMFLEX : '^'; 
-LEFTSHIFT : '<<'; -RIGHTSHIFT : '>>'; -DOUBLESTAR : '**'; -PLUSEQUAL : '+='; -MINEQUAL : '-='; -STAREQUAL : '*='; -SLASHEQUAL : '/='; -PERCENTEQUAL : '%='; -AMPEREQUAL : '&='; -VBAREQUAL : '|='; -CIRCUMFLEXEQUAL : '^='; -LEFTSHIFTEQUAL : '<<='; -RIGHTSHIFTEQUAL : '>>='; -DOUBLESTAREQUAL : '**='; -DOUBLESLASH : '//'; -DOUBLESLASHEQUAL : '//='; -AT : '@'; -ATEQUAL : '@='; -RARROW : '->'; -ELLIPSIS : '...'; -COLONEQUAL : ':='; -EXCLAMATION : '!'; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#identifiers -NAME - : ID_START ID_CONTINUE* - ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#numeric-literals -NUMBER - : INTEGER - | FLOAT_NUMBER - | IMAG_NUMBER - ; +FALSE + : 'False' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals -STRING - : STRING_LITERAL - | BYTES_LITERAL - ; +AWAIT + : 'await' + ; -// https://peps.python.org/pep-0484/#type-comments -TYPE_COMMENT - : '#' WS? 'type:' ~[\r\n]* - ; +ELSE + : 'else' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines -NEWLINE - : OS_INDEPENDENT_NL - ; +IMPORT + : 'import' + ; + +PASS + : 'pass' + ; + +NONE + : 'None' + ; + +BREAK + : 'break' + ; + +EXCEPT + : 'except' + ; + +IN + : 'in' + ; + +RAISE + : 'raise' + ; + +TRUE + : 'True' + ; + +CLASS + : 'class' + ; + +FINALLY + : 'finally' + ; + +IS + : 'is' + ; + +RETURN + : 'return' + ; + +AND + : 'and' + ; + +CONTINUE + : 'continue' + ; + +FOR + : 'for' + ; + +LAMBDA + : 'lambda' + ; + +TRY + : 'try' + ; + +AS + : 'as' + ; + +DEF + : 'def' + ; + +FROM + : 'from' + ; + +NONLOCAL + : 'nonlocal' + ; + +WHILE + : 'while' + ; + +ASSERT + : 'assert' + ; + +DEL + : 'del' + ; + +GLOBAL + : 'global' + ; + +NOT + : 'not' + ; + +WITH + : 'with' + ; + +ASYNC + : 'async' + ; + +ELIF + : 'elif' + ; + +IF + : 'if' + ; + +OR + : 'or' + ; + +YIELD + : 'yield' + ; + // https://docs.python.org/3.12/library/token.html#module-token + +LPAR + : '(' + ; // OPEN_PAREN + +LSQB + : '[' 
+ ; // OPEN_BRACK + +LBRACE + : '{' + ; // OPEN_BRACE + +RPAR + : ')' + ; // CLOSE_PAREN + +RSQB + : ']' + ; // CLOSE_BRACK + +RBRACE + : '}' + ; // CLOSE_BRACE + +DOT + : '.' + ; + +COLON + : ':' + ; + +COMMA + : ',' + ; + +SEMI + : ';' + ; + +PLUS + : '+' + ; + +MINUS + : '-' + ; + +STAR + : '*' + ; + +SLASH + : '/' + ; + +VBAR + : '|' + ; + +AMPER + : '&' + ; + +LESS + : '<' + ; + +GREATER + : '>' + ; + +EQUAL + : '=' + ; + +PERCENT + : '%' + ; + +EQEQUAL + : '==' + ; + +NOTEQUAL + : '!=' + ; + +LESSEQUAL + : '<=' + ; + +GREATEREQUAL + : '>=' + ; + +TILDE + : '~' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#comments -COMMENT : '#' ~[\r\n]* -> channel(HIDDEN); +CIRCUMFLEX + : '^' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#whitespace-between-tokens -WS : [ \t\f]+ -> channel(HIDDEN); +LEFTSHIFT + : '<<' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#explicit-line-joining -EXPLICIT_LINE_JOINING : '\\' NEWLINE -> channel(HIDDEN); +RIGHTSHIFT + : '>>' + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals -SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['] -> type(FSTRING_START), pushMode(SINGLE_QUOTE_FSTRING_MODE); -DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["] -> type(FSTRING_START), pushMode(DOUBLE_QUOTE_FSTRING_MODE); -LONG_SINGLE_QUOTE_FSTRING_START : F_STRING_PREFIX ['][']['] -> type(FSTRING_START), pushMode(LONG_SINGLE_QUOTE_FSTRING_MODE); -LONG_DOUBLE_QUOTE_FSTRING_START : F_STRING_PREFIX ["]["]["] -> type(FSTRING_START), pushMode(LONG_DOUBLE_QUOTE_FSTRING_MODE); +DOUBLESTAR + : '**' + ; -ERROR_TOKEN : . 
; // catch the unrecognized characters and redirect these errors to the parser +PLUSEQUAL + : '+=' + ; +MINEQUAL + : '-=' + ; +STAREQUAL + : '*=' + ; + +SLASHEQUAL + : '/=' + ; + +PERCENTEQUAL + : '%=' + ; + +AMPEREQUAL + : '&=' + ; + +VBAREQUAL + : '|=' + ; + +CIRCUMFLEXEQUAL + : '^=' + ; + +LEFTSHIFTEQUAL + : '<<=' + ; + +RIGHTSHIFTEQUAL + : '>>=' + ; + +DOUBLESTAREQUAL + : '**=' + ; + +DOUBLESLASH + : '//' + ; + +DOUBLESLASHEQUAL + : '//=' + ; + +AT + : '@' + ; + +ATEQUAL + : '@=' + ; + +RARROW + : '->' + ; + +ELLIPSIS + : '...' + ; + +COLONEQUAL + : ':=' + ; + +EXCLAMATION + : '!' + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#identifiers + +NAME + : ID_START ID_CONTINUE* + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#numeric-literals + +NUMBER + : INTEGER + | FLOAT_NUMBER + | IMAG_NUMBER + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals + +STRING + : STRING_LITERAL + | BYTES_LITERAL + ; + // https://peps.python.org/pep-0484/#type-comments + +TYPE_COMMENT + : '#' WS? 
'type:' ~ [\r\n]* + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines + +NEWLINE + : OS_INDEPENDENT_NL + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#comments + +COMMENT + : '#' ~ [\r\n]* -> channel (HIDDEN) + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#whitespace-between-tokens + +WS + : [ \t\f]+ -> channel (HIDDEN) + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#explicit-line-joining + +EXPLICIT_LINE_JOINING + : '\\' NEWLINE -> channel (HIDDEN) + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals + +SINGLE_QUOTE_FSTRING_START + : F_STRING_PREFIX ['] -> type (FSTRING_START) , pushMode (SINGLE_QUOTE_FSTRING_MODE) + ; + +DOUBLE_QUOTE_FSTRING_START + : F_STRING_PREFIX ["] -> type (FSTRING_START) , pushMode (DOUBLE_QUOTE_FSTRING_MODE) + ; + +LONG_SINGLE_QUOTE_FSTRING_START + : F_STRING_PREFIX ['] ['] ['] -> type (FSTRING_START) , pushMode (LONG_SINGLE_QUOTE_FSTRING_MODE) + ; + +LONG_DOUBLE_QUOTE_FSTRING_START + : F_STRING_PREFIX ["] ["] ["] -> type (FSTRING_START) , pushMode (LONG_DOUBLE_QUOTE_FSTRING_MODE) + ; + +ERROR_TOKEN + : . 
+ ; // catch the unrecognized characters and redirect these errors to the parser + /* * other lexer modes */ - + + mode SINGLE_QUOTE_FSTRING_MODE; - SINGLE_QUOTE_FSTRING_END : ['] -> type(FSTRING_END), popMode; - SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +SINGLE_QUOTE_FSTRING_END + : ['] -> type (FSTRING_END) , popMode + ; +SINGLE_QUOTE_FSTRING_MIDDLE + : SINGLE_QUOTE_FSTRING_LITERAL -> type (FSTRING_MIDDLE) + ; + +SINGLE_QUOTE_FSTRING_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + mode DOUBLE_QUOTE_FSTRING_MODE; - DOUBLE_QUOTE_FSTRING_END : ["] -> type(FSTRING_END), popMode; - DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +DOUBLE_QUOTE_FSTRING_END + : ["] -> type (FSTRING_END) , popMode + ; + +DOUBLE_QUOTE_FSTRING_MIDDLE + : DOUBLE_QUOTE_FSTRING_LITERAL -> type (FSTRING_MIDDLE) + ; +DOUBLE_QUOTE_FSTRING_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + mode LONG_SINGLE_QUOTE_FSTRING_MODE; - LONG_SINGLE_QUOTE_FSTRING_END : ['][']['] -> type(FSTRING_END), popMode; - LONG_SINGLE_QUOTE_FSTRING_MIDDLE : SINGLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - LONG_SINGLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +LONG_SINGLE_QUOTE_FSTRING_END + : ['] ['] ['] -> type (FSTRING_END) , popMode + ; + +LONG_SINGLE_QUOTE_FSTRING_MIDDLE + : SINGLE_QUOTE_FSTRING_LITERAL -> type (FSTRING_MIDDLE) + ; +LONG_SINGLE_QUOTE_FSTRING_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE or 
SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + mode LONG_DOUBLE_QUOTE_FSTRING_MODE; - LONG_DOUBLE_QUOTE_FSTRING_END : ["]["]["] -> type(FSTRING_END), popMode; - LONG_DOUBLE_QUOTE_FSTRING_MIDDLE : DOUBLE_QUOTE_FSTRING_LITERAL -> type(FSTRING_MIDDLE); - LONG_DOUBLE_QUOTE_FSTRING_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE +LONG_DOUBLE_QUOTE_FSTRING_END + : ["] ["] ["] -> type (FSTRING_END) , popMode + ; +LONG_DOUBLE_QUOTE_FSTRING_MIDDLE + : DOUBLE_QUOTE_FSTRING_LITERAL -> type (FSTRING_MIDDLE) + ; + +LONG_DOUBLE_QUOTE_FSTRING_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE or DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + mode SINGLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon - SINGLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE+ -> type(FSTRING_MIDDLE); - SINGLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class - SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class +SINGLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE + : FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE+ -> type (FSTRING_MIDDLE) + ; + +SINGLE_QUOTE_FORMAT_SPECIFICATION_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE by PythonLexerBase class + +SINGLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + : '}' -> type (RBRACE) + ; // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class + mode DOUBLE_QUOTE_FORMAT_SPECIFICATION_MODE; // only used after a format specifier colon - DOUBLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE : FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE+ -> type(FSTRING_MIDDLE); - DOUBLE_QUOTE_FORMAT_SPECIFICATION_LBRACE : '{' -> type(LBRACE); // will be closed in DEFAULT_MODE by PythonLexerBase class - DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE : '}' -> type(RBRACE); // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class 
+DOUBLE_QUOTE_FORMAT_SPECIFICATION_FSTRING_MIDDLE + : FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE+ -> type (FSTRING_MIDDLE) + ; +DOUBLE_QUOTE_FORMAT_SPECIFICATION_LBRACE + : '{' -> type (LBRACE) + ; // will be closed in DEFAULT_MODE by PythonLexerBase class + +DOUBLE_QUOTE_FORMAT_SPECIFICATION_RBRACE + : '}' -> type (RBRACE) + ; // popMode to ..._QUOTE_FSTRING_MODE by PythonLexerBase class + /* * fragments */ + + + // https://docs.python.org/3.12/reference/lexical_analysis.html#literals + + // https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals + +fragment STRING_LITERAL + : STRING_PREFIX? (SHORT_STRING | LONG_STRING) + ; -// https://docs.python.org/3.12/reference/lexical_analysis.html#literals - -// https://docs.python.org/3.12/reference/lexical_analysis.html#string-and-bytes-literals -fragment STRING_LITERAL : STRING_PREFIX? (SHORT_STRING | LONG_STRING); -fragment STRING_PREFIX : 'r' | 'u' | 'R' | 'U'; +fragment STRING_PREFIX + : 'r' + | 'u' + | 'R' + | 'U' + ; fragment SHORT_STRING - : '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\'' - | '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"' - ; + : '\'' SHORT_STRING_ITEM_FOR_SINGLE_QUOTE* '\'' + | '"' SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE* '"' + ; fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' - ; - -fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE : SHORT_STRING_CHAR_NO_SINGLE_QUOTE | STRING_ESCAPE_SEQ; -fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE | STRING_ESCAPE_SEQ; + : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' + | '"""' LONG_STRING_ITEM*? 
'"""' + ; -fragment LONG_STRING_ITEM : LONG_STRING_CHAR | STRING_ESCAPE_SEQ; +fragment SHORT_STRING_ITEM_FOR_SINGLE_QUOTE + : SHORT_STRING_CHAR_NO_SINGLE_QUOTE + | STRING_ESCAPE_SEQ + ; -fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE : ~[\\\r\n']; // -fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE : ~[\\\r\n"]; // +fragment SHORT_STRING_ITEM_FOR_DOUBLE_QUOTE + : SHORT_STRING_CHAR_NO_DOUBLE_QUOTE + | STRING_ESCAPE_SEQ + ; -fragment LONG_STRING_CHAR : ~'\\'; // +fragment LONG_STRING_ITEM + : LONG_STRING_CHAR + | STRING_ESCAPE_SEQ + ; +fragment SHORT_STRING_CHAR_NO_SINGLE_QUOTE + : ~ [\\\r\n'] + ; // + +fragment SHORT_STRING_CHAR_NO_DOUBLE_QUOTE + : ~ [\\\r\n"] + ; // + +fragment LONG_STRING_CHAR + : ~ '\\' + ; // + fragment STRING_ESCAPE_SEQ - : '\\' OS_INDEPENDENT_NL // \ escape sequence - | '\\' . // "\" - ; // the \ (not \n) escape sequences will be removed from the string literals by the PythonLexerBase class + : '\\' OS_INDEPENDENT_NL // \ escape sequence + | '\\' . // "\" + + ; // the \ (not \n) escape sequences will be removed from the string literals by the PythonLexerBase class + +fragment BYTES_LITERAL + : BYTES_PREFIX (SHORT_BYTES | LONG_BYTES) + ; -fragment BYTES_LITERAL : BYTES_PREFIX (SHORT_BYTES | LONG_BYTES); -fragment BYTES_PREFIX : 'b' | 'B' | 'br' | 'Br' | 'bR' | 'BR' | 'rb' | 'rB' | 'Rb' | 'RB'; +fragment BYTES_PREFIX + : 'b' + | 'B' + | 'br' + | 'Br' + | 'bR' + | 'BR' + | 'rb' + | 'rB' + | 'Rb' + | 'RB' + ; fragment SHORT_BYTES - : '\'' SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE* '\'' - | '"' SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE* '"' - ; + : '\'' SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE* '\'' + | '"' SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE* '"' + ; fragment LONG_BYTES - : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' - | '"""' LONG_BYTES_ITEM*? 
'"""' - ; - -fragment SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE : SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ; -fragment SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE : SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ; - -fragment LONG_BYTES_ITEM : LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; - -fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE // - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0026] - | [\u0028-\u005B] - | [\u005D-\u007F] - ; - -fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE // - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0021] - | [\u0023-\u005B] - | [\u005D-\u007F] - ; - -fragment LONG_BYTES_CHAR : [\u0000-\u005B] | [\u005D-\u007F]; // -fragment BYTES_ESCAPE_SEQ : '\\' [\u0000-\u007F]; // "\" - -// https://docs.python.org/3.12/library/string.html#format-specification-mini-language -fragment SINGLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE | DOUBLE_BRACES)+; -fragment DOUBLE_QUOTE_FSTRING_LITERAL : (FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE | DOUBLE_BRACES)+; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals -fragment F_STRING_PREFIX : 'f' | 'F' | 'fr' | 'Fr' | 'fR' | 'FR' | 'rf' | 'rF' | 'Rf' | 'RF'; -fragment FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE : ~[{}']; -fragment FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE : ~[{}"]; -fragment DOUBLE_BRACES : '{{' | '}}'; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#integer-literals -fragment INTEGER : DEC_INTEGER | BIN_INTEGER | OCT_INTEGER | HEX_INTEGER; -fragment DEC_INTEGER : NON_ZERO_DIGIT ('_'? DIGIT)* | '0'+ ('_'? '0')*; -fragment BIN_INTEGER : '0' ('b' | 'B') ('_'? BIN_DIGIT)+; -fragment OCT_INTEGER : '0' ('o' | 'O') ('_'? OCT_DIGIT)+; -fragment HEX_INTEGER : '0' ('x' | 'X') ('_'? 
HEX_DIGIT)+; -fragment NON_ZERO_DIGIT : [1-9]; -fragment DIGIT : [0-9]; -fragment BIN_DIGIT : '0' | '1'; -fragment OCT_DIGIT : [0-7]; -fragment HEX_DIGIT : DIGIT | [a-f] | [A-F]; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#floating-point-literals -fragment FLOAT_NUMBER : POINT_FLOAT | EXPONENT_FLOAT; -fragment POINT_FLOAT : DIGIT_PART? FRACTION | DIGIT_PART '.'; -fragment EXPONENT_FLOAT : (DIGIT_PART | POINT_FLOAT) EXPONENT; -fragment DIGIT_PART : DIGIT ('_'? DIGIT)*; -fragment FRACTION : '.' DIGIT_PART; -fragment EXPONENT : ('e' | 'E') ('+' | '-')? DIGIT_PART; - -// https://docs.python.org/3.12/reference/lexical_analysis.html#imaginary-literals -fragment IMAG_NUMBER : (FLOAT_NUMBER | DIGIT_PART) ('j' | 'J'); - -// https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines -fragment OS_INDEPENDENT_NL : '\r'? '\n'; // Unix, Windows - -// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers -fragment ID_CONTINUE: - ID_START - | '\u{0030}' .. '\u{0039}' - | '\u{00B7}' - | '\u{0300}' .. '\u{036F}' - | '\u{0387}' - | '\u{0483}' .. '\u{0487}' - | '\u{0591}' .. '\u{05BD}' - | '\u{05BF}' - | '\u{05C1}' .. '\u{05C2}' - | '\u{05C4}' .. '\u{05C5}' - | '\u{05C7}' - | '\u{0610}' .. '\u{061A}' - | '\u{064B}' .. '\u{0669}' - | '\u{0670}' - | '\u{06D6}' .. '\u{06DC}' - | '\u{06DF}' .. '\u{06E4}' - | '\u{06E7}' .. '\u{06E8}' - | '\u{06EA}' .. '\u{06ED}' - | '\u{06F0}' .. '\u{06F9}' - | '\u{0711}' - | '\u{0730}' .. '\u{074A}' - | '\u{07A6}' .. '\u{07B0}' - | '\u{07C0}' .. '\u{07C9}' - | '\u{07EB}' .. '\u{07F3}' - | '\u{07FD}' - | '\u{0816}' .. '\u{0819}' - | '\u{081B}' .. '\u{0823}' - | '\u{0825}' .. '\u{0827}' - | '\u{0829}' .. '\u{082D}' - | '\u{0859}' .. '\u{085B}' - | '\u{0898}' .. '\u{089F}' - | '\u{08CA}' .. '\u{08E1}' - | '\u{08E3}' .. '\u{0903}' - | '\u{093A}' .. '\u{093C}' - | '\u{093E}' .. '\u{094F}' - | '\u{0951}' .. '\u{0957}' - | '\u{0962}' .. '\u{0963}' - | '\u{0966}' .. 
'\u{096F}' - | '\u{0981}' .. '\u{0983}' - | '\u{09BC}' - | '\u{09BE}' .. '\u{09C4}' - | '\u{09C7}' .. '\u{09C8}' - | '\u{09CB}' .. '\u{09CD}' - | '\u{09D7}' - | '\u{09E2}' .. '\u{09E3}' - | '\u{09E6}' .. '\u{09EF}' - | '\u{09FE}' - | '\u{0A01}' .. '\u{0A03}' - | '\u{0A3C}' - | '\u{0A3E}' .. '\u{0A42}' - | '\u{0A47}' .. '\u{0A48}' - | '\u{0A4B}' .. '\u{0A4D}' - | '\u{0A51}' - | '\u{0A66}' .. '\u{0A71}' - | '\u{0A75}' - | '\u{0A81}' .. '\u{0A83}' - | '\u{0ABC}' - | '\u{0ABE}' .. '\u{0AC5}' - | '\u{0AC7}' .. '\u{0AC9}' - | '\u{0ACB}' .. '\u{0ACD}' - | '\u{0AE2}' .. '\u{0AE3}' - | '\u{0AE6}' .. '\u{0AEF}' - | '\u{0AFA}' .. '\u{0AFF}' - | '\u{0B01}' .. '\u{0B03}' - | '\u{0B3C}' - | '\u{0B3E}' .. '\u{0B44}' - | '\u{0B47}' .. '\u{0B48}' - | '\u{0B4B}' .. '\u{0B4D}' - | '\u{0B55}' .. '\u{0B57}' - | '\u{0B62}' .. '\u{0B63}' - | '\u{0B66}' .. '\u{0B6F}' - | '\u{0B82}' - | '\u{0BBE}' .. '\u{0BC2}' - | '\u{0BC6}' .. '\u{0BC8}' - | '\u{0BCA}' .. '\u{0BCD}' - | '\u{0BD7}' - | '\u{0BE6}' .. '\u{0BEF}' - | '\u{0C00}' .. '\u{0C04}' - | '\u{0C3C}' - | '\u{0C3E}' .. '\u{0C44}' - | '\u{0C46}' .. '\u{0C48}' - | '\u{0C4A}' .. '\u{0C4D}' - | '\u{0C55}' .. '\u{0C56}' - | '\u{0C62}' .. '\u{0C63}' - | '\u{0C66}' .. '\u{0C6F}' - | '\u{0C81}' .. '\u{0C83}' - | '\u{0CBC}' - | '\u{0CBE}' .. '\u{0CC4}' - | '\u{0CC6}' .. '\u{0CC8}' - | '\u{0CCA}' .. '\u{0CCD}' - | '\u{0CD5}' .. '\u{0CD6}' - | '\u{0CE2}' .. '\u{0CE3}' - | '\u{0CE6}' .. '\u{0CEF}' - | '\u{0CF3}' - | '\u{0D00}' .. '\u{0D03}' - | '\u{0D3B}' .. '\u{0D3C}' - | '\u{0D3E}' .. '\u{0D44}' - | '\u{0D46}' .. '\u{0D48}' - | '\u{0D4A}' .. '\u{0D4D}' - | '\u{0D57}' - | '\u{0D62}' .. '\u{0D63}' - | '\u{0D66}' .. '\u{0D6F}' - | '\u{0D81}' .. '\u{0D83}' - | '\u{0DCA}' - | '\u{0DCF}' .. '\u{0DD4}' - | '\u{0DD6}' - | '\u{0DD8}' .. '\u{0DDF}' - | '\u{0DE6}' .. '\u{0DEF}' - | '\u{0DF2}' .. '\u{0DF3}' - | '\u{0E31}' - | '\u{0E33}' .. '\u{0E3A}' - | '\u{0E47}' .. '\u{0E4E}' - | '\u{0E50}' .. '\u{0E59}' - | '\u{0EB1}' - | '\u{0EB3}' .. 
'\u{0EBC}' - | '\u{0EC8}' .. '\u{0ECE}' - | '\u{0ED0}' .. '\u{0ED9}' - | '\u{0F18}' .. '\u{0F19}' - | '\u{0F20}' .. '\u{0F29}' - | '\u{0F35}' - | '\u{0F37}' - | '\u{0F39}' - | '\u{0F3E}' .. '\u{0F3F}' - | '\u{0F71}' .. '\u{0F84}' - | '\u{0F86}' .. '\u{0F87}' - | '\u{0F8D}' .. '\u{0F97}' - | '\u{0F99}' .. '\u{0FBC}' - | '\u{0FC6}' - | '\u{102B}' .. '\u{103E}' - | '\u{1040}' .. '\u{1049}' - | '\u{1056}' .. '\u{1059}' - | '\u{105E}' .. '\u{1060}' - | '\u{1062}' .. '\u{1064}' - | '\u{1067}' .. '\u{106D}' - | '\u{1071}' .. '\u{1074}' - | '\u{1082}' .. '\u{108D}' - | '\u{108F}' .. '\u{109D}' - | '\u{135D}' .. '\u{135F}' - | '\u{1369}' .. '\u{1371}' - | '\u{1712}' .. '\u{1715}' - | '\u{1732}' .. '\u{1734}' - | '\u{1752}' .. '\u{1753}' - | '\u{1772}' .. '\u{1773}' - | '\u{17B4}' .. '\u{17D3}' - | '\u{17DD}' - | '\u{17E0}' .. '\u{17E9}' - | '\u{180B}' .. '\u{180D}' - | '\u{180F}' .. '\u{1819}' - | '\u{18A9}' - | '\u{1920}' .. '\u{192B}' - | '\u{1930}' .. '\u{193B}' - | '\u{1946}' .. '\u{194F}' - | '\u{19D0}' .. '\u{19DA}' - | '\u{1A17}' .. '\u{1A1B}' - | '\u{1A55}' .. '\u{1A5E}' - | '\u{1A60}' .. '\u{1A7C}' - | '\u{1A7F}' .. '\u{1A89}' - | '\u{1A90}' .. '\u{1A99}' - | '\u{1AB0}' .. '\u{1ABD}' - | '\u{1ABF}' .. '\u{1ACE}' - | '\u{1B00}' .. '\u{1B04}' - | '\u{1B34}' .. '\u{1B44}' - | '\u{1B50}' .. '\u{1B59}' - | '\u{1B6B}' .. '\u{1B73}' - | '\u{1B80}' .. '\u{1B82}' - | '\u{1BA1}' .. '\u{1BAD}' - | '\u{1BB0}' .. '\u{1BB9}' - | '\u{1BE6}' .. '\u{1BF3}' - | '\u{1C24}' .. '\u{1C37}' - | '\u{1C40}' .. '\u{1C49}' - | '\u{1C50}' .. '\u{1C59}' - | '\u{1CD0}' .. '\u{1CD2}' - | '\u{1CD4}' .. '\u{1CE8}' - | '\u{1CED}' - | '\u{1CF4}' - | '\u{1CF7}' .. '\u{1CF9}' - | '\u{1DC0}' .. '\u{1DFF}' - | '\u{203F}' .. '\u{2040}' - | '\u{2054}' - | '\u{20D0}' .. '\u{20DC}' - | '\u{20E1}' - | '\u{20E5}' .. '\u{20F0}' - | '\u{2CEF}' .. '\u{2CF1}' - | '\u{2D7F}' - | '\u{2DE0}' .. '\u{2DFF}' - | '\u{302A}' .. '\u{302F}' - | '\u{3099}' .. '\u{309A}' - | '\u{A620}' .. 
'\u{A629}' - | '\u{A66F}' - | '\u{A674}' .. '\u{A67D}' - | '\u{A69E}' .. '\u{A69F}' - | '\u{A6F0}' .. '\u{A6F1}' - | '\u{A802}' - | '\u{A806}' - | '\u{A80B}' - | '\u{A823}' .. '\u{A827}' - | '\u{A82C}' - | '\u{A880}' .. '\u{A881}' - | '\u{A8B4}' .. '\u{A8C5}' - | '\u{A8D0}' .. '\u{A8D9}' - | '\u{A8E0}' .. '\u{A8F1}' - | '\u{A8FF}' .. '\u{A909}' - | '\u{A926}' .. '\u{A92D}' - | '\u{A947}' .. '\u{A953}' - | '\u{A980}' .. '\u{A983}' - | '\u{A9B3}' .. '\u{A9C0}' - | '\u{A9D0}' .. '\u{A9D9}' - | '\u{A9E5}' - | '\u{A9F0}' .. '\u{A9F9}' - | '\u{AA29}' .. '\u{AA36}' - | '\u{AA43}' - | '\u{AA4C}' .. '\u{AA4D}' - | '\u{AA50}' .. '\u{AA59}' - | '\u{AA7B}' .. '\u{AA7D}' - | '\u{AAB0}' - | '\u{AAB2}' .. '\u{AAB4}' - | '\u{AAB7}' .. '\u{AAB8}' - | '\u{AABE}' .. '\u{AABF}' - | '\u{AAC1}' - | '\u{AAEB}' .. '\u{AAEF}' - | '\u{AAF5}' .. '\u{AAF6}' - | '\u{ABE3}' .. '\u{ABEA}' - | '\u{ABEC}' .. '\u{ABED}' - | '\u{ABF0}' .. '\u{ABF9}' - | '\u{FB1E}' - | '\u{FE00}' .. '\u{FE0F}' - | '\u{FE20}' .. '\u{FE2F}' - | '\u{FE33}' .. '\u{FE34}' - | '\u{FE4D}' .. '\u{FE4F}' - | '\u{FF10}' .. '\u{FF19}' - | '\u{FF3F}' - | '\u{FF9E}' .. '\u{FF9F}' - | '\u{101FD}' - | '\u{102E0}' - | '\u{10376}' .. '\u{1037A}' - | '\u{104A0}' .. '\u{104A9}' - | '\u{10A01}' .. '\u{10A03}' - | '\u{10A05}' .. '\u{10A06}' - | '\u{10A0C}' .. '\u{10A0F}' - | '\u{10A38}' .. '\u{10A3A}' - | '\u{10A3F}' - | '\u{10AE5}' .. '\u{10AE6}' - | '\u{10D24}' .. '\u{10D27}' - | '\u{10D30}' .. '\u{10D39}' - | '\u{10EAB}' .. '\u{10EAC}' - | '\u{10EFD}' .. '\u{10EFF}' - | '\u{10F46}' .. '\u{10F50}' - | '\u{10F82}' .. '\u{10F85}' - | '\u{11000}' .. '\u{11002}' - | '\u{11038}' .. '\u{11046}' - | '\u{11066}' .. '\u{11070}' - | '\u{11073}' .. '\u{11074}' - | '\u{1107F}' .. '\u{11082}' - | '\u{110B0}' .. '\u{110BA}' - | '\u{110C2}' - | '\u{110F0}' .. '\u{110F9}' - | '\u{11100}' .. '\u{11102}' - | '\u{11127}' .. '\u{11134}' - | '\u{11136}' .. '\u{1113F}' - | '\u{11145}' .. '\u{11146}' - | '\u{11173}' - | '\u{11180}' .. 
'\u{11182}' - | '\u{111B3}' .. '\u{111C0}' - | '\u{111C9}' .. '\u{111CC}' - | '\u{111CE}' .. '\u{111D9}' - | '\u{1122C}' .. '\u{11237}' - | '\u{1123E}' - | '\u{11241}' - | '\u{112DF}' .. '\u{112EA}' - | '\u{112F0}' .. '\u{112F9}' - | '\u{11300}' .. '\u{11303}' - | '\u{1133B}' .. '\u{1133C}' - | '\u{1133E}' .. '\u{11344}' - | '\u{11347}' .. '\u{11348}' - | '\u{1134B}' .. '\u{1134D}' - | '\u{11357}' - | '\u{11362}' .. '\u{11363}' - | '\u{11366}' .. '\u{1136C}' - | '\u{11370}' .. '\u{11374}' - | '\u{11435}' .. '\u{11446}' - | '\u{11450}' .. '\u{11459}' - | '\u{1145E}' - | '\u{114B0}' .. '\u{114C3}' - | '\u{114D0}' .. '\u{114D9}' - | '\u{115AF}' .. '\u{115B5}' - | '\u{115B8}' .. '\u{115C0}' - | '\u{115DC}' .. '\u{115DD}' - | '\u{11630}' .. '\u{11640}' - | '\u{11650}' .. '\u{11659}' - | '\u{116AB}' .. '\u{116B7}' - | '\u{116C0}' .. '\u{116C9}' - | '\u{1171D}' .. '\u{1172B}' - | '\u{11730}' .. '\u{11739}' - | '\u{1182C}' .. '\u{1183A}' - | '\u{118E0}' .. '\u{118E9}' - | '\u{11930}' .. '\u{11935}' - | '\u{11937}' .. '\u{11938}' - | '\u{1193B}' .. '\u{1193E}' - | '\u{11940}' - | '\u{11942}' .. '\u{11943}' - | '\u{11950}' .. '\u{11959}' - | '\u{119D1}' .. '\u{119D7}' - | '\u{119DA}' .. '\u{119E0}' - | '\u{119E4}' - | '\u{11A01}' .. '\u{11A0A}' - | '\u{11A33}' .. '\u{11A39}' - | '\u{11A3B}' .. '\u{11A3E}' - | '\u{11A47}' - | '\u{11A51}' .. '\u{11A5B}' - | '\u{11A8A}' .. '\u{11A99}' - | '\u{11C2F}' .. '\u{11C36}' - | '\u{11C38}' .. '\u{11C3F}' - | '\u{11C50}' .. '\u{11C59}' - | '\u{11C92}' .. '\u{11CA7}' - | '\u{11CA9}' .. '\u{11CB6}' - | '\u{11D31}' .. '\u{11D36}' - | '\u{11D3A}' - | '\u{11D3C}' .. '\u{11D3D}' - | '\u{11D3F}' .. '\u{11D45}' - | '\u{11D47}' - | '\u{11D50}' .. '\u{11D59}' - | '\u{11D8A}' .. '\u{11D8E}' - | '\u{11D90}' .. '\u{11D91}' - | '\u{11D93}' .. '\u{11D97}' - | '\u{11DA0}' .. '\u{11DA9}' - | '\u{11EF3}' .. '\u{11EF6}' - | '\u{11F00}' .. '\u{11F01}' - | '\u{11F03}' - | '\u{11F34}' .. '\u{11F3A}' - | '\u{11F3E}' .. '\u{11F42}' - | '\u{11F50}' .. 
'\u{11F59}' - | '\u{13440}' - | '\u{13447}' .. '\u{13455}' - | '\u{16A60}' .. '\u{16A69}' - | '\u{16AC0}' .. '\u{16AC9}' - | '\u{16AF0}' .. '\u{16AF4}' - | '\u{16B30}' .. '\u{16B36}' - | '\u{16B50}' .. '\u{16B59}' - | '\u{16F4F}' - | '\u{16F51}' .. '\u{16F87}' - | '\u{16F8F}' .. '\u{16F92}' - | '\u{16FE4}' - | '\u{16FF0}' .. '\u{16FF1}' - | '\u{1BC9D}' .. '\u{1BC9E}' - | '\u{1CF00}' .. '\u{1CF2D}' - | '\u{1CF30}' .. '\u{1CF46}' - | '\u{1D165}' .. '\u{1D169}' - | '\u{1D16D}' .. '\u{1D172}' - | '\u{1D17B}' .. '\u{1D182}' - | '\u{1D185}' .. '\u{1D18B}' - | '\u{1D1AA}' .. '\u{1D1AD}' - | '\u{1D242}' .. '\u{1D244}' - | '\u{1D7CE}' .. '\u{1D7FF}' - | '\u{1DA00}' .. '\u{1DA36}' - | '\u{1DA3B}' .. '\u{1DA6C}' - | '\u{1DA75}' - | '\u{1DA84}' - | '\u{1DA9B}' .. '\u{1DA9F}' - | '\u{1DAA1}' .. '\u{1DAAF}' - | '\u{1E000}' .. '\u{1E006}' - | '\u{1E008}' .. '\u{1E018}' - | '\u{1E01B}' .. '\u{1E021}' - | '\u{1E023}' .. '\u{1E024}' - | '\u{1E026}' .. '\u{1E02A}' - | '\u{1E08F}' - | '\u{1E130}' .. '\u{1E136}' - | '\u{1E140}' .. '\u{1E149}' - | '\u{1E2AE}' - | '\u{1E2EC}' .. '\u{1E2F9}' - | '\u{1E4EC}' .. '\u{1E4F9}' - | '\u{1E8D0}' .. '\u{1E8D6}' - | '\u{1E944}' .. '\u{1E94A}' - | '\u{1E950}' .. '\u{1E959}' - | '\u{1FBF0}' .. '\u{1FBF9}' - | '\u{E0100}' .. '\u{E01EF}' -; - -// https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers -fragment ID_START: - '\u{0041}' .. '\u{005A}' - | '\u{005F}' - | '\u{0061}' .. '\u{007A}' - | '\u{00AA}' - | '\u{00B5}' - | '\u{00BA}' - | '\u{00C0}' .. '\u{00D6}' - | '\u{00D8}' .. '\u{00F6}' - | '\u{00F8}' .. '\u{02C1}' - | '\u{02C6}' .. '\u{02D1}' - | '\u{02E0}' .. '\u{02E4}' - | '\u{02EC}' - | '\u{02EE}' - | '\u{0370}' .. '\u{0374}' - | '\u{0376}' .. '\u{0377}' - | '\u{037B}' .. '\u{037D}' - | '\u{037F}' - | '\u{0386}' - | '\u{0388}' .. '\u{038A}' - | '\u{038C}' - | '\u{038E}' .. '\u{03A1}' - | '\u{03A3}' .. '\u{03F5}' - | '\u{03F7}' .. '\u{0481}' - | '\u{048A}' .. '\u{052F}' - | '\u{0531}' .. 
'\u{0556}' - | '\u{0559}' - | '\u{0560}' .. '\u{0588}' - | '\u{05D0}' .. '\u{05EA}' - | '\u{05EF}' .. '\u{05F2}' - | '\u{0620}' .. '\u{064A}' - | '\u{066E}' .. '\u{066F}' - | '\u{0671}' .. '\u{06D3}' - | '\u{06D5}' - | '\u{06E5}' .. '\u{06E6}' - | '\u{06EE}' .. '\u{06EF}' - | '\u{06FA}' .. '\u{06FC}' - | '\u{06FF}' - | '\u{0710}' - | '\u{0712}' .. '\u{072F}' - | '\u{074D}' .. '\u{07A5}' - | '\u{07B1}' - | '\u{07CA}' .. '\u{07EA}' - | '\u{07F4}' .. '\u{07F5}' - | '\u{07FA}' - | '\u{0800}' .. '\u{0815}' - | '\u{081A}' - | '\u{0824}' - | '\u{0828}' - | '\u{0840}' .. '\u{0858}' - | '\u{0860}' .. '\u{086A}' - | '\u{0870}' .. '\u{0887}' - | '\u{0889}' .. '\u{088E}' - | '\u{08A0}' .. '\u{08C9}' - | '\u{0904}' .. '\u{0939}' - | '\u{093D}' - | '\u{0950}' - | '\u{0958}' .. '\u{0961}' - | '\u{0971}' .. '\u{0980}' - | '\u{0985}' .. '\u{098C}' - | '\u{098F}' .. '\u{0990}' - | '\u{0993}' .. '\u{09A8}' - | '\u{09AA}' .. '\u{09B0}' - | '\u{09B2}' - | '\u{09B6}' .. '\u{09B9}' - | '\u{09BD}' - | '\u{09CE}' - | '\u{09DC}' .. '\u{09DD}' - | '\u{09DF}' .. '\u{09E1}' - | '\u{09F0}' .. '\u{09F1}' - | '\u{09FC}' - | '\u{0A05}' .. '\u{0A0A}' - | '\u{0A0F}' .. '\u{0A10}' - | '\u{0A13}' .. '\u{0A28}' - | '\u{0A2A}' .. '\u{0A30}' - | '\u{0A32}' .. '\u{0A33}' - | '\u{0A35}' .. '\u{0A36}' - | '\u{0A38}' .. '\u{0A39}' - | '\u{0A59}' .. '\u{0A5C}' - | '\u{0A5E}' - | '\u{0A72}' .. '\u{0A74}' - | '\u{0A85}' .. '\u{0A8D}' - | '\u{0A8F}' .. '\u{0A91}' - | '\u{0A93}' .. '\u{0AA8}' - | '\u{0AAA}' .. '\u{0AB0}' - | '\u{0AB2}' .. '\u{0AB3}' - | '\u{0AB5}' .. '\u{0AB9}' - | '\u{0ABD}' - | '\u{0AD0}' - | '\u{0AE0}' .. '\u{0AE1}' - | '\u{0AF9}' - | '\u{0B05}' .. '\u{0B0C}' - | '\u{0B0F}' .. '\u{0B10}' - | '\u{0B13}' .. '\u{0B28}' - | '\u{0B2A}' .. '\u{0B30}' - | '\u{0B32}' .. '\u{0B33}' - | '\u{0B35}' .. '\u{0B39}' - | '\u{0B3D}' - | '\u{0B5C}' .. '\u{0B5D}' - | '\u{0B5F}' .. '\u{0B61}' - | '\u{0B71}' - | '\u{0B83}' - | '\u{0B85}' .. '\u{0B8A}' - | '\u{0B8E}' .. '\u{0B90}' - | '\u{0B92}' .. 
'\u{0B95}' - | '\u{0B99}' .. '\u{0B9A}' - | '\u{0B9C}' - | '\u{0B9E}' .. '\u{0B9F}' - | '\u{0BA3}' .. '\u{0BA4}' - | '\u{0BA8}' .. '\u{0BAA}' - | '\u{0BAE}' .. '\u{0BB9}' - | '\u{0BD0}' - | '\u{0C05}' .. '\u{0C0C}' - | '\u{0C0E}' .. '\u{0C10}' - | '\u{0C12}' .. '\u{0C28}' - | '\u{0C2A}' .. '\u{0C39}' - | '\u{0C3D}' - | '\u{0C58}' .. '\u{0C5A}' - | '\u{0C5D}' - | '\u{0C60}' .. '\u{0C61}' - | '\u{0C80}' - | '\u{0C85}' .. '\u{0C8C}' - | '\u{0C8E}' .. '\u{0C90}' - | '\u{0C92}' .. '\u{0CA8}' - | '\u{0CAA}' .. '\u{0CB3}' - | '\u{0CB5}' .. '\u{0CB9}' - | '\u{0CBD}' - | '\u{0CDD}' .. '\u{0CDE}' - | '\u{0CE0}' .. '\u{0CE1}' - | '\u{0CF1}' .. '\u{0CF2}' - | '\u{0D04}' .. '\u{0D0C}' - | '\u{0D0E}' .. '\u{0D10}' - | '\u{0D12}' .. '\u{0D3A}' - | '\u{0D3D}' - | '\u{0D4E}' - | '\u{0D54}' .. '\u{0D56}' - | '\u{0D5F}' .. '\u{0D61}' - | '\u{0D7A}' .. '\u{0D7F}' - | '\u{0D85}' .. '\u{0D96}' - | '\u{0D9A}' .. '\u{0DB1}' - | '\u{0DB3}' .. '\u{0DBB}' - | '\u{0DBD}' - | '\u{0DC0}' .. '\u{0DC6}' - | '\u{0E01}' .. '\u{0E30}' - | '\u{0E32}' - | '\u{0E40}' .. '\u{0E46}' - | '\u{0E81}' .. '\u{0E82}' - | '\u{0E84}' - | '\u{0E86}' .. '\u{0E8A}' - | '\u{0E8C}' .. '\u{0EA3}' - | '\u{0EA5}' - | '\u{0EA7}' .. '\u{0EB0}' - | '\u{0EB2}' - | '\u{0EBD}' - | '\u{0EC0}' .. '\u{0EC4}' - | '\u{0EC6}' - | '\u{0EDC}' .. '\u{0EDF}' - | '\u{0F00}' - | '\u{0F40}' .. '\u{0F47}' - | '\u{0F49}' .. '\u{0F6C}' - | '\u{0F88}' .. '\u{0F8C}' - | '\u{1000}' .. '\u{102A}' - | '\u{103F}' - | '\u{1050}' .. '\u{1055}' - | '\u{105A}' .. '\u{105D}' - | '\u{1061}' - | '\u{1065}' .. '\u{1066}' - | '\u{106E}' .. '\u{1070}' - | '\u{1075}' .. '\u{1081}' - | '\u{108E}' - | '\u{10A0}' .. '\u{10C5}' - | '\u{10C7}' - | '\u{10CD}' - | '\u{10D0}' .. '\u{10FA}' - | '\u{10FC}' .. '\u{1248}' - | '\u{124A}' .. '\u{124D}' - | '\u{1250}' .. '\u{1256}' - | '\u{1258}' - | '\u{125A}' .. '\u{125D}' - | '\u{1260}' .. '\u{1288}' - | '\u{128A}' .. '\u{128D}' - | '\u{1290}' .. '\u{12B0}' - | '\u{12B2}' .. '\u{12B5}' - | '\u{12B8}' .. 
'\u{12BE}' - | '\u{12C0}' - | '\u{12C2}' .. '\u{12C5}' - | '\u{12C8}' .. '\u{12D6}' - | '\u{12D8}' .. '\u{1310}' - | '\u{1312}' .. '\u{1315}' - | '\u{1318}' .. '\u{135A}' - | '\u{1380}' .. '\u{138F}' - | '\u{13A0}' .. '\u{13F5}' - | '\u{13F8}' .. '\u{13FD}' - | '\u{1401}' .. '\u{166C}' - | '\u{166F}' .. '\u{167F}' - | '\u{1681}' .. '\u{169A}' - | '\u{16A0}' .. '\u{16EA}' - | '\u{16EE}' .. '\u{16F8}' - | '\u{1700}' .. '\u{1711}' - | '\u{171F}' .. '\u{1731}' - | '\u{1740}' .. '\u{1751}' - | '\u{1760}' .. '\u{176C}' - | '\u{176E}' .. '\u{1770}' - | '\u{1780}' .. '\u{17B3}' - | '\u{17D7}' - | '\u{17DC}' - | '\u{1820}' .. '\u{1878}' - | '\u{1880}' .. '\u{18A8}' - | '\u{18AA}' - | '\u{18B0}' .. '\u{18F5}' - | '\u{1900}' .. '\u{191E}' - | '\u{1950}' .. '\u{196D}' - | '\u{1970}' .. '\u{1974}' - | '\u{1980}' .. '\u{19AB}' - | '\u{19B0}' .. '\u{19C9}' - | '\u{1A00}' .. '\u{1A16}' - | '\u{1A20}' .. '\u{1A54}' - | '\u{1AA7}' - | '\u{1B05}' .. '\u{1B33}' - | '\u{1B45}' .. '\u{1B4C}' - | '\u{1B83}' .. '\u{1BA0}' - | '\u{1BAE}' .. '\u{1BAF}' - | '\u{1BBA}' .. '\u{1BE5}' - | '\u{1C00}' .. '\u{1C23}' - | '\u{1C4D}' .. '\u{1C4F}' - | '\u{1C5A}' .. '\u{1C7D}' - | '\u{1C80}' .. '\u{1C88}' - | '\u{1C90}' .. '\u{1CBA}' - | '\u{1CBD}' .. '\u{1CBF}' - | '\u{1CE9}' .. '\u{1CEC}' - | '\u{1CEE}' .. '\u{1CF3}' - | '\u{1CF5}' .. '\u{1CF6}' - | '\u{1CFA}' - | '\u{1D00}' .. '\u{1DBF}' - | '\u{1E00}' .. '\u{1F15}' - | '\u{1F18}' .. '\u{1F1D}' - | '\u{1F20}' .. '\u{1F45}' - | '\u{1F48}' .. '\u{1F4D}' - | '\u{1F50}' .. '\u{1F57}' - | '\u{1F59}' - | '\u{1F5B}' - | '\u{1F5D}' - | '\u{1F5F}' .. '\u{1F7D}' - | '\u{1F80}' .. '\u{1FB4}' - | '\u{1FB6}' .. '\u{1FBC}' - | '\u{1FBE}' - | '\u{1FC2}' .. '\u{1FC4}' - | '\u{1FC6}' .. '\u{1FCC}' - | '\u{1FD0}' .. '\u{1FD3}' - | '\u{1FD6}' .. '\u{1FDB}' - | '\u{1FE0}' .. '\u{1FEC}' - | '\u{1FF2}' .. '\u{1FF4}' - | '\u{1FF6}' .. '\u{1FFC}' - | '\u{2071}' - | '\u{207F}' - | '\u{2090}' .. '\u{209C}' - | '\u{2102}' - | '\u{2107}' - | '\u{210A}' .. 
'\u{2113}' - | '\u{2115}' - | '\u{2118}' .. '\u{211D}' - | '\u{2124}' - | '\u{2126}' - | '\u{2128}' - | '\u{212A}' .. '\u{2139}' - | '\u{213C}' .. '\u{213F}' - | '\u{2145}' .. '\u{2149}' - | '\u{214E}' - | '\u{2160}' .. '\u{2188}' - | '\u{2C00}' .. '\u{2CE4}' - | '\u{2CEB}' .. '\u{2CEE}' - | '\u{2CF2}' .. '\u{2CF3}' - | '\u{2D00}' .. '\u{2D25}' - | '\u{2D27}' - | '\u{2D2D}' - | '\u{2D30}' .. '\u{2D67}' - | '\u{2D6F}' - | '\u{2D80}' .. '\u{2D96}' - | '\u{2DA0}' .. '\u{2DA6}' - | '\u{2DA8}' .. '\u{2DAE}' - | '\u{2DB0}' .. '\u{2DB6}' - | '\u{2DB8}' .. '\u{2DBE}' - | '\u{2DC0}' .. '\u{2DC6}' - | '\u{2DC8}' .. '\u{2DCE}' - | '\u{2DD0}' .. '\u{2DD6}' - | '\u{2DD8}' .. '\u{2DDE}' - | '\u{3005}' .. '\u{3007}' - | '\u{3021}' .. '\u{3029}' - | '\u{3031}' .. '\u{3035}' - | '\u{3038}' .. '\u{303C}' - | '\u{3041}' .. '\u{3096}' - | '\u{309D}' .. '\u{309F}' - | '\u{30A1}' .. '\u{30FA}' - | '\u{30FC}' .. '\u{30FF}' - | '\u{3105}' .. '\u{312F}' - | '\u{3131}' .. '\u{318E}' - | '\u{31A0}' .. '\u{31BF}' - | '\u{31F0}' .. '\u{31FF}' - | '\u{3400}' .. '\u{4DBF}' - | '\u{4E00}' .. '\u{A48C}' - | '\u{A4D0}' .. '\u{A4FD}' - | '\u{A500}' .. '\u{A60C}' - | '\u{A610}' .. '\u{A61F}' - | '\u{A62A}' .. '\u{A62B}' - | '\u{A640}' .. '\u{A66E}' - | '\u{A67F}' .. '\u{A69D}' - | '\u{A6A0}' .. '\u{A6EF}' - | '\u{A717}' .. '\u{A71F}' - | '\u{A722}' .. '\u{A788}' - | '\u{A78B}' .. '\u{A7CA}' - | '\u{A7D0}' .. '\u{A7D1}' - | '\u{A7D3}' - | '\u{A7D5}' .. '\u{A7D9}' - | '\u{A7F2}' .. '\u{A801}' - | '\u{A803}' .. '\u{A805}' - | '\u{A807}' .. '\u{A80A}' - | '\u{A80C}' .. '\u{A822}' - | '\u{A840}' .. '\u{A873}' - | '\u{A882}' .. '\u{A8B3}' - | '\u{A8F2}' .. '\u{A8F7}' - | '\u{A8FB}' - | '\u{A8FD}' .. '\u{A8FE}' - | '\u{A90A}' .. '\u{A925}' - | '\u{A930}' .. '\u{A946}' - | '\u{A960}' .. '\u{A97C}' - | '\u{A984}' .. '\u{A9B2}' - | '\u{A9CF}' - | '\u{A9E0}' .. '\u{A9E4}' - | '\u{A9E6}' .. '\u{A9EF}' - | '\u{A9FA}' .. '\u{A9FE}' - | '\u{AA00}' .. '\u{AA28}' - | '\u{AA40}' .. '\u{AA42}' - | '\u{AA44}' .. 
'\u{AA4B}' - | '\u{AA60}' .. '\u{AA76}' - | '\u{AA7A}' - | '\u{AA7E}' .. '\u{AAAF}' - | '\u{AAB1}' - | '\u{AAB5}' .. '\u{AAB6}' - | '\u{AAB9}' .. '\u{AABD}' - | '\u{AAC0}' - | '\u{AAC2}' - | '\u{AADB}' .. '\u{AADD}' - | '\u{AAE0}' .. '\u{AAEA}' - | '\u{AAF2}' .. '\u{AAF4}' - | '\u{AB01}' .. '\u{AB06}' - | '\u{AB09}' .. '\u{AB0E}' - | '\u{AB11}' .. '\u{AB16}' - | '\u{AB20}' .. '\u{AB26}' - | '\u{AB28}' .. '\u{AB2E}' - | '\u{AB30}' .. '\u{AB5A}' - | '\u{AB5C}' .. '\u{AB69}' - | '\u{AB70}' .. '\u{ABE2}' - | '\u{AC00}' .. '\u{D7A3}' - | '\u{D7B0}' .. '\u{D7C6}' - | '\u{D7CB}' .. '\u{D7FB}' - | '\u{F900}' .. '\u{FA6D}' - | '\u{FA70}' .. '\u{FAD9}' - | '\u{FB00}' .. '\u{FB06}' - | '\u{FB13}' .. '\u{FB17}' - | '\u{FB1D}' - | '\u{FB1F}' .. '\u{FB28}' - | '\u{FB2A}' .. '\u{FB36}' - | '\u{FB38}' .. '\u{FB3C}' - | '\u{FB3E}' - | '\u{FB40}' .. '\u{FB41}' - | '\u{FB43}' .. '\u{FB44}' - | '\u{FB46}' .. '\u{FBB1}' - | '\u{FBD3}' .. '\u{FC5D}' - | '\u{FC64}' .. '\u{FD3D}' - | '\u{FD50}' .. '\u{FD8F}' - | '\u{FD92}' .. '\u{FDC7}' - | '\u{FDF0}' .. '\u{FDF9}' - | '\u{FE71}' - | '\u{FE73}' - | '\u{FE77}' - | '\u{FE79}' - | '\u{FE7B}' - | '\u{FE7D}' - | '\u{FE7F}' .. '\u{FEFC}' - | '\u{FF21}' .. '\u{FF3A}' - | '\u{FF41}' .. '\u{FF5A}' - | '\u{FF66}' .. '\u{FF9D}' - | '\u{FFA0}' .. '\u{FFBE}' - | '\u{FFC2}' .. '\u{FFC7}' - | '\u{FFCA}' .. '\u{FFCF}' - | '\u{FFD2}' .. '\u{FFD7}' - | '\u{FFDA}' .. '\u{FFDC}' - | '\u{10000}' .. '\u{1000B}' - | '\u{1000D}' .. '\u{10026}' - | '\u{10028}' .. '\u{1003A}' - | '\u{1003C}' .. '\u{1003D}' - | '\u{1003F}' .. '\u{1004D}' - | '\u{10050}' .. '\u{1005D}' - | '\u{10080}' .. '\u{100FA}' - | '\u{10140}' .. '\u{10174}' - | '\u{10280}' .. '\u{1029C}' - | '\u{102A0}' .. '\u{102D0}' - | '\u{10300}' .. '\u{1031F}' - | '\u{1032D}' .. '\u{1034A}' - | '\u{10350}' .. '\u{10375}' - | '\u{10380}' .. '\u{1039D}' - | '\u{103A0}' .. '\u{103C3}' - | '\u{103C8}' .. '\u{103CF}' - | '\u{103D1}' .. '\u{103D5}' - | '\u{10400}' .. '\u{1049D}' - | '\u{104B0}' .. 
'\u{104D3}' - | '\u{104D8}' .. '\u{104FB}' - | '\u{10500}' .. '\u{10527}' - | '\u{10530}' .. '\u{10563}' - | '\u{10570}' .. '\u{1057A}' - | '\u{1057C}' .. '\u{1058A}' - | '\u{1058C}' .. '\u{10592}' - | '\u{10594}' .. '\u{10595}' - | '\u{10597}' .. '\u{105A1}' - | '\u{105A3}' .. '\u{105B1}' - | '\u{105B3}' .. '\u{105B9}' - | '\u{105BB}' .. '\u{105BC}' - | '\u{10600}' .. '\u{10736}' - | '\u{10740}' .. '\u{10755}' - | '\u{10760}' .. '\u{10767}' - | '\u{10780}' .. '\u{10785}' - | '\u{10787}' .. '\u{107B0}' - | '\u{107B2}' .. '\u{107BA}' - | '\u{10800}' .. '\u{10805}' - | '\u{10808}' - | '\u{1080A}' .. '\u{10835}' - | '\u{10837}' .. '\u{10838}' - | '\u{1083C}' - | '\u{1083F}' .. '\u{10855}' - | '\u{10860}' .. '\u{10876}' - | '\u{10880}' .. '\u{1089E}' - | '\u{108E0}' .. '\u{108F2}' - | '\u{108F4}' .. '\u{108F5}' - | '\u{10900}' .. '\u{10915}' - | '\u{10920}' .. '\u{10939}' - | '\u{10980}' .. '\u{109B7}' - | '\u{109BE}' .. '\u{109BF}' - | '\u{10A00}' - | '\u{10A10}' .. '\u{10A13}' - | '\u{10A15}' .. '\u{10A17}' - | '\u{10A19}' .. '\u{10A35}' - | '\u{10A60}' .. '\u{10A7C}' - | '\u{10A80}' .. '\u{10A9C}' - | '\u{10AC0}' .. '\u{10AC7}' - | '\u{10AC9}' .. '\u{10AE4}' - | '\u{10B00}' .. '\u{10B35}' - | '\u{10B40}' .. '\u{10B55}' - | '\u{10B60}' .. '\u{10B72}' - | '\u{10B80}' .. '\u{10B91}' - | '\u{10C00}' .. '\u{10C48}' - | '\u{10C80}' .. '\u{10CB2}' - | '\u{10CC0}' .. '\u{10CF2}' - | '\u{10D00}' .. '\u{10D23}' - | '\u{10E80}' .. '\u{10EA9}' - | '\u{10EB0}' .. '\u{10EB1}' - | '\u{10F00}' .. '\u{10F1C}' - | '\u{10F27}' - | '\u{10F30}' .. '\u{10F45}' - | '\u{10F70}' .. '\u{10F81}' - | '\u{10FB0}' .. '\u{10FC4}' - | '\u{10FE0}' .. '\u{10FF6}' - | '\u{11003}' .. '\u{11037}' - | '\u{11071}' .. '\u{11072}' - | '\u{11075}' - | '\u{11083}' .. '\u{110AF}' - | '\u{110D0}' .. '\u{110E8}' - | '\u{11103}' .. '\u{11126}' - | '\u{11144}' - | '\u{11147}' - | '\u{11150}' .. '\u{11172}' - | '\u{11176}' - | '\u{11183}' .. '\u{111B2}' - | '\u{111C1}' .. 
'\u{111C4}' - | '\u{111DA}' - | '\u{111DC}' - | '\u{11200}' .. '\u{11211}' - | '\u{11213}' .. '\u{1122B}' - | '\u{1123F}' .. '\u{11240}' - | '\u{11280}' .. '\u{11286}' - | '\u{11288}' - | '\u{1128A}' .. '\u{1128D}' - | '\u{1128F}' .. '\u{1129D}' - | '\u{1129F}' .. '\u{112A8}' - | '\u{112B0}' .. '\u{112DE}' - | '\u{11305}' .. '\u{1130C}' - | '\u{1130F}' .. '\u{11310}' - | '\u{11313}' .. '\u{11328}' - | '\u{1132A}' .. '\u{11330}' - | '\u{11332}' .. '\u{11333}' - | '\u{11335}' .. '\u{11339}' - | '\u{1133D}' - | '\u{11350}' - | '\u{1135D}' .. '\u{11361}' - | '\u{11400}' .. '\u{11434}' - | '\u{11447}' .. '\u{1144A}' - | '\u{1145F}' .. '\u{11461}' - | '\u{11480}' .. '\u{114AF}' - | '\u{114C4}' .. '\u{114C5}' - | '\u{114C7}' - | '\u{11580}' .. '\u{115AE}' - | '\u{115D8}' .. '\u{115DB}' - | '\u{11600}' .. '\u{1162F}' - | '\u{11644}' - | '\u{11680}' .. '\u{116AA}' - | '\u{116B8}' - | '\u{11700}' .. '\u{1171A}' - | '\u{11740}' .. '\u{11746}' - | '\u{11800}' .. '\u{1182B}' - | '\u{118A0}' .. '\u{118DF}' - | '\u{118FF}' .. '\u{11906}' - | '\u{11909}' - | '\u{1190C}' .. '\u{11913}' - | '\u{11915}' .. '\u{11916}' - | '\u{11918}' .. '\u{1192F}' - | '\u{1193F}' - | '\u{11941}' - | '\u{119A0}' .. '\u{119A7}' - | '\u{119AA}' .. '\u{119D0}' - | '\u{119E1}' - | '\u{119E3}' - | '\u{11A00}' - | '\u{11A0B}' .. '\u{11A32}' - | '\u{11A3A}' - | '\u{11A50}' - | '\u{11A5C}' .. '\u{11A89}' - | '\u{11A9D}' - | '\u{11AB0}' .. '\u{11AF8}' - | '\u{11C00}' .. '\u{11C08}' - | '\u{11C0A}' .. '\u{11C2E}' - | '\u{11C40}' - | '\u{11C72}' .. '\u{11C8F}' - | '\u{11D00}' .. '\u{11D06}' - | '\u{11D08}' .. '\u{11D09}' - | '\u{11D0B}' .. '\u{11D30}' - | '\u{11D46}' - | '\u{11D60}' .. '\u{11D65}' - | '\u{11D67}' .. '\u{11D68}' - | '\u{11D6A}' .. '\u{11D89}' - | '\u{11D98}' - | '\u{11EE0}' .. '\u{11EF2}' - | '\u{11F02}' - | '\u{11F04}' .. '\u{11F10}' - | '\u{11F12}' .. '\u{11F33}' - | '\u{11FB0}' - | '\u{12000}' .. '\u{12399}' - | '\u{12400}' .. '\u{1246E}' - | '\u{12480}' .. '\u{12543}' - | '\u{12F90}' .. 
'\u{12FF0}' - | '\u{13000}' .. '\u{1342F}' - | '\u{13441}' .. '\u{13446}' - | '\u{14400}' .. '\u{14646}' - | '\u{16800}' .. '\u{16A38}' - | '\u{16A40}' .. '\u{16A5E}' - | '\u{16A70}' .. '\u{16ABE}' - | '\u{16AD0}' .. '\u{16AED}' - | '\u{16B00}' .. '\u{16B2F}' - | '\u{16B40}' .. '\u{16B43}' - | '\u{16B63}' .. '\u{16B77}' - | '\u{16B7D}' .. '\u{16B8F}' - | '\u{16E40}' .. '\u{16E7F}' - | '\u{16F00}' .. '\u{16F4A}' - | '\u{16F50}' - | '\u{16F93}' .. '\u{16F9F}' - | '\u{16FE0}' .. '\u{16FE1}' - | '\u{16FE3}' - | '\u{17000}' .. '\u{187F7}' - | '\u{18800}' .. '\u{18CD5}' - | '\u{18D00}' .. '\u{18D08}' - | '\u{1AFF0}' .. '\u{1AFF3}' - | '\u{1AFF5}' .. '\u{1AFFB}' - | '\u{1AFFD}' .. '\u{1AFFE}' - | '\u{1B000}' .. '\u{1B122}' - | '\u{1B132}' - | '\u{1B150}' .. '\u{1B152}' - | '\u{1B155}' - | '\u{1B164}' .. '\u{1B167}' - | '\u{1B170}' .. '\u{1B2FB}' - | '\u{1BC00}' .. '\u{1BC6A}' - | '\u{1BC70}' .. '\u{1BC7C}' - | '\u{1BC80}' .. '\u{1BC88}' - | '\u{1BC90}' .. '\u{1BC99}' - | '\u{1D400}' .. '\u{1D454}' - | '\u{1D456}' .. '\u{1D49C}' - | '\u{1D49E}' .. '\u{1D49F}' - | '\u{1D4A2}' - | '\u{1D4A5}' .. '\u{1D4A6}' - | '\u{1D4A9}' .. '\u{1D4AC}' - | '\u{1D4AE}' .. '\u{1D4B9}' - | '\u{1D4BB}' - | '\u{1D4BD}' .. '\u{1D4C3}' - | '\u{1D4C5}' .. '\u{1D505}' - | '\u{1D507}' .. '\u{1D50A}' - | '\u{1D50D}' .. '\u{1D514}' - | '\u{1D516}' .. '\u{1D51C}' - | '\u{1D51E}' .. '\u{1D539}' - | '\u{1D53B}' .. '\u{1D53E}' - | '\u{1D540}' .. '\u{1D544}' - | '\u{1D546}' - | '\u{1D54A}' .. '\u{1D550}' - | '\u{1D552}' .. '\u{1D6A5}' - | '\u{1D6A8}' .. '\u{1D6C0}' - | '\u{1D6C2}' .. '\u{1D6DA}' - | '\u{1D6DC}' .. '\u{1D6FA}' - | '\u{1D6FC}' .. '\u{1D714}' - | '\u{1D716}' .. '\u{1D734}' - | '\u{1D736}' .. '\u{1D74E}' - | '\u{1D750}' .. '\u{1D76E}' - | '\u{1D770}' .. '\u{1D788}' - | '\u{1D78A}' .. '\u{1D7A8}' - | '\u{1D7AA}' .. '\u{1D7C2}' - | '\u{1D7C4}' .. '\u{1D7CB}' - | '\u{1DF00}' .. '\u{1DF1E}' - | '\u{1DF25}' .. '\u{1DF2A}' - | '\u{1E030}' .. '\u{1E06D}' - | '\u{1E100}' .. 
'\u{1E12C}' - | '\u{1E137}' .. '\u{1E13D}' - | '\u{1E14E}' - | '\u{1E290}' .. '\u{1E2AD}' - | '\u{1E2C0}' .. '\u{1E2EB}' - | '\u{1E4D0}' .. '\u{1E4EB}' - | '\u{1E7E0}' .. '\u{1E7E6}' - | '\u{1E7E8}' .. '\u{1E7EB}' - | '\u{1E7ED}' .. '\u{1E7EE}' - | '\u{1E7F0}' .. '\u{1E7FE}' - | '\u{1E800}' .. '\u{1E8C4}' - | '\u{1E900}' .. '\u{1E943}' - | '\u{1E94B}' - | '\u{1EE00}' .. '\u{1EE03}' - | '\u{1EE05}' .. '\u{1EE1F}' - | '\u{1EE21}' .. '\u{1EE22}' - | '\u{1EE24}' - | '\u{1EE27}' - | '\u{1EE29}' .. '\u{1EE32}' - | '\u{1EE34}' .. '\u{1EE37}' - | '\u{1EE39}' - | '\u{1EE3B}' - | '\u{1EE42}' - | '\u{1EE47}' - | '\u{1EE49}' - | '\u{1EE4B}' - | '\u{1EE4D}' .. '\u{1EE4F}' - | '\u{1EE51}' .. '\u{1EE52}' - | '\u{1EE54}' - | '\u{1EE57}' - | '\u{1EE59}' - | '\u{1EE5B}' - | '\u{1EE5D}' - | '\u{1EE5F}' - | '\u{1EE61}' .. '\u{1EE62}' - | '\u{1EE64}' - | '\u{1EE67}' .. '\u{1EE6A}' - | '\u{1EE6C}' .. '\u{1EE72}' - | '\u{1EE74}' .. '\u{1EE77}' - | '\u{1EE79}' .. '\u{1EE7C}' - | '\u{1EE7E}' - | '\u{1EE80}' .. '\u{1EE89}' - | '\u{1EE8B}' .. '\u{1EE9B}' - | '\u{1EEA1}' .. '\u{1EEA3}' - | '\u{1EEA5}' .. '\u{1EEA9}' - | '\u{1EEAB}' .. '\u{1EEBB}' - | '\u{20000}' .. '\u{2A6DF}' - | '\u{2A700}' .. '\u{2B739}' - | '\u{2B740}' .. '\u{2B81D}' - | '\u{2B820}' .. '\u{2CEA1}' - | '\u{2CEB0}' .. '\u{2EBE0}' - | '\u{2F800}' .. '\u{2FA1D}' - | '\u{30000}' .. '\u{3134A}' - | '\u{31350}' .. '\u{323AF}' -; \ No newline at end of file + : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' + | '"""' LONG_BYTES_ITEM*? 
'"""' + ; + +fragment SHORT_BYTES_ITEM_FOR_SINGLE_QUOTE + : SHORT_BYTES_CHAR_NO_SINGLE_QUOTE + | BYTES_ESCAPE_SEQ + ; + +fragment SHORT_BYTES_ITEM_FOR_DOUBLE_QUOTE + : SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE + | BYTES_ESCAPE_SEQ + ; + +fragment LONG_BYTES_ITEM + : LONG_BYTES_CHAR + | BYTES_ESCAPE_SEQ + ; + +fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE // + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0026] + | [\u0028-\u005B] + | [\u005D-\u007F] + ; + +fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE // + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0021] + | [\u0023-\u005B] + | [\u005D-\u007F] + ; + +fragment LONG_BYTES_CHAR + : [\u0000-\u005B] + | [\u005D-\u007F] + ; // + +fragment BYTES_ESCAPE_SEQ + : '\\' [\u0000-\u007F] + ; // "\" + + // https://docs.python.org/3.12/library/string.html#format-specification-mini-language + +fragment SINGLE_QUOTE_FSTRING_LITERAL + : (FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE | DOUBLE_BRACES)+ + ; + +fragment DOUBLE_QUOTE_FSTRING_LITERAL + : (FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE | DOUBLE_BRACES)+ + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#formatted-string-literals + +fragment F_STRING_PREFIX + : 'f' + | 'F' + | 'fr' + | 'Fr' + | 'fR' + | 'FR' + | 'rf' + | 'rF' + | 'Rf' + | 'RF' + ; + +fragment FORMAT_SPEC_CHAR_NO_SINGLE_QUOTE + : ~ [{}'] + ; + +fragment FORMAT_SPEC_CHAR_NO_DOUBLE_QUOTE + : ~ [{}"] + ; + +fragment DOUBLE_BRACES + : '{{' + | '}}' + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#integer-literals + +fragment INTEGER + : DEC_INTEGER + | BIN_INTEGER + | OCT_INTEGER + | HEX_INTEGER + ; + +fragment DEC_INTEGER + : NON_ZERO_DIGIT ('_'? DIGIT)* + | '0'+ ('_'? '0')* + ; + +fragment BIN_INTEGER + : '0' ('b' | 'B') ('_'? BIN_DIGIT)+ + ; + +fragment OCT_INTEGER + : '0' ('o' | 'O') ('_'? OCT_DIGIT)+ + ; + +fragment HEX_INTEGER + : '0' ('x' | 'X') ('_'? 
HEX_DIGIT)+ + ; + +fragment NON_ZERO_DIGIT + : [1-9] + ; + +fragment DIGIT + : [0-9] + ; + +fragment BIN_DIGIT + : '0' + | '1' + ; + +fragment OCT_DIGIT + : [0-7] + ; + +fragment HEX_DIGIT + : DIGIT + | [a-f] + | [A-F] + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#floating-point-literals + +fragment FLOAT_NUMBER + : POINT_FLOAT + | EXPONENT_FLOAT + ; + +fragment POINT_FLOAT + : DIGIT_PART? FRACTION + | DIGIT_PART '.' + ; + +fragment EXPONENT_FLOAT + : (DIGIT_PART | POINT_FLOAT) EXPONENT + ; + +fragment DIGIT_PART + : DIGIT ('_'? DIGIT)* + ; + +fragment FRACTION + : '.' DIGIT_PART + ; + +fragment EXPONENT + : ('e' | 'E') ('+' | '-')? DIGIT_PART + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#imaginary-literals + +fragment IMAG_NUMBER + : (FLOAT_NUMBER | DIGIT_PART) ('j' | 'J') + ; + // https://docs.python.org/3.12/reference/lexical_analysis.html#physical-lines + +fragment OS_INDEPENDENT_NL + : '\r'? '\n' + ; // Unix, Windows + + // https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers + +fragment ID_CONTINUE + : ID_START + | '\u{0030}' .. '\u{0039}' + | '\u{00B7}' + | '\u{0300}' .. '\u{036F}' + | '\u{0387}' + | '\u{0483}' .. '\u{0487}' + | '\u{0591}' .. '\u{05BD}' + | '\u{05BF}' + | '\u{05C1}' .. '\u{05C2}' + | '\u{05C4}' .. '\u{05C5}' + | '\u{05C7}' + | '\u{0610}' .. '\u{061A}' + | '\u{064B}' .. '\u{0669}' + | '\u{0670}' + | '\u{06D6}' .. '\u{06DC}' + | '\u{06DF}' .. '\u{06E4}' + | '\u{06E7}' .. '\u{06E8}' + | '\u{06EA}' .. '\u{06ED}' + | '\u{06F0}' .. '\u{06F9}' + | '\u{0711}' + | '\u{0730}' .. '\u{074A}' + | '\u{07A6}' .. '\u{07B0}' + | '\u{07C0}' .. '\u{07C9}' + | '\u{07EB}' .. '\u{07F3}' + | '\u{07FD}' + | '\u{0816}' .. '\u{0819}' + | '\u{081B}' .. '\u{0823}' + | '\u{0825}' .. '\u{0827}' + | '\u{0829}' .. '\u{082D}' + | '\u{0859}' .. '\u{085B}' + | '\u{0898}' .. '\u{089F}' + | '\u{08CA}' .. '\u{08E1}' + | '\u{08E3}' .. '\u{0903}' + | '\u{093A}' .. 
'\u{093C}' + | '\u{093E}' .. '\u{094F}' + | '\u{0951}' .. '\u{0957}' + | '\u{0962}' .. '\u{0963}' + | '\u{0966}' .. '\u{096F}' + | '\u{0981}' .. '\u{0983}' + | '\u{09BC}' + | '\u{09BE}' .. '\u{09C4}' + | '\u{09C7}' .. '\u{09C8}' + | '\u{09CB}' .. '\u{09CD}' + | '\u{09D7}' + | '\u{09E2}' .. '\u{09E3}' + | '\u{09E6}' .. '\u{09EF}' + | '\u{09FE}' + | '\u{0A01}' .. '\u{0A03}' + | '\u{0A3C}' + | '\u{0A3E}' .. '\u{0A42}' + | '\u{0A47}' .. '\u{0A48}' + | '\u{0A4B}' .. '\u{0A4D}' + | '\u{0A51}' + | '\u{0A66}' .. '\u{0A71}' + | '\u{0A75}' + | '\u{0A81}' .. '\u{0A83}' + | '\u{0ABC}' + | '\u{0ABE}' .. '\u{0AC5}' + | '\u{0AC7}' .. '\u{0AC9}' + | '\u{0ACB}' .. '\u{0ACD}' + | '\u{0AE2}' .. '\u{0AE3}' + | '\u{0AE6}' .. '\u{0AEF}' + | '\u{0AFA}' .. '\u{0AFF}' + | '\u{0B01}' .. '\u{0B03}' + | '\u{0B3C}' + | '\u{0B3E}' .. '\u{0B44}' + | '\u{0B47}' .. '\u{0B48}' + | '\u{0B4B}' .. '\u{0B4D}' + | '\u{0B55}' .. '\u{0B57}' + | '\u{0B62}' .. '\u{0B63}' + | '\u{0B66}' .. '\u{0B6F}' + | '\u{0B82}' + | '\u{0BBE}' .. '\u{0BC2}' + | '\u{0BC6}' .. '\u{0BC8}' + | '\u{0BCA}' .. '\u{0BCD}' + | '\u{0BD7}' + | '\u{0BE6}' .. '\u{0BEF}' + | '\u{0C00}' .. '\u{0C04}' + | '\u{0C3C}' + | '\u{0C3E}' .. '\u{0C44}' + | '\u{0C46}' .. '\u{0C48}' + | '\u{0C4A}' .. '\u{0C4D}' + | '\u{0C55}' .. '\u{0C56}' + | '\u{0C62}' .. '\u{0C63}' + | '\u{0C66}' .. '\u{0C6F}' + | '\u{0C81}' .. '\u{0C83}' + | '\u{0CBC}' + | '\u{0CBE}' .. '\u{0CC4}' + | '\u{0CC6}' .. '\u{0CC8}' + | '\u{0CCA}' .. '\u{0CCD}' + | '\u{0CD5}' .. '\u{0CD6}' + | '\u{0CE2}' .. '\u{0CE3}' + | '\u{0CE6}' .. '\u{0CEF}' + | '\u{0CF3}' + | '\u{0D00}' .. '\u{0D03}' + | '\u{0D3B}' .. '\u{0D3C}' + | '\u{0D3E}' .. '\u{0D44}' + | '\u{0D46}' .. '\u{0D48}' + | '\u{0D4A}' .. '\u{0D4D}' + | '\u{0D57}' + | '\u{0D62}' .. '\u{0D63}' + | '\u{0D66}' .. '\u{0D6F}' + | '\u{0D81}' .. '\u{0D83}' + | '\u{0DCA}' + | '\u{0DCF}' .. '\u{0DD4}' + | '\u{0DD6}' + | '\u{0DD8}' .. '\u{0DDF}' + | '\u{0DE6}' .. '\u{0DEF}' + | '\u{0DF2}' .. '\u{0DF3}' + | '\u{0E31}' + | '\u{0E33}' .. 
'\u{0E3A}' + | '\u{0E47}' .. '\u{0E4E}' + | '\u{0E50}' .. '\u{0E59}' + | '\u{0EB1}' + | '\u{0EB3}' .. '\u{0EBC}' + | '\u{0EC8}' .. '\u{0ECE}' + | '\u{0ED0}' .. '\u{0ED9}' + | '\u{0F18}' .. '\u{0F19}' + | '\u{0F20}' .. '\u{0F29}' + | '\u{0F35}' + | '\u{0F37}' + | '\u{0F39}' + | '\u{0F3E}' .. '\u{0F3F}' + | '\u{0F71}' .. '\u{0F84}' + | '\u{0F86}' .. '\u{0F87}' + | '\u{0F8D}' .. '\u{0F97}' + | '\u{0F99}' .. '\u{0FBC}' + | '\u{0FC6}' + | '\u{102B}' .. '\u{103E}' + | '\u{1040}' .. '\u{1049}' + | '\u{1056}' .. '\u{1059}' + | '\u{105E}' .. '\u{1060}' + | '\u{1062}' .. '\u{1064}' + | '\u{1067}' .. '\u{106D}' + | '\u{1071}' .. '\u{1074}' + | '\u{1082}' .. '\u{108D}' + | '\u{108F}' .. '\u{109D}' + | '\u{135D}' .. '\u{135F}' + | '\u{1369}' .. '\u{1371}' + | '\u{1712}' .. '\u{1715}' + | '\u{1732}' .. '\u{1734}' + | '\u{1752}' .. '\u{1753}' + | '\u{1772}' .. '\u{1773}' + | '\u{17B4}' .. '\u{17D3}' + | '\u{17DD}' + | '\u{17E0}' .. '\u{17E9}' + | '\u{180B}' .. '\u{180D}' + | '\u{180F}' .. '\u{1819}' + | '\u{18A9}' + | '\u{1920}' .. '\u{192B}' + | '\u{1930}' .. '\u{193B}' + | '\u{1946}' .. '\u{194F}' + | '\u{19D0}' .. '\u{19DA}' + | '\u{1A17}' .. '\u{1A1B}' + | '\u{1A55}' .. '\u{1A5E}' + | '\u{1A60}' .. '\u{1A7C}' + | '\u{1A7F}' .. '\u{1A89}' + | '\u{1A90}' .. '\u{1A99}' + | '\u{1AB0}' .. '\u{1ABD}' + | '\u{1ABF}' .. '\u{1ACE}' + | '\u{1B00}' .. '\u{1B04}' + | '\u{1B34}' .. '\u{1B44}' + | '\u{1B50}' .. '\u{1B59}' + | '\u{1B6B}' .. '\u{1B73}' + | '\u{1B80}' .. '\u{1B82}' + | '\u{1BA1}' .. '\u{1BAD}' + | '\u{1BB0}' .. '\u{1BB9}' + | '\u{1BE6}' .. '\u{1BF3}' + | '\u{1C24}' .. '\u{1C37}' + | '\u{1C40}' .. '\u{1C49}' + | '\u{1C50}' .. '\u{1C59}' + | '\u{1CD0}' .. '\u{1CD2}' + | '\u{1CD4}' .. '\u{1CE8}' + | '\u{1CED}' + | '\u{1CF4}' + | '\u{1CF7}' .. '\u{1CF9}' + | '\u{1DC0}' .. '\u{1DFF}' + | '\u{203F}' .. '\u{2040}' + | '\u{2054}' + | '\u{20D0}' .. '\u{20DC}' + | '\u{20E1}' + | '\u{20E5}' .. '\u{20F0}' + | '\u{2CEF}' .. '\u{2CF1}' + | '\u{2D7F}' + | '\u{2DE0}' .. 
'\u{2DFF}' + | '\u{302A}' .. '\u{302F}' + | '\u{3099}' .. '\u{309A}' + | '\u{A620}' .. '\u{A629}' + | '\u{A66F}' + | '\u{A674}' .. '\u{A67D}' + | '\u{A69E}' .. '\u{A69F}' + | '\u{A6F0}' .. '\u{A6F1}' + | '\u{A802}' + | '\u{A806}' + | '\u{A80B}' + | '\u{A823}' .. '\u{A827}' + | '\u{A82C}' + | '\u{A880}' .. '\u{A881}' + | '\u{A8B4}' .. '\u{A8C5}' + | '\u{A8D0}' .. '\u{A8D9}' + | '\u{A8E0}' .. '\u{A8F1}' + | '\u{A8FF}' .. '\u{A909}' + | '\u{A926}' .. '\u{A92D}' + | '\u{A947}' .. '\u{A953}' + | '\u{A980}' .. '\u{A983}' + | '\u{A9B3}' .. '\u{A9C0}' + | '\u{A9D0}' .. '\u{A9D9}' + | '\u{A9E5}' + | '\u{A9F0}' .. '\u{A9F9}' + | '\u{AA29}' .. '\u{AA36}' + | '\u{AA43}' + | '\u{AA4C}' .. '\u{AA4D}' + | '\u{AA50}' .. '\u{AA59}' + | '\u{AA7B}' .. '\u{AA7D}' + | '\u{AAB0}' + | '\u{AAB2}' .. '\u{AAB4}' + | '\u{AAB7}' .. '\u{AAB8}' + | '\u{AABE}' .. '\u{AABF}' + | '\u{AAC1}' + | '\u{AAEB}' .. '\u{AAEF}' + | '\u{AAF5}' .. '\u{AAF6}' + | '\u{ABE3}' .. '\u{ABEA}' + | '\u{ABEC}' .. '\u{ABED}' + | '\u{ABF0}' .. '\u{ABF9}' + | '\u{FB1E}' + | '\u{FE00}' .. '\u{FE0F}' + | '\u{FE20}' .. '\u{FE2F}' + | '\u{FE33}' .. '\u{FE34}' + | '\u{FE4D}' .. '\u{FE4F}' + | '\u{FF10}' .. '\u{FF19}' + | '\u{FF3F}' + | '\u{FF9E}' .. '\u{FF9F}' + | '\u{101FD}' + | '\u{102E0}' + | '\u{10376}' .. '\u{1037A}' + | '\u{104A0}' .. '\u{104A9}' + | '\u{10A01}' .. '\u{10A03}' + | '\u{10A05}' .. '\u{10A06}' + | '\u{10A0C}' .. '\u{10A0F}' + | '\u{10A38}' .. '\u{10A3A}' + | '\u{10A3F}' + | '\u{10AE5}' .. '\u{10AE6}' + | '\u{10D24}' .. '\u{10D27}' + | '\u{10D30}' .. '\u{10D39}' + | '\u{10EAB}' .. '\u{10EAC}' + | '\u{10EFD}' .. '\u{10EFF}' + | '\u{10F46}' .. '\u{10F50}' + | '\u{10F82}' .. '\u{10F85}' + | '\u{11000}' .. '\u{11002}' + | '\u{11038}' .. '\u{11046}' + | '\u{11066}' .. '\u{11070}' + | '\u{11073}' .. '\u{11074}' + | '\u{1107F}' .. '\u{11082}' + | '\u{110B0}' .. '\u{110BA}' + | '\u{110C2}' + | '\u{110F0}' .. '\u{110F9}' + | '\u{11100}' .. '\u{11102}' + | '\u{11127}' .. '\u{11134}' + | '\u{11136}' .. 
'\u{1113F}' + | '\u{11145}' .. '\u{11146}' + | '\u{11173}' + | '\u{11180}' .. '\u{11182}' + | '\u{111B3}' .. '\u{111C0}' + | '\u{111C9}' .. '\u{111CC}' + | '\u{111CE}' .. '\u{111D9}' + | '\u{1122C}' .. '\u{11237}' + | '\u{1123E}' + | '\u{11241}' + | '\u{112DF}' .. '\u{112EA}' + | '\u{112F0}' .. '\u{112F9}' + | '\u{11300}' .. '\u{11303}' + | '\u{1133B}' .. '\u{1133C}' + | '\u{1133E}' .. '\u{11344}' + | '\u{11347}' .. '\u{11348}' + | '\u{1134B}' .. '\u{1134D}' + | '\u{11357}' + | '\u{11362}' .. '\u{11363}' + | '\u{11366}' .. '\u{1136C}' + | '\u{11370}' .. '\u{11374}' + | '\u{11435}' .. '\u{11446}' + | '\u{11450}' .. '\u{11459}' + | '\u{1145E}' + | '\u{114B0}' .. '\u{114C3}' + | '\u{114D0}' .. '\u{114D9}' + | '\u{115AF}' .. '\u{115B5}' + | '\u{115B8}' .. '\u{115C0}' + | '\u{115DC}' .. '\u{115DD}' + | '\u{11630}' .. '\u{11640}' + | '\u{11650}' .. '\u{11659}' + | '\u{116AB}' .. '\u{116B7}' + | '\u{116C0}' .. '\u{116C9}' + | '\u{1171D}' .. '\u{1172B}' + | '\u{11730}' .. '\u{11739}' + | '\u{1182C}' .. '\u{1183A}' + | '\u{118E0}' .. '\u{118E9}' + | '\u{11930}' .. '\u{11935}' + | '\u{11937}' .. '\u{11938}' + | '\u{1193B}' .. '\u{1193E}' + | '\u{11940}' + | '\u{11942}' .. '\u{11943}' + | '\u{11950}' .. '\u{11959}' + | '\u{119D1}' .. '\u{119D7}' + | '\u{119DA}' .. '\u{119E0}' + | '\u{119E4}' + | '\u{11A01}' .. '\u{11A0A}' + | '\u{11A33}' .. '\u{11A39}' + | '\u{11A3B}' .. '\u{11A3E}' + | '\u{11A47}' + | '\u{11A51}' .. '\u{11A5B}' + | '\u{11A8A}' .. '\u{11A99}' + | '\u{11C2F}' .. '\u{11C36}' + | '\u{11C38}' .. '\u{11C3F}' + | '\u{11C50}' .. '\u{11C59}' + | '\u{11C92}' .. '\u{11CA7}' + | '\u{11CA9}' .. '\u{11CB6}' + | '\u{11D31}' .. '\u{11D36}' + | '\u{11D3A}' + | '\u{11D3C}' .. '\u{11D3D}' + | '\u{11D3F}' .. '\u{11D45}' + | '\u{11D47}' + | '\u{11D50}' .. '\u{11D59}' + | '\u{11D8A}' .. '\u{11D8E}' + | '\u{11D90}' .. '\u{11D91}' + | '\u{11D93}' .. '\u{11D97}' + | '\u{11DA0}' .. '\u{11DA9}' + | '\u{11EF3}' .. '\u{11EF6}' + | '\u{11F00}' .. 
'\u{11F01}' + | '\u{11F03}' + | '\u{11F34}' .. '\u{11F3A}' + | '\u{11F3E}' .. '\u{11F42}' + | '\u{11F50}' .. '\u{11F59}' + | '\u{13440}' + | '\u{13447}' .. '\u{13455}' + | '\u{16A60}' .. '\u{16A69}' + | '\u{16AC0}' .. '\u{16AC9}' + | '\u{16AF0}' .. '\u{16AF4}' + | '\u{16B30}' .. '\u{16B36}' + | '\u{16B50}' .. '\u{16B59}' + | '\u{16F4F}' + | '\u{16F51}' .. '\u{16F87}' + | '\u{16F8F}' .. '\u{16F92}' + | '\u{16FE4}' + | '\u{16FF0}' .. '\u{16FF1}' + | '\u{1BC9D}' .. '\u{1BC9E}' + | '\u{1CF00}' .. '\u{1CF2D}' + | '\u{1CF30}' .. '\u{1CF46}' + | '\u{1D165}' .. '\u{1D169}' + | '\u{1D16D}' .. '\u{1D172}' + | '\u{1D17B}' .. '\u{1D182}' + | '\u{1D185}' .. '\u{1D18B}' + | '\u{1D1AA}' .. '\u{1D1AD}' + | '\u{1D242}' .. '\u{1D244}' + | '\u{1D7CE}' .. '\u{1D7FF}' + | '\u{1DA00}' .. '\u{1DA36}' + | '\u{1DA3B}' .. '\u{1DA6C}' + | '\u{1DA75}' + | '\u{1DA84}' + | '\u{1DA9B}' .. '\u{1DA9F}' + | '\u{1DAA1}' .. '\u{1DAAF}' + | '\u{1E000}' .. '\u{1E006}' + | '\u{1E008}' .. '\u{1E018}' + | '\u{1E01B}' .. '\u{1E021}' + | '\u{1E023}' .. '\u{1E024}' + | '\u{1E026}' .. '\u{1E02A}' + | '\u{1E08F}' + | '\u{1E130}' .. '\u{1E136}' + | '\u{1E140}' .. '\u{1E149}' + | '\u{1E2AE}' + | '\u{1E2EC}' .. '\u{1E2F9}' + | '\u{1E4EC}' .. '\u{1E4F9}' + | '\u{1E8D0}' .. '\u{1E8D6}' + | '\u{1E944}' .. '\u{1E94A}' + | '\u{1E950}' .. '\u{1E959}' + | '\u{1FBF0}' .. '\u{1FBF9}' + | '\u{E0100}' .. '\u{E01EF}' + ; + // https://github.com/RobEin/ANTLR4-parser-for-Python-3.12/tree/main/valid_chars_in_py_identifiers + +fragment ID_START + : '\u{0041}' .. '\u{005A}' + | '\u{005F}' + | '\u{0061}' .. '\u{007A}' + | '\u{00AA}' + | '\u{00B5}' + | '\u{00BA}' + | '\u{00C0}' .. '\u{00D6}' + | '\u{00D8}' .. '\u{00F6}' + | '\u{00F8}' .. '\u{02C1}' + | '\u{02C6}' .. '\u{02D1}' + | '\u{02E0}' .. '\u{02E4}' + | '\u{02EC}' + | '\u{02EE}' + | '\u{0370}' .. '\u{0374}' + | '\u{0376}' .. '\u{0377}' + | '\u{037B}' .. '\u{037D}' + | '\u{037F}' + | '\u{0386}' + | '\u{0388}' .. '\u{038A}' + | '\u{038C}' + | '\u{038E}' .. 
'\u{03A1}' + | '\u{03A3}' .. '\u{03F5}' + | '\u{03F7}' .. '\u{0481}' + | '\u{048A}' .. '\u{052F}' + | '\u{0531}' .. '\u{0556}' + | '\u{0559}' + | '\u{0560}' .. '\u{0588}' + | '\u{05D0}' .. '\u{05EA}' + | '\u{05EF}' .. '\u{05F2}' + | '\u{0620}' .. '\u{064A}' + | '\u{066E}' .. '\u{066F}' + | '\u{0671}' .. '\u{06D3}' + | '\u{06D5}' + | '\u{06E5}' .. '\u{06E6}' + | '\u{06EE}' .. '\u{06EF}' + | '\u{06FA}' .. '\u{06FC}' + | '\u{06FF}' + | '\u{0710}' + | '\u{0712}' .. '\u{072F}' + | '\u{074D}' .. '\u{07A5}' + | '\u{07B1}' + | '\u{07CA}' .. '\u{07EA}' + | '\u{07F4}' .. '\u{07F5}' + | '\u{07FA}' + | '\u{0800}' .. '\u{0815}' + | '\u{081A}' + | '\u{0824}' + | '\u{0828}' + | '\u{0840}' .. '\u{0858}' + | '\u{0860}' .. '\u{086A}' + | '\u{0870}' .. '\u{0887}' + | '\u{0889}' .. '\u{088E}' + | '\u{08A0}' .. '\u{08C9}' + | '\u{0904}' .. '\u{0939}' + | '\u{093D}' + | '\u{0950}' + | '\u{0958}' .. '\u{0961}' + | '\u{0971}' .. '\u{0980}' + | '\u{0985}' .. '\u{098C}' + | '\u{098F}' .. '\u{0990}' + | '\u{0993}' .. '\u{09A8}' + | '\u{09AA}' .. '\u{09B0}' + | '\u{09B2}' + | '\u{09B6}' .. '\u{09B9}' + | '\u{09BD}' + | '\u{09CE}' + | '\u{09DC}' .. '\u{09DD}' + | '\u{09DF}' .. '\u{09E1}' + | '\u{09F0}' .. '\u{09F1}' + | '\u{09FC}' + | '\u{0A05}' .. '\u{0A0A}' + | '\u{0A0F}' .. '\u{0A10}' + | '\u{0A13}' .. '\u{0A28}' + | '\u{0A2A}' .. '\u{0A30}' + | '\u{0A32}' .. '\u{0A33}' + | '\u{0A35}' .. '\u{0A36}' + | '\u{0A38}' .. '\u{0A39}' + | '\u{0A59}' .. '\u{0A5C}' + | '\u{0A5E}' + | '\u{0A72}' .. '\u{0A74}' + | '\u{0A85}' .. '\u{0A8D}' + | '\u{0A8F}' .. '\u{0A91}' + | '\u{0A93}' .. '\u{0AA8}' + | '\u{0AAA}' .. '\u{0AB0}' + | '\u{0AB2}' .. '\u{0AB3}' + | '\u{0AB5}' .. '\u{0AB9}' + | '\u{0ABD}' + | '\u{0AD0}' + | '\u{0AE0}' .. '\u{0AE1}' + | '\u{0AF9}' + | '\u{0B05}' .. '\u{0B0C}' + | '\u{0B0F}' .. '\u{0B10}' + | '\u{0B13}' .. '\u{0B28}' + | '\u{0B2A}' .. '\u{0B30}' + | '\u{0B32}' .. '\u{0B33}' + | '\u{0B35}' .. '\u{0B39}' + | '\u{0B3D}' + | '\u{0B5C}' .. '\u{0B5D}' + | '\u{0B5F}' .. 
'\u{0B61}' + | '\u{0B71}' + | '\u{0B83}' + | '\u{0B85}' .. '\u{0B8A}' + | '\u{0B8E}' .. '\u{0B90}' + | '\u{0B92}' .. '\u{0B95}' + | '\u{0B99}' .. '\u{0B9A}' + | '\u{0B9C}' + | '\u{0B9E}' .. '\u{0B9F}' + | '\u{0BA3}' .. '\u{0BA4}' + | '\u{0BA8}' .. '\u{0BAA}' + | '\u{0BAE}' .. '\u{0BB9}' + | '\u{0BD0}' + | '\u{0C05}' .. '\u{0C0C}' + | '\u{0C0E}' .. '\u{0C10}' + | '\u{0C12}' .. '\u{0C28}' + | '\u{0C2A}' .. '\u{0C39}' + | '\u{0C3D}' + | '\u{0C58}' .. '\u{0C5A}' + | '\u{0C5D}' + | '\u{0C60}' .. '\u{0C61}' + | '\u{0C80}' + | '\u{0C85}' .. '\u{0C8C}' + | '\u{0C8E}' .. '\u{0C90}' + | '\u{0C92}' .. '\u{0CA8}' + | '\u{0CAA}' .. '\u{0CB3}' + | '\u{0CB5}' .. '\u{0CB9}' + | '\u{0CBD}' + | '\u{0CDD}' .. '\u{0CDE}' + | '\u{0CE0}' .. '\u{0CE1}' + | '\u{0CF1}' .. '\u{0CF2}' + | '\u{0D04}' .. '\u{0D0C}' + | '\u{0D0E}' .. '\u{0D10}' + | '\u{0D12}' .. '\u{0D3A}' + | '\u{0D3D}' + | '\u{0D4E}' + | '\u{0D54}' .. '\u{0D56}' + | '\u{0D5F}' .. '\u{0D61}' + | '\u{0D7A}' .. '\u{0D7F}' + | '\u{0D85}' .. '\u{0D96}' + | '\u{0D9A}' .. '\u{0DB1}' + | '\u{0DB3}' .. '\u{0DBB}' + | '\u{0DBD}' + | '\u{0DC0}' .. '\u{0DC6}' + | '\u{0E01}' .. '\u{0E30}' + | '\u{0E32}' + | '\u{0E40}' .. '\u{0E46}' + | '\u{0E81}' .. '\u{0E82}' + | '\u{0E84}' + | '\u{0E86}' .. '\u{0E8A}' + | '\u{0E8C}' .. '\u{0EA3}' + | '\u{0EA5}' + | '\u{0EA7}' .. '\u{0EB0}' + | '\u{0EB2}' + | '\u{0EBD}' + | '\u{0EC0}' .. '\u{0EC4}' + | '\u{0EC6}' + | '\u{0EDC}' .. '\u{0EDF}' + | '\u{0F00}' + | '\u{0F40}' .. '\u{0F47}' + | '\u{0F49}' .. '\u{0F6C}' + | '\u{0F88}' .. '\u{0F8C}' + | '\u{1000}' .. '\u{102A}' + | '\u{103F}' + | '\u{1050}' .. '\u{1055}' + | '\u{105A}' .. '\u{105D}' + | '\u{1061}' + | '\u{1065}' .. '\u{1066}' + | '\u{106E}' .. '\u{1070}' + | '\u{1075}' .. '\u{1081}' + | '\u{108E}' + | '\u{10A0}' .. '\u{10C5}' + | '\u{10C7}' + | '\u{10CD}' + | '\u{10D0}' .. '\u{10FA}' + | '\u{10FC}' .. '\u{1248}' + | '\u{124A}' .. '\u{124D}' + | '\u{1250}' .. '\u{1256}' + | '\u{1258}' + | '\u{125A}' .. '\u{125D}' + | '\u{1260}' .. 
'\u{1288}' + | '\u{128A}' .. '\u{128D}' + | '\u{1290}' .. '\u{12B0}' + | '\u{12B2}' .. '\u{12B5}' + | '\u{12B8}' .. '\u{12BE}' + | '\u{12C0}' + | '\u{12C2}' .. '\u{12C5}' + | '\u{12C8}' .. '\u{12D6}' + | '\u{12D8}' .. '\u{1310}' + | '\u{1312}' .. '\u{1315}' + | '\u{1318}' .. '\u{135A}' + | '\u{1380}' .. '\u{138F}' + | '\u{13A0}' .. '\u{13F5}' + | '\u{13F8}' .. '\u{13FD}' + | '\u{1401}' .. '\u{166C}' + | '\u{166F}' .. '\u{167F}' + | '\u{1681}' .. '\u{169A}' + | '\u{16A0}' .. '\u{16EA}' + | '\u{16EE}' .. '\u{16F8}' + | '\u{1700}' .. '\u{1711}' + | '\u{171F}' .. '\u{1731}' + | '\u{1740}' .. '\u{1751}' + | '\u{1760}' .. '\u{176C}' + | '\u{176E}' .. '\u{1770}' + | '\u{1780}' .. '\u{17B3}' + | '\u{17D7}' + | '\u{17DC}' + | '\u{1820}' .. '\u{1878}' + | '\u{1880}' .. '\u{18A8}' + | '\u{18AA}' + | '\u{18B0}' .. '\u{18F5}' + | '\u{1900}' .. '\u{191E}' + | '\u{1950}' .. '\u{196D}' + | '\u{1970}' .. '\u{1974}' + | '\u{1980}' .. '\u{19AB}' + | '\u{19B0}' .. '\u{19C9}' + | '\u{1A00}' .. '\u{1A16}' + | '\u{1A20}' .. '\u{1A54}' + | '\u{1AA7}' + | '\u{1B05}' .. '\u{1B33}' + | '\u{1B45}' .. '\u{1B4C}' + | '\u{1B83}' .. '\u{1BA0}' + | '\u{1BAE}' .. '\u{1BAF}' + | '\u{1BBA}' .. '\u{1BE5}' + | '\u{1C00}' .. '\u{1C23}' + | '\u{1C4D}' .. '\u{1C4F}' + | '\u{1C5A}' .. '\u{1C7D}' + | '\u{1C80}' .. '\u{1C88}' + | '\u{1C90}' .. '\u{1CBA}' + | '\u{1CBD}' .. '\u{1CBF}' + | '\u{1CE9}' .. '\u{1CEC}' + | '\u{1CEE}' .. '\u{1CF3}' + | '\u{1CF5}' .. '\u{1CF6}' + | '\u{1CFA}' + | '\u{1D00}' .. '\u{1DBF}' + | '\u{1E00}' .. '\u{1F15}' + | '\u{1F18}' .. '\u{1F1D}' + | '\u{1F20}' .. '\u{1F45}' + | '\u{1F48}' .. '\u{1F4D}' + | '\u{1F50}' .. '\u{1F57}' + | '\u{1F59}' + | '\u{1F5B}' + | '\u{1F5D}' + | '\u{1F5F}' .. '\u{1F7D}' + | '\u{1F80}' .. '\u{1FB4}' + | '\u{1FB6}' .. '\u{1FBC}' + | '\u{1FBE}' + | '\u{1FC2}' .. '\u{1FC4}' + | '\u{1FC6}' .. '\u{1FCC}' + | '\u{1FD0}' .. '\u{1FD3}' + | '\u{1FD6}' .. '\u{1FDB}' + | '\u{1FE0}' .. '\u{1FEC}' + | '\u{1FF2}' .. '\u{1FF4}' + | '\u{1FF6}' .. 
'\u{1FFC}' + | '\u{2071}' + | '\u{207F}' + | '\u{2090}' .. '\u{209C}' + | '\u{2102}' + | '\u{2107}' + | '\u{210A}' .. '\u{2113}' + | '\u{2115}' + | '\u{2118}' .. '\u{211D}' + | '\u{2124}' + | '\u{2126}' + | '\u{2128}' + | '\u{212A}' .. '\u{2139}' + | '\u{213C}' .. '\u{213F}' + | '\u{2145}' .. '\u{2149}' + | '\u{214E}' + | '\u{2160}' .. '\u{2188}' + | '\u{2C00}' .. '\u{2CE4}' + | '\u{2CEB}' .. '\u{2CEE}' + | '\u{2CF2}' .. '\u{2CF3}' + | '\u{2D00}' .. '\u{2D25}' + | '\u{2D27}' + | '\u{2D2D}' + | '\u{2D30}' .. '\u{2D67}' + | '\u{2D6F}' + | '\u{2D80}' .. '\u{2D96}' + | '\u{2DA0}' .. '\u{2DA6}' + | '\u{2DA8}' .. '\u{2DAE}' + | '\u{2DB0}' .. '\u{2DB6}' + | '\u{2DB8}' .. '\u{2DBE}' + | '\u{2DC0}' .. '\u{2DC6}' + | '\u{2DC8}' .. '\u{2DCE}' + | '\u{2DD0}' .. '\u{2DD6}' + | '\u{2DD8}' .. '\u{2DDE}' + | '\u{3005}' .. '\u{3007}' + | '\u{3021}' .. '\u{3029}' + | '\u{3031}' .. '\u{3035}' + | '\u{3038}' .. '\u{303C}' + | '\u{3041}' .. '\u{3096}' + | '\u{309D}' .. '\u{309F}' + | '\u{30A1}' .. '\u{30FA}' + | '\u{30FC}' .. '\u{30FF}' + | '\u{3105}' .. '\u{312F}' + | '\u{3131}' .. '\u{318E}' + | '\u{31A0}' .. '\u{31BF}' + | '\u{31F0}' .. '\u{31FF}' + | '\u{3400}' .. '\u{4DBF}' + | '\u{4E00}' .. '\u{A48C}' + | '\u{A4D0}' .. '\u{A4FD}' + | '\u{A500}' .. '\u{A60C}' + | '\u{A610}' .. '\u{A61F}' + | '\u{A62A}' .. '\u{A62B}' + | '\u{A640}' .. '\u{A66E}' + | '\u{A67F}' .. '\u{A69D}' + | '\u{A6A0}' .. '\u{A6EF}' + | '\u{A717}' .. '\u{A71F}' + | '\u{A722}' .. '\u{A788}' + | '\u{A78B}' .. '\u{A7CA}' + | '\u{A7D0}' .. '\u{A7D1}' + | '\u{A7D3}' + | '\u{A7D5}' .. '\u{A7D9}' + | '\u{A7F2}' .. '\u{A801}' + | '\u{A803}' .. '\u{A805}' + | '\u{A807}' .. '\u{A80A}' + | '\u{A80C}' .. '\u{A822}' + | '\u{A840}' .. '\u{A873}' + | '\u{A882}' .. '\u{A8B3}' + | '\u{A8F2}' .. '\u{A8F7}' + | '\u{A8FB}' + | '\u{A8FD}' .. '\u{A8FE}' + | '\u{A90A}' .. '\u{A925}' + | '\u{A930}' .. '\u{A946}' + | '\u{A960}' .. '\u{A97C}' + | '\u{A984}' .. '\u{A9B2}' + | '\u{A9CF}' + | '\u{A9E0}' .. '\u{A9E4}' + | '\u{A9E6}' .. 
'\u{A9EF}' + | '\u{A9FA}' .. '\u{A9FE}' + | '\u{AA00}' .. '\u{AA28}' + | '\u{AA40}' .. '\u{AA42}' + | '\u{AA44}' .. '\u{AA4B}' + | '\u{AA60}' .. '\u{AA76}' + | '\u{AA7A}' + | '\u{AA7E}' .. '\u{AAAF}' + | '\u{AAB1}' + | '\u{AAB5}' .. '\u{AAB6}' + | '\u{AAB9}' .. '\u{AABD}' + | '\u{AAC0}' + | '\u{AAC2}' + | '\u{AADB}' .. '\u{AADD}' + | '\u{AAE0}' .. '\u{AAEA}' + | '\u{AAF2}' .. '\u{AAF4}' + | '\u{AB01}' .. '\u{AB06}' + | '\u{AB09}' .. '\u{AB0E}' + | '\u{AB11}' .. '\u{AB16}' + | '\u{AB20}' .. '\u{AB26}' + | '\u{AB28}' .. '\u{AB2E}' + | '\u{AB30}' .. '\u{AB5A}' + | '\u{AB5C}' .. '\u{AB69}' + | '\u{AB70}' .. '\u{ABE2}' + | '\u{AC00}' .. '\u{D7A3}' + | '\u{D7B0}' .. '\u{D7C6}' + | '\u{D7CB}' .. '\u{D7FB}' + | '\u{F900}' .. '\u{FA6D}' + | '\u{FA70}' .. '\u{FAD9}' + | '\u{FB00}' .. '\u{FB06}' + | '\u{FB13}' .. '\u{FB17}' + | '\u{FB1D}' + | '\u{FB1F}' .. '\u{FB28}' + | '\u{FB2A}' .. '\u{FB36}' + | '\u{FB38}' .. '\u{FB3C}' + | '\u{FB3E}' + | '\u{FB40}' .. '\u{FB41}' + | '\u{FB43}' .. '\u{FB44}' + | '\u{FB46}' .. '\u{FBB1}' + | '\u{FBD3}' .. '\u{FC5D}' + | '\u{FC64}' .. '\u{FD3D}' + | '\u{FD50}' .. '\u{FD8F}' + | '\u{FD92}' .. '\u{FDC7}' + | '\u{FDF0}' .. '\u{FDF9}' + | '\u{FE71}' + | '\u{FE73}' + | '\u{FE77}' + | '\u{FE79}' + | '\u{FE7B}' + | '\u{FE7D}' + | '\u{FE7F}' .. '\u{FEFC}' + | '\u{FF21}' .. '\u{FF3A}' + | '\u{FF41}' .. '\u{FF5A}' + | '\u{FF66}' .. '\u{FF9D}' + | '\u{FFA0}' .. '\u{FFBE}' + | '\u{FFC2}' .. '\u{FFC7}' + | '\u{FFCA}' .. '\u{FFCF}' + | '\u{FFD2}' .. '\u{FFD7}' + | '\u{FFDA}' .. '\u{FFDC}' + | '\u{10000}' .. '\u{1000B}' + | '\u{1000D}' .. '\u{10026}' + | '\u{10028}' .. '\u{1003A}' + | '\u{1003C}' .. '\u{1003D}' + | '\u{1003F}' .. '\u{1004D}' + | '\u{10050}' .. '\u{1005D}' + | '\u{10080}' .. '\u{100FA}' + | '\u{10140}' .. '\u{10174}' + | '\u{10280}' .. '\u{1029C}' + | '\u{102A0}' .. '\u{102D0}' + | '\u{10300}' .. '\u{1031F}' + | '\u{1032D}' .. '\u{1034A}' + | '\u{10350}' .. '\u{10375}' + | '\u{10380}' .. '\u{1039D}' + | '\u{103A0}' .. 
'\u{103C3}' + | '\u{103C8}' .. '\u{103CF}' + | '\u{103D1}' .. '\u{103D5}' + | '\u{10400}' .. '\u{1049D}' + | '\u{104B0}' .. '\u{104D3}' + | '\u{104D8}' .. '\u{104FB}' + | '\u{10500}' .. '\u{10527}' + | '\u{10530}' .. '\u{10563}' + | '\u{10570}' .. '\u{1057A}' + | '\u{1057C}' .. '\u{1058A}' + | '\u{1058C}' .. '\u{10592}' + | '\u{10594}' .. '\u{10595}' + | '\u{10597}' .. '\u{105A1}' + | '\u{105A3}' .. '\u{105B1}' + | '\u{105B3}' .. '\u{105B9}' + | '\u{105BB}' .. '\u{105BC}' + | '\u{10600}' .. '\u{10736}' + | '\u{10740}' .. '\u{10755}' + | '\u{10760}' .. '\u{10767}' + | '\u{10780}' .. '\u{10785}' + | '\u{10787}' .. '\u{107B0}' + | '\u{107B2}' .. '\u{107BA}' + | '\u{10800}' .. '\u{10805}' + | '\u{10808}' + | '\u{1080A}' .. '\u{10835}' + | '\u{10837}' .. '\u{10838}' + | '\u{1083C}' + | '\u{1083F}' .. '\u{10855}' + | '\u{10860}' .. '\u{10876}' + | '\u{10880}' .. '\u{1089E}' + | '\u{108E0}' .. '\u{108F2}' + | '\u{108F4}' .. '\u{108F5}' + | '\u{10900}' .. '\u{10915}' + | '\u{10920}' .. '\u{10939}' + | '\u{10980}' .. '\u{109B7}' + | '\u{109BE}' .. '\u{109BF}' + | '\u{10A00}' + | '\u{10A10}' .. '\u{10A13}' + | '\u{10A15}' .. '\u{10A17}' + | '\u{10A19}' .. '\u{10A35}' + | '\u{10A60}' .. '\u{10A7C}' + | '\u{10A80}' .. '\u{10A9C}' + | '\u{10AC0}' .. '\u{10AC7}' + | '\u{10AC9}' .. '\u{10AE4}' + | '\u{10B00}' .. '\u{10B35}' + | '\u{10B40}' .. '\u{10B55}' + | '\u{10B60}' .. '\u{10B72}' + | '\u{10B80}' .. '\u{10B91}' + | '\u{10C00}' .. '\u{10C48}' + | '\u{10C80}' .. '\u{10CB2}' + | '\u{10CC0}' .. '\u{10CF2}' + | '\u{10D00}' .. '\u{10D23}' + | '\u{10E80}' .. '\u{10EA9}' + | '\u{10EB0}' .. '\u{10EB1}' + | '\u{10F00}' .. '\u{10F1C}' + | '\u{10F27}' + | '\u{10F30}' .. '\u{10F45}' + | '\u{10F70}' .. '\u{10F81}' + | '\u{10FB0}' .. '\u{10FC4}' + | '\u{10FE0}' .. '\u{10FF6}' + | '\u{11003}' .. '\u{11037}' + | '\u{11071}' .. '\u{11072}' + | '\u{11075}' + | '\u{11083}' .. '\u{110AF}' + | '\u{110D0}' .. '\u{110E8}' + | '\u{11103}' .. 
'\u{11126}' + | '\u{11144}' + | '\u{11147}' + | '\u{11150}' .. '\u{11172}' + | '\u{11176}' + | '\u{11183}' .. '\u{111B2}' + | '\u{111C1}' .. '\u{111C4}' + | '\u{111DA}' + | '\u{111DC}' + | '\u{11200}' .. '\u{11211}' + | '\u{11213}' .. '\u{1122B}' + | '\u{1123F}' .. '\u{11240}' + | '\u{11280}' .. '\u{11286}' + | '\u{11288}' + | '\u{1128A}' .. '\u{1128D}' + | '\u{1128F}' .. '\u{1129D}' + | '\u{1129F}' .. '\u{112A8}' + | '\u{112B0}' .. '\u{112DE}' + | '\u{11305}' .. '\u{1130C}' + | '\u{1130F}' .. '\u{11310}' + | '\u{11313}' .. '\u{11328}' + | '\u{1132A}' .. '\u{11330}' + | '\u{11332}' .. '\u{11333}' + | '\u{11335}' .. '\u{11339}' + | '\u{1133D}' + | '\u{11350}' + | '\u{1135D}' .. '\u{11361}' + | '\u{11400}' .. '\u{11434}' + | '\u{11447}' .. '\u{1144A}' + | '\u{1145F}' .. '\u{11461}' + | '\u{11480}' .. '\u{114AF}' + | '\u{114C4}' .. '\u{114C5}' + | '\u{114C7}' + | '\u{11580}' .. '\u{115AE}' + | '\u{115D8}' .. '\u{115DB}' + | '\u{11600}' .. '\u{1162F}' + | '\u{11644}' + | '\u{11680}' .. '\u{116AA}' + | '\u{116B8}' + | '\u{11700}' .. '\u{1171A}' + | '\u{11740}' .. '\u{11746}' + | '\u{11800}' .. '\u{1182B}' + | '\u{118A0}' .. '\u{118DF}' + | '\u{118FF}' .. '\u{11906}' + | '\u{11909}' + | '\u{1190C}' .. '\u{11913}' + | '\u{11915}' .. '\u{11916}' + | '\u{11918}' .. '\u{1192F}' + | '\u{1193F}' + | '\u{11941}' + | '\u{119A0}' .. '\u{119A7}' + | '\u{119AA}' .. '\u{119D0}' + | '\u{119E1}' + | '\u{119E3}' + | '\u{11A00}' + | '\u{11A0B}' .. '\u{11A32}' + | '\u{11A3A}' + | '\u{11A50}' + | '\u{11A5C}' .. '\u{11A89}' + | '\u{11A9D}' + | '\u{11AB0}' .. '\u{11AF8}' + | '\u{11C00}' .. '\u{11C08}' + | '\u{11C0A}' .. '\u{11C2E}' + | '\u{11C40}' + | '\u{11C72}' .. '\u{11C8F}' + | '\u{11D00}' .. '\u{11D06}' + | '\u{11D08}' .. '\u{11D09}' + | '\u{11D0B}' .. '\u{11D30}' + | '\u{11D46}' + | '\u{11D60}' .. '\u{11D65}' + | '\u{11D67}' .. '\u{11D68}' + | '\u{11D6A}' .. '\u{11D89}' + | '\u{11D98}' + | '\u{11EE0}' .. '\u{11EF2}' + | '\u{11F02}' + | '\u{11F04}' .. '\u{11F10}' + | '\u{11F12}' .. 
'\u{11F33}' + | '\u{11FB0}' + | '\u{12000}' .. '\u{12399}' + | '\u{12400}' .. '\u{1246E}' + | '\u{12480}' .. '\u{12543}' + | '\u{12F90}' .. '\u{12FF0}' + | '\u{13000}' .. '\u{1342F}' + | '\u{13441}' .. '\u{13446}' + | '\u{14400}' .. '\u{14646}' + | '\u{16800}' .. '\u{16A38}' + | '\u{16A40}' .. '\u{16A5E}' + | '\u{16A70}' .. '\u{16ABE}' + | '\u{16AD0}' .. '\u{16AED}' + | '\u{16B00}' .. '\u{16B2F}' + | '\u{16B40}' .. '\u{16B43}' + | '\u{16B63}' .. '\u{16B77}' + | '\u{16B7D}' .. '\u{16B8F}' + | '\u{16E40}' .. '\u{16E7F}' + | '\u{16F00}' .. '\u{16F4A}' + | '\u{16F50}' + | '\u{16F93}' .. '\u{16F9F}' + | '\u{16FE0}' .. '\u{16FE1}' + | '\u{16FE3}' + | '\u{17000}' .. '\u{187F7}' + | '\u{18800}' .. '\u{18CD5}' + | '\u{18D00}' .. '\u{18D08}' + | '\u{1AFF0}' .. '\u{1AFF3}' + | '\u{1AFF5}' .. '\u{1AFFB}' + | '\u{1AFFD}' .. '\u{1AFFE}' + | '\u{1B000}' .. '\u{1B122}' + | '\u{1B132}' + | '\u{1B150}' .. '\u{1B152}' + | '\u{1B155}' + | '\u{1B164}' .. '\u{1B167}' + | '\u{1B170}' .. '\u{1B2FB}' + | '\u{1BC00}' .. '\u{1BC6A}' + | '\u{1BC70}' .. '\u{1BC7C}' + | '\u{1BC80}' .. '\u{1BC88}' + | '\u{1BC90}' .. '\u{1BC99}' + | '\u{1D400}' .. '\u{1D454}' + | '\u{1D456}' .. '\u{1D49C}' + | '\u{1D49E}' .. '\u{1D49F}' + | '\u{1D4A2}' + | '\u{1D4A5}' .. '\u{1D4A6}' + | '\u{1D4A9}' .. '\u{1D4AC}' + | '\u{1D4AE}' .. '\u{1D4B9}' + | '\u{1D4BB}' + | '\u{1D4BD}' .. '\u{1D4C3}' + | '\u{1D4C5}' .. '\u{1D505}' + | '\u{1D507}' .. '\u{1D50A}' + | '\u{1D50D}' .. '\u{1D514}' + | '\u{1D516}' .. '\u{1D51C}' + | '\u{1D51E}' .. '\u{1D539}' + | '\u{1D53B}' .. '\u{1D53E}' + | '\u{1D540}' .. '\u{1D544}' + | '\u{1D546}' + | '\u{1D54A}' .. '\u{1D550}' + | '\u{1D552}' .. '\u{1D6A5}' + | '\u{1D6A8}' .. '\u{1D6C0}' + | '\u{1D6C2}' .. '\u{1D6DA}' + | '\u{1D6DC}' .. '\u{1D6FA}' + | '\u{1D6FC}' .. '\u{1D714}' + | '\u{1D716}' .. '\u{1D734}' + | '\u{1D736}' .. '\u{1D74E}' + | '\u{1D750}' .. '\u{1D76E}' + | '\u{1D770}' .. '\u{1D788}' + | '\u{1D78A}' .. '\u{1D7A8}' + | '\u{1D7AA}' .. '\u{1D7C2}' + | '\u{1D7C4}' .. 
'\u{1D7CB}' + | '\u{1DF00}' .. '\u{1DF1E}' + | '\u{1DF25}' .. '\u{1DF2A}' + | '\u{1E030}' .. '\u{1E06D}' + | '\u{1E100}' .. '\u{1E12C}' + | '\u{1E137}' .. '\u{1E13D}' + | '\u{1E14E}' + | '\u{1E290}' .. '\u{1E2AD}' + | '\u{1E2C0}' .. '\u{1E2EB}' + | '\u{1E4D0}' .. '\u{1E4EB}' + | '\u{1E7E0}' .. '\u{1E7E6}' + | '\u{1E7E8}' .. '\u{1E7EB}' + | '\u{1E7ED}' .. '\u{1E7EE}' + | '\u{1E7F0}' .. '\u{1E7FE}' + | '\u{1E800}' .. '\u{1E8C4}' + | '\u{1E900}' .. '\u{1E943}' + | '\u{1E94B}' + | '\u{1EE00}' .. '\u{1EE03}' + | '\u{1EE05}' .. '\u{1EE1F}' + | '\u{1EE21}' .. '\u{1EE22}' + | '\u{1EE24}' + | '\u{1EE27}' + | '\u{1EE29}' .. '\u{1EE32}' + | '\u{1EE34}' .. '\u{1EE37}' + | '\u{1EE39}' + | '\u{1EE3B}' + | '\u{1EE42}' + | '\u{1EE47}' + | '\u{1EE49}' + | '\u{1EE4B}' + | '\u{1EE4D}' .. '\u{1EE4F}' + | '\u{1EE51}' .. '\u{1EE52}' + | '\u{1EE54}' + | '\u{1EE57}' + | '\u{1EE59}' + | '\u{1EE5B}' + | '\u{1EE5D}' + | '\u{1EE5F}' + | '\u{1EE61}' .. '\u{1EE62}' + | '\u{1EE64}' + | '\u{1EE67}' .. '\u{1EE6A}' + | '\u{1EE6C}' .. '\u{1EE72}' + | '\u{1EE74}' .. '\u{1EE77}' + | '\u{1EE79}' .. '\u{1EE7C}' + | '\u{1EE7E}' + | '\u{1EE80}' .. '\u{1EE89}' + | '\u{1EE8B}' .. '\u{1EE9B}' + | '\u{1EEA1}' .. '\u{1EEA3}' + | '\u{1EEA5}' .. '\u{1EEA9}' + | '\u{1EEAB}' .. '\u{1EEBB}' + | '\u{20000}' .. '\u{2A6DF}' + | '\u{2A700}' .. '\u{2B739}' + | '\u{2B740}' .. '\u{2B81D}' + | '\u{2B820}' .. '\u{2CEA1}' + | '\u{2CEB0}' .. '\u{2EBE0}' + | '\u{2F800}' .. '\u{2FA1D}' + | '\u{30000}' .. '\u{3134A}' + | '\u{31350}' .. '\u{323AF}' + ; + diff --git a/python/python3_12_1/PythonParser.g4 b/python/python3_12_1/PythonParser.g4 index c245eb5711..1bbb676710 100644 --- a/python/python3_12_1/PythonParser.g4 +++ b/python/python3_12_1/PythonParser.g4 @@ -20,861 +20,1107 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - /* +/* * Project : an ANTLR4 parser grammar by the official PEG grammar * https://github.com/RobEin/ANTLR4-parser-for-Python-3.12 * Developed by : Robert Einhorn * */ - parser grammar PythonParser; // Python 3.12.1 https://docs.python.org/3.12/reference/grammar.html#full-grammar-specification -options { - tokenVocab=PythonLexer; - superClass=PythonParserBase; -} - -// STARTING RULES -// ============== -file_input: statements? EOF; -interactive: statement_newline; -eval: expressions NEWLINE* EOF; -func_type: '(' type_expressions? ')' '->' expression NEWLINE* EOF; -fstring_input: star_expressions; -// GENERAL STATEMENTS -// ================== +options { tokenVocab = PythonLexer; superClass = PythonParserBase; } +// STARTING RULES -statements: statement+; +// ============== -statement: compound_stmt | simple_stmts; +file_input + : statements? EOF + ; + +interactive + : statement_newline + ; + +eval + : expressions NEWLINE* EOF + ; + +func_type + : '(' type_expressions? ')' '->' expression NEWLINE* EOF + ; + +fstring_input + : star_expressions + ; + // GENERAL STATEMENTS + + // ================== + +statements + : statement+ + ; + +statement + : compound_stmt + | simple_stmts + ; statement_newline - : compound_stmt NEWLINE - | simple_stmts - | NEWLINE - | EOF; + : compound_stmt NEWLINE + | simple_stmts + | NEWLINE + | EOF + ; simple_stmts - : simple_stmt (';' simple_stmt)* ';'? NEWLINE - ; - -// NOTE: assignment MUST precede expression, else parsing a simple assignment -// will throw a SyntaxError. + : simple_stmt (';' simple_stmt)* ';'? NEWLINE + ; + // NOTE: assignment MUST precede expression, else parsing a simple assignment + + // will throw a SyntaxError. 
+ simple_stmt - : assignment - | type_alias - | star_expressions - | return_stmt - | import_stmt - | raise_stmt - | 'pass' - | del_stmt - | yield_stmt - | assert_stmt - | 'break' - | 'continue' - | global_stmt - | nonlocal_stmt; + : assignment + | type_alias + | star_expressions + | return_stmt + | import_stmt + | raise_stmt + | 'pass' + | del_stmt + | yield_stmt + | assert_stmt + | 'break' + | 'continue' + | global_stmt + | nonlocal_stmt + ; compound_stmt - : function_def - | if_stmt - | class_def - | with_stmt - | for_stmt - | try_stmt - | while_stmt - | match_stmt; - -// SIMPLE STATEMENTS -// ================= - -// NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield' + : function_def + | if_stmt + | class_def + | with_stmt + | for_stmt + | try_stmt + | while_stmt + | match_stmt + ; + // SIMPLE STATEMENTS + + // ================= + + // NOTE: annotated_rhs may start with 'yield'; yield_expr must start with 'yield' + assignment - : NAME ':' expression ('=' annotated_rhs )? - | ('(' single_target ')' - | single_subscript_attribute_target) ':' expression ('=' annotated_rhs )? - | (star_targets '=' )+ (yield_expr | star_expressions) TYPE_COMMENT? - | single_target augassign (yield_expr | star_expressions); + : NAME ':' expression ('=' annotated_rhs)? + | ('(' single_target ')' | single_subscript_attribute_target) ':' expression ('=' annotated_rhs)? + | (star_targets '=')+ (yield_expr | star_expressions) TYPE_COMMENT? + | single_target augassign (yield_expr | star_expressions) + ; -annotated_rhs: yield_expr | star_expressions; +annotated_rhs + : yield_expr + | star_expressions + ; augassign - : '+=' - | '-=' - | '*=' - | '@=' - | '/=' - | '%=' - | '&=' - | '|=' - | '^=' - | '<<=' - | '>>=' - | '**=' - | '//='; + : '+=' + | '-=' + | '*=' + | '@=' + | '/=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '**=' + | '//=' + ; return_stmt - : 'return' star_expressions?; + : 'return' star_expressions? 
+ ; raise_stmt - : 'raise' (expression ('from' expression )?)? - ; + : 'raise' (expression ('from' expression)?)? + ; -global_stmt: 'global' NAME (',' NAME)*; +global_stmt + : 'global' NAME (',' NAME)* + ; -nonlocal_stmt: 'nonlocal' NAME (',' NAME)*; +nonlocal_stmt + : 'nonlocal' NAME (',' NAME)* + ; del_stmt - : 'del' del_targets; + : 'del' del_targets + ; -yield_stmt: yield_expr; +yield_stmt + : yield_expr + ; -assert_stmt: 'assert' expression (',' expression )?; +assert_stmt + : 'assert' expression (',' expression)? + ; import_stmt - : import_name - | import_from; - -// Import statements -// ----------------- - -import_name: 'import' dotted_as_names; -// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS + : import_name + | import_from + ; + // Import statements + + // ----------------- + +import_name + : 'import' dotted_as_names + ; + // note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS + import_from - : 'from' ('.' | '...')* dotted_name 'import' import_from_targets - | 'from' ('.' | '...')+ 'import' import_from_targets; + : 'from' ('.' | '...')* dotted_name 'import' import_from_targets + | 'from' ('.' | '...')+ 'import' import_from_targets + ; + import_from_targets - : '(' import_from_as_names ','? ')' - | import_from_as_names - | '*'; + : '(' import_from_as_names ','? ')' + | import_from_as_names + | '*' + ; + import_from_as_names - : import_from_as_name (',' import_from_as_name)*; + : import_from_as_name (',' import_from_as_name)* + ; + import_from_as_name - : NAME ('as' NAME )?; -dotted_as_names - : dotted_as_name (',' dotted_as_name)*; -dotted_as_name - : dotted_name ('as' NAME )?; -dotted_name - : dotted_name '.' NAME - | NAME; + : NAME ('as' NAME)? + ; -// COMPOUND STATEMENTS -// =================== +dotted_as_names + : dotted_as_name (',' dotted_as_name)* + ; -// Common elements -// --------------- +dotted_as_name + : dotted_name ('as' NAME)? + ; +dotted_name + : dotted_name '.' 
NAME + | NAME + ; + // COMPOUND STATEMENTS + + // =================== + + // Common elements + + // --------------- + block - : NEWLINE INDENT statements DEDENT - | simple_stmts; - -decorators: ('@' named_expression NEWLINE )+; - -// Class definitions -// ----------------- - + : NEWLINE INDENT statements DEDENT + | simple_stmts + ; + +decorators + : ('@' named_expression NEWLINE)+ + ; + // Class definitions + + // ----------------- + class_def - : decorators class_def_raw - | class_def_raw; + : decorators class_def_raw + | class_def_raw + ; class_def_raw - : 'class' NAME type_params? ('(' arguments? ')' )? ':' block; - -// Function definitions -// -------------------- - + : 'class' NAME type_params? ('(' arguments? ')')? ':' block + ; + // Function definitions + + // -------------------- + function_def - : decorators function_def_raw - | function_def_raw; + : decorators function_def_raw + | function_def_raw + ; function_def_raw - : 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block - | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression )? ':' func_type_comment? block; - -// Function parameters -// ------------------- - + : 'def' NAME type_params? '(' params? ')' ('->' expression)? ':' func_type_comment? block + | ASYNC 'def' NAME type_params? '(' params? ')' ('->' expression)? ':' func_type_comment? block + ; + // Function parameters + + // ------------------- + params - : parameters; + : parameters + ; parameters - : slash_no_default param_no_default* param_with_default* star_etc? - | slash_with_default param_with_default* star_etc? - | param_no_default+ param_with_default* star_etc? - | param_with_default+ star_etc? - | star_etc; - -// Some duplication here because we can't write (',' | {isCurrentTokenType(RPAR)}?), -// which is because we don't support empty alternatives (yet). - + : slash_no_default param_no_default* param_with_default* star_etc? + | slash_with_default param_with_default* star_etc? 
+ | param_no_default+ param_with_default* star_etc? + | param_with_default+ star_etc? + | star_etc + ; + // Some duplication here because we can't write (',' | {isCurrentTokenType(RPAR)}?), + + // which is because we don't support empty alternatives (yet). + slash_no_default - : param_no_default+ '/' ','? - ; + : param_no_default+ '/' ','? + ; + slash_with_default - : param_no_default* param_with_default+ '/' ','? - ; + : param_no_default* param_with_default+ '/' ','? + ; star_etc - : '*' param_no_default param_maybe_default* kwds? - | '*' param_no_default_star_annotation param_maybe_default* kwds? - | '*' ',' param_maybe_default+ kwds? - | kwds; + : '*' param_no_default param_maybe_default* kwds? + | '*' param_no_default_star_annotation param_maybe_default* kwds? + | '*' ',' param_maybe_default+ kwds? + | kwds + ; kwds - : '**' param_no_default; - -// One parameter. This *includes* a following comma and type comment. -// -// There are three styles: -// - No default_assignment -// - With default_assignment -// - Maybe with default_assignment -// -// There are two alternative forms of each, to deal with type comments: -// - Ends in a comma followed by an optional type comment -// - No comma, optional type comment, must be followed by close paren -// The latter form is for a final parameter without trailing comma. -// - + : '**' param_no_default + ; + // One parameter. This *includes* a following comma and type comment. + + // + + // There are three styles: + + // - No default_assignment + + // - With default_assignment + + // - Maybe with default_assignment + + // + + // There are two alternative forms of each, to deal with type comments: + + // - Ends in a comma followed by an optional type comment + + // - No comma, optional type comment, must be followed by close paren + + // The latter form is for a final parameter without trailing comma. + + // + param_no_default - : param ','? TYPE_COMMENT? - ; + : param ','? TYPE_COMMENT? 
+ ; + param_no_default_star_annotation - : param_star_annotation ','? TYPE_COMMENT? - ; -param_with_default - : param default_assignment ','? TYPE_COMMENT? - ; -param_maybe_default - : param default_assignment? ','? TYPE_COMMENT? - ; -param: NAME annotation?; -param_star_annotation: NAME star_annotation; -annotation: ':' expression; -star_annotation: ':' star_expression; -default_assignment: '=' expression; + : param_star_annotation ','? TYPE_COMMENT? + ; -// If statement -// ------------ +param_with_default + : param default_assignment ','? TYPE_COMMENT? + ; +param_maybe_default + : param default_assignment? ','? TYPE_COMMENT? + ; + +param + : NAME annotation? + ; + +param_star_annotation + : NAME star_annotation + ; + +annotation + : ':' expression + ; + +star_annotation + : ':' star_expression + ; + +default_assignment + : '=' expression + ; + // If statement + + // ------------ + if_stmt - : 'if' named_expression ':' block (elif_stmt | else_block?) - ; -elif_stmt - : 'elif' named_expression ':' block (elif_stmt | else_block?) - ; -else_block - : 'else' ':' block; + : 'if' named_expression ':' block (elif_stmt | else_block?) + ; -// While statement -// --------------- +elif_stmt + : 'elif' named_expression ':' block (elif_stmt | else_block?) + ; +else_block + : 'else' ':' block + ; + // While statement + + // --------------- + while_stmt - : 'while' named_expression ':' block else_block?; - -// For statement -// ------------- - + : 'while' named_expression ':' block else_block? + ; + // For statement + + // ------------- + for_stmt - : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block? - ; - -// With statement -// -------------- - + : ASYNC? 'for' star_targets 'in' star_expressions ':' TYPE_COMMENT? block else_block? + ; + // With statement + + // -------------- + with_stmt - : ASYNC? 'with' ( '(' with_item (',' with_item)* ','? ')' ':' - | with_item (',' with_item)* ':' TYPE_COMMENT? - ) block - ; + : ASYNC? 
'with' ('(' with_item (',' with_item)* ','? ')' ':' | with_item (',' with_item)* ':' TYPE_COMMENT?) block + ; with_item - : expression ('as' star_target)? - ; - -// Try statement -// ------------- - + : expression ('as' star_target)? + ; + // Try statement + + // ------------- + try_stmt - : 'try' ':' block finally_block - | 'try' ':' block except_block+ else_block? finally_block? - | 'try' ':' block except_star_block+ else_block? finally_block?; - - -// Except statement -// ---------------- - + : 'try' ':' block finally_block + | 'try' ':' block except_block+ else_block? finally_block? + | 'try' ':' block except_star_block+ else_block? finally_block? + ; + // Except statement + + // ---------------- + except_block - : 'except' (expression ('as' NAME )?)? ':' block - ; -except_star_block - : 'except' '*' expression ('as' NAME )? ':' block; -finally_block - : 'finally' ':' block; + : 'except' (expression ('as' NAME)?)? ':' block + ; -// Match statement -// --------------- +except_star_block + : 'except' '*' expression ('as' NAME)? ':' block + ; +finally_block + : 'finally' ':' block + ; + // Match statement + + // --------------- + match_stmt - : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT; + : soft_kw_match subject_expr ':' NEWLINE INDENT case_block+ DEDENT + ; subject_expr - : star_named_expression ',' star_named_expressions? - | named_expression; + : star_named_expression ',' star_named_expressions? + | named_expression + ; case_block - : soft_kw_case patterns guard? ':' block; + : soft_kw_case patterns guard? 
':' block + ; -guard: 'if' named_expression; +guard + : 'if' named_expression + ; patterns - : open_sequence_pattern - | pattern; + : open_sequence_pattern + | pattern + ; pattern - : as_pattern - | or_pattern; + : as_pattern + | or_pattern + ; as_pattern - : or_pattern 'as' pattern_capture_target; + : or_pattern 'as' pattern_capture_target + ; or_pattern - : closed_pattern ('|' closed_pattern)*; + : closed_pattern ('|' closed_pattern)* + ; closed_pattern - : literal_pattern - | capture_pattern - | wildcard_pattern - | value_pattern - | group_pattern - | sequence_pattern - | mapping_pattern - | class_pattern; - -// Literal patterns are used for equality and identity constraints + : literal_pattern + | capture_pattern + | wildcard_pattern + | value_pattern + | group_pattern + | sequence_pattern + | mapping_pattern + | class_pattern + ; + // Literal patterns are used for equality and identity constraints + literal_pattern - : signed_number - | complex_number - | strings - | 'None' - | 'True' - | 'False'; - -// Literal expressions are used to restrict permitted mapping pattern keys + : signed_number + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; + // Literal expressions are used to restrict permitted mapping pattern keys + literal_expr - : signed_number - | complex_number - | strings - | 'None' - | 'True' - | 'False'; + : signed_number + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; complex_number - : signed_real_number ('+' | '-') imaginary_number - ; + : signed_real_number ('+' | '-') imaginary_number + ; signed_number - : '-'? NUMBER - ; + : '-'? NUMBER + ; signed_real_number - : '-'? real_number - ; + : '-'? 
real_number + ; real_number - : NUMBER; + : NUMBER + ; imaginary_number - : NUMBER; + : NUMBER + ; capture_pattern - : pattern_capture_target; + : pattern_capture_target + ; pattern_capture_target - : soft_kw__not__wildcard; + : soft_kw__not__wildcard + ; wildcard_pattern - : soft_kw_wildcard; + : soft_kw_wildcard + ; value_pattern - : attr; + : attr + ; attr - : NAME ('.' NAME)+ - ; + : NAME ('.' NAME)+ + ; + name_or_attr - : NAME ('.' NAME)* - ; + : NAME ('.' NAME)* + ; group_pattern - : '(' pattern ')'; + : '(' pattern ')' + ; sequence_pattern - : '[' maybe_sequence_pattern? ']' - | '(' open_sequence_pattern? ')'; + : '[' maybe_sequence_pattern? ']' + | '(' open_sequence_pattern? ')' + ; open_sequence_pattern - : maybe_star_pattern ',' maybe_sequence_pattern?; + : maybe_star_pattern ',' maybe_sequence_pattern? + ; maybe_sequence_pattern - : maybe_star_pattern (',' maybe_star_pattern)* ','?; + : maybe_star_pattern (',' maybe_star_pattern)* ','? + ; maybe_star_pattern - : star_pattern - | pattern; + : star_pattern + | pattern + ; star_pattern - : '*' pattern_capture_target - | '*' wildcard_pattern; + : '*' pattern_capture_target + | '*' wildcard_pattern + ; mapping_pattern - : LBRACE RBRACE - | LBRACE double_star_pattern ','? RBRACE - | LBRACE items_pattern (',' double_star_pattern)? ','? RBRACE - ; + : LBRACE RBRACE + | LBRACE double_star_pattern ','? RBRACE + | LBRACE items_pattern (',' double_star_pattern)? ','? RBRACE + ; items_pattern - : key_value_pattern (',' key_value_pattern)*; + : key_value_pattern (',' key_value_pattern)* + ; key_value_pattern - : (literal_expr | attr) ':' pattern; + : (literal_expr | attr) ':' pattern + ; double_star_pattern - : '**' pattern_capture_target; + : '**' pattern_capture_target + ; class_pattern - : name_or_attr '(' ((positional_patterns (',' keyword_patterns)? | keyword_patterns) ','?)? ')' - ; - - + : name_or_attr '(' ((positional_patterns (',' keyword_patterns)? | keyword_patterns) ','?)? 
')' + ; positional_patterns - : pattern (',' pattern)*; + : pattern (',' pattern)* + ; keyword_patterns - : keyword_pattern (',' keyword_pattern)*; + : keyword_pattern (',' keyword_pattern)* + ; keyword_pattern - : NAME '=' pattern; - -// Type statement -// --------------- - + : NAME '=' pattern + ; + // Type statement + + // --------------- + type_alias - : soft_kw_type NAME type_params? '=' expression; - -// Type parameter declaration -// -------------------------- - -type_params: '[' type_param_seq ']'; - -type_param_seq: type_param (',' type_param)* ','?; + : soft_kw_type NAME type_params? '=' expression + ; + // Type parameter declaration + + // -------------------------- + +type_params + : '[' type_param_seq ']' + ; + +type_param_seq + : type_param (',' type_param)* ','? + ; type_param - : NAME type_param_bound? - | '*' NAME (':' expression)? - | '**' NAME (':' expression)? - ; - - -type_param_bound: ':' expression; - -// EXPRESSIONS -// ----------- - + : NAME type_param_bound? + | '*' NAME (':' expression)? + | '**' NAME (':' expression)? + ; + +type_param_bound + : ':' expression + ; + // EXPRESSIONS + + // ----------- + expressions - : expression (',' expression )* ','? - ; - + : expression (',' expression)* ','? + ; expression - : disjunction ('if' disjunction 'else' expression)? - | lambdef - ; + : disjunction ('if' disjunction 'else' expression)? + | lambdef + ; yield_expr - : 'yield' ('from' expression | star_expressions?) - ; + : 'yield' ('from' expression | star_expressions?) + ; star_expressions - : star_expression (',' star_expression )* ','? - ; - + : star_expression (',' star_expression)* ','? + ; star_expression - : '*' bitwise_or - | expression; + : '*' bitwise_or + | expression + ; -star_named_expressions: star_named_expression (',' star_named_expression)* ','?; +star_named_expressions + : star_named_expression (',' star_named_expression)* ','? 
+ ; star_named_expression - : '*' bitwise_or - | named_expression; + : '*' bitwise_or + | named_expression + ; assignment_expression - : NAME ':=' expression; + : NAME ':=' expression + ; named_expression - : assignment_expression - | expression; + : assignment_expression + | expression + ; disjunction - : conjunction ('or' conjunction )* - ; + : conjunction ('or' conjunction)* + ; conjunction - : inversion ('and' inversion )* - ; + : inversion ('and' inversion)* + ; inversion - : 'not' inversion - | comparison; - -// Comparison operators -// -------------------- - + : 'not' inversion + | comparison + ; + // Comparison operators + + // -------------------- + comparison - : bitwise_or compare_op_bitwise_or_pair* - ; + : bitwise_or compare_op_bitwise_or_pair* + ; compare_op_bitwise_or_pair - : eq_bitwise_or - | noteq_bitwise_or - | lte_bitwise_or - | lt_bitwise_or - | gte_bitwise_or - | gt_bitwise_or - | notin_bitwise_or - | in_bitwise_or - | isnot_bitwise_or - | is_bitwise_or; - -eq_bitwise_or: '==' bitwise_or; -noteq_bitwise_or - : ('!=' ) bitwise_or; -lte_bitwise_or: '<=' bitwise_or; -lt_bitwise_or: '<' bitwise_or; -gte_bitwise_or: '>=' bitwise_or; -gt_bitwise_or: '>' bitwise_or; -notin_bitwise_or: 'not' 'in' bitwise_or; -in_bitwise_or: 'in' bitwise_or; -isnot_bitwise_or: 'is' 'not' bitwise_or; -is_bitwise_or: 'is' bitwise_or; - -// Bitwise operators -// ----------------- + : eq_bitwise_or + | noteq_bitwise_or + | lte_bitwise_or + | lt_bitwise_or + | gte_bitwise_or + | gt_bitwise_or + | notin_bitwise_or + | in_bitwise_or + | isnot_bitwise_or + | is_bitwise_or + ; + +eq_bitwise_or + : '==' bitwise_or + ; +noteq_bitwise_or + : ('!=') bitwise_or + ; + +lte_bitwise_or + : '<=' bitwise_or + ; + +lt_bitwise_or + : '<' bitwise_or + ; + +gte_bitwise_or + : '>=' bitwise_or + ; + +gt_bitwise_or + : '>' bitwise_or + ; + +notin_bitwise_or + : 'not' 'in' bitwise_or + ; + +in_bitwise_or + : 'in' bitwise_or + ; + +isnot_bitwise_or + : 'is' 'not' bitwise_or + ; + +is_bitwise_or + 
: 'is' bitwise_or + ; + // Bitwise operators + + // ----------------- + bitwise_or - : bitwise_or '|' bitwise_xor - | bitwise_xor; + : bitwise_or '|' bitwise_xor + | bitwise_xor + ; bitwise_xor - : bitwise_xor '^' bitwise_and - | bitwise_and; + : bitwise_xor '^' bitwise_and + | bitwise_and + ; bitwise_and - : bitwise_and '&' shift_expr - | shift_expr; + : bitwise_and '&' shift_expr + | shift_expr + ; shift_expr - : shift_expr ('<<' | '>>') sum - | sum - ; - -// Arithmetic operators -// -------------------- - + : shift_expr ('<<' | '>>') sum + | sum + ; + // Arithmetic operators + + // -------------------- + sum - : sum ('+' | '-') term - | term - ; + : sum ('+' | '-') term + | term + ; term - : term ('*' | '/' | '//' | '%' | '@') factor - | factor - ; - - - + : term ('*' | '/' | '//' | '%' | '@') factor + | factor + ; factor - : '+' factor - | '-' factor - | '~' factor - | power; + : '+' factor + | '-' factor + | '~' factor + | power + ; power - : await_primary ('**' factor)? - ; - -// Primary elements -// ---------------- - -// Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ... - + : await_primary ('**' factor)? + ; + // Primary elements + + // ---------------- + + // Primary elements are things like "obj.something.something", "obj[something]", "obj(something)", "obj" ... + await_primary - : AWAIT primary - | primary; + : AWAIT primary + | primary + ; primary - : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']') - | atom - ; - - + : primary ('.' NAME | genexp | '(' arguments? ')' | '[' slices ']') + | atom + ; slices - : slice - | (slice | starred_expression) (',' (slice | starred_expression))* ','?; + : slice + | (slice | starred_expression) (',' (slice | starred_expression))* ','? + ; slice - : expression? ':' expression? (':' expression? )? - | named_expression; + : expression? ':' expression? (':' expression?)? 
+ | named_expression + ; atom - : NAME - | 'True' - | 'False' - | 'None' - | strings - | NUMBER - | (tuple | group | genexp) - | (list | listcomp) - | (dict | set | dictcomp | setcomp) - | '...'; + : NAME + | 'True' + | 'False' + | 'None' + | strings + | NUMBER + | (tuple | group | genexp) + | (list | listcomp) + | (dict | set | dictcomp | setcomp) + | '...' + ; group - : '(' (yield_expr | named_expression) ')'; - -// Lambda functions -// ---------------- - + : '(' (yield_expr | named_expression) ')' + ; + // Lambda functions + + // ---------------- + lambdef - : 'lambda' lambda_params? ':' expression; + : 'lambda' lambda_params? ':' expression + ; lambda_params - : lambda_parameters; - -// lambda_parameters etc. duplicates parameters but without annotations -// or type comments, and if there's no comma after a parameter, we expect -// a colon, not a close parenthesis. (For more, see parameters above.) -// + : lambda_parameters + ; + // lambda_parameters etc. duplicates parameters but without annotations + + // or type comments, and if there's no comma after a parameter, we expect + + // a colon, not a close parenthesis. (For more, see parameters above.) + + // + lambda_parameters - : lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* lambda_star_etc? - | lambda_slash_with_default lambda_param_with_default* lambda_star_etc? - | lambda_param_no_default+ lambda_param_with_default* lambda_star_etc? - | lambda_param_with_default+ lambda_star_etc? - | lambda_star_etc; + : lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* lambda_star_etc? + | lambda_slash_with_default lambda_param_with_default* lambda_star_etc? + | lambda_param_no_default+ lambda_param_with_default* lambda_star_etc? + | lambda_param_with_default+ lambda_star_etc? + | lambda_star_etc + ; lambda_slash_no_default - : lambda_param_no_default+ '/' ','? - ; + : lambda_param_no_default+ '/' ','? 
+ ; lambda_slash_with_default - : lambda_param_no_default* lambda_param_with_default+ '/' ','? - ; + : lambda_param_no_default* lambda_param_with_default+ '/' ','? + ; lambda_star_etc - : '*' lambda_param_no_default lambda_param_maybe_default* lambda_kwds? - | '*' ',' lambda_param_maybe_default+ lambda_kwds? - | lambda_kwds; + : '*' lambda_param_no_default lambda_param_maybe_default* lambda_kwds? + | '*' ',' lambda_param_maybe_default+ lambda_kwds? + | lambda_kwds + ; lambda_kwds - : '**' lambda_param_no_default; + : '**' lambda_param_no_default + ; lambda_param_no_default - : lambda_param ','? - ; -lambda_param_with_default - : lambda_param default_assignment ','? - ; -lambda_param_maybe_default - : lambda_param default_assignment? ','? - ; -lambda_param: NAME; + : lambda_param ','? + ; -// LITERALS -// ======== +lambda_param_with_default + : lambda_param default_assignment ','? + ; +lambda_param_maybe_default + : lambda_param default_assignment? ','? + ; + +lambda_param + : NAME + ; + // LITERALS + + // ======== + fstring_middle - : fstring_replacement_field - | FSTRING_MIDDLE; + : fstring_replacement_field + | FSTRING_MIDDLE + ; + fstring_replacement_field - : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE; + : LBRACE (yield_expr | star_expressions) '='? fstring_conversion? fstring_full_format_spec? RBRACE + ; + fstring_conversion - : '!' NAME; + : '!' NAME + ; + fstring_full_format_spec - : ':' fstring_format_spec*; + : ':' fstring_format_spec* + ; + fstring_format_spec - : FSTRING_MIDDLE - | fstring_replacement_field; + : FSTRING_MIDDLE + | fstring_replacement_field + ; + fstring - : FSTRING_START fstring_middle* FSTRING_END; + : FSTRING_START fstring_middle* FSTRING_END + ; -string: STRING; -strings: (fstring|string)+; +string + : STRING + ; + +strings + : (fstring | string)+ + ; list - : '[' star_named_expressions? ']'; + : '[' star_named_expressions? 
']' + ; tuple - : '(' (star_named_expression ',' star_named_expressions? )? ')'; - -set: LBRACE star_named_expressions RBRACE; - -// Dicts -// ----- - + : '(' (star_named_expression ',' star_named_expressions?)? ')' + ; + +set + : LBRACE star_named_expressions RBRACE + ; + // Dicts + + // ----- + dict - : LBRACE double_starred_kvpairs? RBRACE; + : LBRACE double_starred_kvpairs? RBRACE + ; -double_starred_kvpairs: double_starred_kvpair (',' double_starred_kvpair)* ','?; +double_starred_kvpairs + : double_starred_kvpair (',' double_starred_kvpair)* ','? + ; double_starred_kvpair - : '**' bitwise_or - | kvpair; - -kvpair: expression ':' expression; - -// Comprehensions & Generators -// --------------------------- - + : '**' bitwise_or + | kvpair + ; + +kvpair + : expression ':' expression + ; + // Comprehensions & Generators + + // --------------------------- + for_if_clauses - : for_if_clause+; + : for_if_clause+ + ; for_if_clause - : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction )* - ; + : ASYNC? 'for' star_targets 'in' disjunction ('if' disjunction)* + ; listcomp - : '[' named_expression for_if_clauses ']'; + : '[' named_expression for_if_clauses ']' + ; setcomp - : LBRACE named_expression for_if_clauses RBRACE; + : LBRACE named_expression for_if_clauses RBRACE + ; genexp - : '(' ( assignment_expression | expression) for_if_clauses ')'; + : '(' (assignment_expression | expression) for_if_clauses ')' + ; dictcomp - : LBRACE kvpair for_if_clauses RBRACE; - -// FUNCTION CALL ARGUMENTS -// ======================= - + : LBRACE kvpair for_if_clauses RBRACE + ; + // FUNCTION CALL ARGUMENTS + + // ======================= + arguments - : args ','?; + : args ','? + ; args - : (starred_expression | ( assignment_expression | expression)) (',' (starred_expression | ( assignment_expression | expression)))* (',' kwargs )? 
- | kwargs; + : (starred_expression | (assignment_expression | expression)) (',' (starred_expression | (assignment_expression | expression)))* (',' kwargs)? + | kwargs + ; kwargs - : kwarg_or_starred (',' kwarg_or_starred)* (',' kwarg_or_double_starred (',' kwarg_or_double_starred)*)? - | kwarg_or_double_starred (',' kwarg_or_double_starred)* - ; + : kwarg_or_starred (',' kwarg_or_starred)* (',' kwarg_or_double_starred (',' kwarg_or_double_starred)*)? + | kwarg_or_double_starred (',' kwarg_or_double_starred)* + ; starred_expression - : '*' expression; + : '*' expression + ; kwarg_or_starred - : NAME '=' expression - | starred_expression; + : NAME '=' expression + | starred_expression + ; kwarg_or_double_starred - : NAME '=' expression - | '**' expression; - -// ASSIGNMENT TARGETS -// ================== - -// Generic targets -// --------------- - -// NOTE: star_targets may contain *bitwise_or, targets may not. + : NAME '=' expression + | '**' expression + ; + // ASSIGNMENT TARGETS + + // ================== + + // Generic targets + + // --------------- + + // NOTE: star_targets may contain *bitwise_or, targets may not. + star_targets - : star_target (',' star_target )* ','? - ; + : star_target (',' star_target)* ','? + ; -star_targets_list_seq: star_target (',' star_target)+ ','?; +star_targets_list_seq + : star_target (',' star_target)+ ','? + ; star_targets_tuple_seq - : star_target (',' | (',' star_target )+ ','?) - ; + : star_target (',' | (',' star_target)+ ','?) + ; star_target - : '*' (star_target) - | target_with_star_atom; + : '*' (star_target) + | target_with_star_atom + ; target_with_star_atom - : t_primary ('.' NAME | '[' slices ']') - | star_atom - ; + : t_primary ('.' NAME | '[' slices ']') + | star_atom + ; star_atom - : NAME - | '(' target_with_star_atom ')' - | '(' star_targets_tuple_seq? ')' - | '[' star_targets_list_seq? ']'; + : NAME + | '(' target_with_star_atom ')' + | '(' star_targets_tuple_seq? ')' + | '[' star_targets_list_seq? 
']' + ; single_target - : single_subscript_attribute_target - | NAME - | '(' single_target ')'; + : single_subscript_attribute_target + | NAME + | '(' single_target ')' + ; single_subscript_attribute_target - : t_primary ('.' NAME | '[' slices ']') - ; + : t_primary ('.' NAME | '[' slices ']') + ; t_primary - : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')') - | atom - ; - - - - - -// Targets for del statements -// -------------------------- - -del_targets: del_target (',' del_target)* ','?; + : t_primary ('.' NAME | '[' slices ']' | genexp | '(' arguments? ')') + | atom + ; + // Targets for del statements + + // -------------------------- + +del_targets + : del_target (',' del_target)* ','? + ; del_target - : t_primary ('.' NAME | '[' slices ']') - | del_t_atom - ; + : t_primary ('.' NAME | '[' slices ']') + | del_t_atom + ; del_t_atom - : NAME - | '(' del_target ')' - | '(' del_targets? ')' - | '[' del_targets? ']'; - -// TYPING ELEMENTS -// --------------- - - -// type_expressions allow */** but ignore them + : NAME + | '(' del_target ')' + | '(' del_targets? ')' + | '[' del_targets? ']' + ; + // TYPING ELEMENTS + + // --------------- + + // type_expressions allow */** but ignore them + type_expressions - : expression (',' expression)* (',' ('*' expression (',' '**' expression)? | '**' expression))? - | '*' expression (',' '**' expression)? - | '**' expression - ; - - + : expression (',' expression)* (',' ('*' expression (',' '**' expression)? | '**' expression))? + | '*' expression (',' '**' expression)? + | '**' expression + ; func_type_comment - : NEWLINE TYPE_COMMENT // Must be followed by indented block - | TYPE_COMMENT; - -// *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords -soft_kw_type: {this.isEqualToCurrentTokenText("type")}? NAME; -soft_kw_match: {this.isEqualToCurrentTokenText("match")}? NAME; -soft_kw_case: {this.isEqualToCurrentTokenText("case")}? 
NAME; -soft_kw_wildcard: {this.isEqualToCurrentTokenText("_")}? NAME; -soft_kw__not__wildcard: {this.isnotEqualToCurrentTokenText("_")}? NAME; - -// ========================= END OF THE GRAMMAR =========================== + : NEWLINE TYPE_COMMENT // Must be followed by indented block + | TYPE_COMMENT + ; + // *** Soft Keywords: https://docs.python.org/3.12/reference/lexical_analysis.html#soft-keywords + +soft_kw_type + : + {this.isEqualToCurrentTokenText("type")}? NAME + ; + +soft_kw_match + : + {this.isEqualToCurrentTokenText("match")}? NAME + ; + +soft_kw_case + : + {this.isEqualToCurrentTokenText("case")}? NAME + ; + +soft_kw_wildcard + : + {this.isEqualToCurrentTokenText("_")}? NAME + ; + +soft_kw__not__wildcard + : + {this.isnotEqualToCurrentTokenText("_")}? NAME + ; + // ========================= END OF THE GRAMMAR =========================== + diff --git a/scss/ScssLexer.g4 b/scss/ScssLexer.g4 index aa452f9c81..791f753b8e 100644 --- a/scss/ScssLexer.g4 +++ b/scss/ScssLexer.g4 @@ -28,138 +28,635 @@ */ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false + + // $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine + + // $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true lexer grammar ScssLexer; -fragment Hex : [0-9a-fA-F]; -fragment NewlineOrSpace : '\r\n' | [ \t\r\n\f] |; -fragment Unicode : '\\' Hex Hex? Hex? Hex? Hex? Hex? NewlineOrSpace; -fragment Escape : Unicode | '\\' ~[\r\n\f0-9a-fA-F]; -fragment Whitespace : Space |; -fragment Newline : '\n' | '\r\n' | '\r' | '\f'; -fragment ZeroToFourZeros : '0'? '0'? '0'? 
'0'?; -fragment DashChar : '-' | '\\' ZeroToFourZeros '2d' NewlineOrSpace; - -fragment Nmstart : [_a-zA-Z] | Nonascii | Escape; -fragment Nmchar : [_a-zA-Z0-9\-] | Nonascii | Escape; -fragment Nonascii : ~[\u0000-\u007f]; -fragment Name : Nmchar+; -fragment Url : ( [!#$%&*-~] | Nonascii | Escape)*; - -Comment : (LineComment | MultiLineComment) -> skip; -MultiLineComment : '/*' ~'*'* '*'+ ( ~[/*] ~'*'* '*'+)* '/'; -LineComment : '//' ~([\n\r\u2028\u2029])*; -Space : [ \t\r\n\f]+ -> skip; - -Uri : 'url(' Whitespace (Url | String_) (Space (Url | String_))* Whitespace ')'; -Format : 'format(' Whitespace String_ Whitespace ')'; - -AbsLength : 'px' | 'cm' | 'mm' | 'pt' | 'pc' | 'q'; -FontRelative : 'em' | 'ex' | 'ch' | 'rem'; -ViewportRelative : 'vw' | 'vh' | 'vmin' | 'vmax'; -Angle : 'deg' | 'rad' | 'grad' | 'turn'; -Resolution : 'dpi' | 'dpcm' | 'dppx'; -Freq : 'hz' | 'khz' | 'fr'; -Time : 'ms' | 's'; -Percentage : '%'; - -Import : '@import'; -Include : '@include'; -Use : '@use'; -Require : '@require'; -Charset : '@charset '; -Mixin : '@mixin'; -Function : '@function'; -FontFace : '@font-face'; -Forward : '@forward'; -Content : '@content'; -Keyframes : '@keyframes'; -Return : '@return'; -Media : '@media'; -Extend : '@extend'; -Warn : '@warn'; -Error : '@error'; - -If : 'if'; -AtIf : '@if'; -AtFor : '@for'; -AtElse : '@else'; -AtWhile : '@while'; -AtEach : '@each'; - -From : 'from'; -To : 'to'; -Through : 'through'; -Only : 'only'; -Not : 'not'; -And : 'and'; -Using : 'using'; -As : 'as'; -With : 'with'; -Or : 'or'; -In : 'in'; - -Default : '!default'; -Important : '!important'; - -Lparen : '('; -Rparen : ')'; -Lbrack : '['; -Rbrack : ']'; -BlockStart : '{'; -BlockEnd : '}'; - -Dot : '.'; -Comma : ','; -Colon : ':'; -Semi : ';'; - -Tilde : '~'; -Under : '_'; -Dollar : '$'; -At : '@'; -Amp : '&'; -Hash : '#'; -True : 'true'; -False : 'false'; - -Plus : '+'; -Div : '/'; -Minus : '-'; -Times : '*'; - -Eq : '='; -NotEq : '!='; -Greater : '>'; -Less : '<'; -Includes : '~='; 
-DashMatch : '|='; -Pipe : '|'; -Cdo : '<!--'; -Cdc : '-->'; - -PseudoNot : ':not('; -Calc : 'calc('; -Rotate : 'rotate('; -Var : 'var('; -Rgba : 'rgba('; -Repeat : 'repeat('; - -PrefixMatch : '^='; -SuffixMatch : '$='; -SubstringMatch : '*='; - -VendorPrefix: '-moz-' | '-webkit-' | '-o-'; - -Variable : '--' (Interpolation | Nmstart) (Interpolation | Nmchar)*; -fragment Interpolation : Hash BlockStart Dollar? Ident BlockEnd; -Number : [0-9]+ | [0-9]* '.' [0-9]+; -String_: - '"' (~[\n\r\f\\"] | '\\' Newline | Escape)* '"' - | '\'' ( ~[\n\r\f\\'] | '\\' Newline | Escape)* '\'' -; - -// Give Ident least priority so that more specific rules matches first -Ident: Nmstart Nmchar*; \ No newline at end of file +fragment Hex + : [0-9a-fA-F] + ; + +fragment NewlineOrSpace + : '\r\n' + | [ \t\r\n\f] + | + ; + +fragment Unicode + : '\\' Hex Hex? Hex? Hex? Hex? Hex? NewlineOrSpace + ; + +fragment Escape + : Unicode + | '\\' ~ [\r\n\f0-9a-fA-F] + ; + +fragment Whitespace + : Space + | + ; + +fragment Newline + : '\n' + | '\r\n' + | '\r' + | '\f' + ; + +fragment ZeroToFourZeros + : '0'? '0'? '0'? '0'?
+ ; + +fragment DashChar + : '-' + | '\\' ZeroToFourZeros '2d' NewlineOrSpace + ; + +fragment Nmstart + : [_a-zA-Z] + | Nonascii + | Escape + ; + +fragment Nmchar + : [_a-zA-Z0-9\-] + | Nonascii + | Escape + ; + +fragment Nonascii + : ~ [\u0000-\u007f] + ; + +fragment Name + : Nmchar+ + ; + +fragment Url + : ([!#$%&*-~] | Nonascii | Escape)* + ; + +Comment + : (LineComment | MultiLineComment) -> skip + ; + +MultiLineComment + : '/*' ~ '*'* '*'+ (~ [/*] ~ '*'* '*'+)* '/' + ; + +LineComment + : '//' ~ ([\n\r\u2028\u2029])* + ; + +Space + : [ \t\r\n\f]+ -> skip + ; + +Uri + : 'url(' Whitespace (Url | String_) (Space (Url | String_))* Whitespace ')' + ; + +Format + : 'format(' Whitespace String_ Whitespace ')' + ; + +AbsLength + : 'px' + | 'cm' + | 'mm' + | 'pt' + | 'pc' + | 'q' + ; + +FontRelative + : 'em' + | 'ex' + | 'ch' + | 'rem' + ; + +ViewportRelative + : 'vw' + | 'vh' + | 'vmin' + | 'vmax' + ; + +HtmlTags + :'a' + | 'abbr' + | 'acronym' + | 'address' + | 'applet' + | 'article' + | 'aside' + | 'audio' + | 'b' + | 'basefont' + | 'bdi' + | 'bdo' + | 'bgsound' + | 'big' + | 'blink' + | 'blockquote' + | 'body' + | 'button' + | 'canvas' + | 'caption' + | 'center' + | 'circle' + | 'cite' + | 'clipPath' + | 'code' + | 'colgroup' + | 'command' + | 'content' + | 'data' + | 'datalist' + | 'dd' + | 'defs' + | 'del' + | 'details' + | 'dfn' + | 'dialog' + | 'dir' + | 'div' + | 'dl' + | 'dt' + | 'element' + | 'ellipse' + | 'em' + | 'fieldset' + | 'figcaption' + | 'figure' + | 'font' + | 'footer' + | 'foreignObject' + | 'form' + | 'frame' + | 'frameset' + | 'g' + | 'h1' + | 'h2' + | 'h3' + | 'h4' + | 'h5' + | 'h6' + | 'head' + | 'header' + | 'hgroup' + | 'html' + | 'i' + | 'iframe' + | 'image' + | 'ins' + | 'isindex' + | 'kbd' + | 'label' + | 'legend' + | 'li' + | 'line' + | 'linearGradient' + | 'listing' + | 'main' + | 'map' + | 'mark' + | 'marquee' + | 'mask' + | 'math' + | 'menu' + | 'menuitem' + | 'meter' + | 'multicol' + | 'nav' + | 'nextid' + | 'nobr' + | 'noembed' + | 
'noframes' + | 'noscript' + | 'object' + | 'ol' + | 'optgroup' + | 'option' + | 'output' + | 'p' + | 'path' + | 'pattern' + | 'picture' + | 'plaintext' + | 'polygon' + | 'polyline' + | 'pre' + | 'progress' + | 'q' + | 'radialGradient' + | 'rb' + | 'rbc' + | 'rect' + | 'rp' + | 'rt' + | 'rtc' + | 'ruby' + | 's' + | 'samp' + | 'script' + | 'section' + | 'select' + | 'shadow' + | 'slot' + | 'small' + | 'spacer' + | 'span' + | 'stop' + | 'strike' + | 'strong' + | 'style' + | 'nostyle' + | 'sub' + | 'summary' + | 'sup' + | 'svg' + | 'table' + | 'tbody' + | 'td' + | 'template' + | 'text' + | 'textarea' + | 'tfoot' + | 'th' + | 'thead' + | 'time' + | 'title' + | 'tr' + | 'tspan' + | 'tt' + | 'u' + | 'ul' + | 'var' + | 'video' + | 'xmp' + ; + +Angle + : 'deg' + | 'rad' + | 'grad' + | 'turn' + ; + +Resolution + : 'dpi' + | 'dpcm' + | 'dppx' + ; + +Freq + : 'hz' + | 'khz' + | 'fr' + ; + +Time + : 'ms' + | 's' + ; + +Percentage + : '%' + ; + +Import + : '@import' + ; + +Include + : '@include' + ; + +Use + : '@use' + ; + +Require + : '@require' + ; + +Charset + : '@charset ' + ; + +Mixin + : '@mixin' + ; + +Function + : '@function' + ; + +FontFace + : '@font-face' + ; + +Forward + : '@forward' + ; + +Content + : '@content' + ; + +Keyframes + : '@keyframes' + ; + +Return + : '@return' + ; + +Media + : '@media' + ; + +Extend + : '@extend' + ; + +Warn + : '@warn' + ; + +Error + : '@error' + ; + +If + : 'if' + ; + +AtIf + : '@if' + ; + +AtFor + : '@for' + ; + +AtElse + : '@else' + ; + +AtWhile + : '@while' + ; + +AtEach + : '@each' + ; + +From + : 'from' + ; + +To + : 'to' + ; + +Through + : 'through' + ; + +Only + : 'only' + ; + +Not + : 'not' + ; + +And + : 'and' + ; + +Using + : 'using' + ; + +As + : 'as' + ; + +With + : 'with' + ; + +Or + : 'or' + ; + +In + : 'in' + ; + +Default + : '!default' + ; + +Important + : '!important' + ; + +Lparen + : '(' + ; + +Rparen + : ')' + ; + +Lbrack + : '[' + ; + +Rbrack + : ']' + ; + +BlockStart + : '{' + ; + +BlockEnd + : '}' + ; + +Dot + : 
'.' + ; + +Comma + : ',' + ; + +Colon + : ':' + ; + +Semi + : ';' + ; + +Tilde + : '~' + ; + +Under + : '_' + ; + +Dollar + : '$' + ; + +At + : '@' + ; + +Amp + : '&' + ; + +Hash + : '#' + ; + +True + : 'true' + ; + +False + : 'false' + ; + +Plus + : '+' + ; + +Div + : '/' + ; + +Minus + : '-' + ; + +Times + : '*' + ; + +Eq + : '=' + ; + +NotEq + : '!=' + ; + +Greater + : '>' + ; + +Less + : '<' + ; + +Includes + : '~=' + ; + +DashMatch + : '|=' + ; + +Pipe + : '|' + ; + +Cdo + : '' + ; + +PseudoNot + : ':not(' + ; + +Calc + : 'calc(' + ; + +Rotate + : 'rotate(' + ; + +Var + : 'var(' + ; + +Rgba + : 'rgba(' + ; + +Repeat + : 'repeat(' + ; + +PrefixMatch + : '^=' + ; + +SuffixMatch + : '$=' + ; + +SubstringMatch + : '*=' + ; + +VendorPrefix + : '-moz-' + | '-webkit-' + | '-o-' + ; + +Variable + : '--' (Interpolation | Nmstart) (Interpolation | Nmchar)* + ; + +fragment Interpolation + : Hash BlockStart Dollar? Ident BlockEnd + ; + +Number + : [0-9]+ + | [0-9]* '.' [0-9]+ + ; + +String_ + : '"' (~ [\n\r\f\\"] | '\\' Newline | Escape)* '"' + | '\'' (~ [\n\r\f\\'] | '\\' Newline | Escape)* '\'' + ; + // Give Ident least priority so that more specific rules matches first + +Ident + : Nmstart Nmchar* + ; + diff --git a/yml/Readme.md b/yml/Readme.md new file mode 100644 index 0000000000..5f2d5b9cfa --- /dev/null +++ b/yml/Readme.md @@ -0,0 +1,9 @@ +### grammar yml + +#### Note + +I created this grammar because it was not in the repository and it may not be complete. I leave this work to the experts and professionals of this field. 
+
+### Developer
+
+ Developer
\ No newline at end of file
diff --git a/yml/YamlGrammar.g4 b/yml/YamlGrammar.g4
new file mode 100644
index 0000000000..e62e39e49b
--- /dev/null
+++ b/yml/YamlGrammar.g4
@@ -0,0 +1,73 @@
+grammar YamlGrammar;
+
+// Minimal grammar for a flow-style subset of YAML: double-quoted keys and
+// strings, numbers, booleans, bracketed lists and nested key/value groups.
+
+// Entry rule: one or more documents.
+yaml
+    : document+
+    ;
+
+// A document is a run of key/value pairs followed by end of input.
+// NOTE(review): EOF is consumed here, so the `document+` loop in `yaml` can
+// match only one document - confirm whether multi-document input is intended.
+document
+    : key_value_pairs EOF
+    ;
+
+// One or more pairs, written left-recursively.
+key_value_pairs
+    : key_value_pair
+    | key_value_pairs key_value_pair
+    ;
+
+// A single `key : value` entry.
+key_value_pair
+    : key ':' value
+    ;
+
+// Keys must be double-quoted strings.
+key
+    : STRING
+    ;
+
+// A value is a scalar, a list, or a nested group of pairs.
+value
+    : STRING
+    | NUMBER
+    | BOOLEAN
+    | list
+    | key_value_pairs
+    ;
+
+// Comma-separated, non-empty list in square brackets.
+list
+    : '[' value (',' value)* ']'
+    ;
+
+// Double-quoted text; no escape sequences are recognised.
+STRING
+    : '"' ~ ["]* '"'
+    ;
+
+// Optionally signed integer or decimal. The fractional part is optional so
+// that plain integers such as 42 are tokenised instead of being rejected.
+NUMBER
+    : '-'? [0-9]+ ('.' [0-9]+)?
+    ;
+
+// Boolean literals (lower-case only).
+BOOLEAN
+    : ('true' | 'false')
+    ;
+
+// Whitespace, including line breaks, is discarded.
+WS
+    : [ \t\r\n]+ -> skip
+    ;
+
+// `#` comments run to the end of the line and are discarded.
+COMMENT
+    : '#' ~ [\r\n]* -> skip
+    ;
+