From 09ede81ae3ad9be0dabf56f10ebfd792d25981bd Mon Sep 17 00:00:00 2001 From: David Kunzmann Date: Mon, 30 Oct 2023 16:23:39 +0100 Subject: [PATCH] SONARPY-1538: Fix lexing error when encountering escaped characters (#1625) --- .../java/org/sonar/python/lexer/FStringChannel.java | 4 ++-- .../java/org/sonar/python/lexer/PythonLexerTest.java | 11 +++++++++++ .../src/test/resources/parser/own/fstring.py | 3 ++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java b/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java index 656599873d..42cb109644 100644 --- a/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java +++ b/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java @@ -47,7 +47,7 @@ public class FStringChannel extends Channel { private static final Set QUOTES = Set.of('\"', '\''); private static final Set PREFIXES = Set.of('F', 'R'); - private static final Set ESCAPED_CHARS = Set.of("{{", "}}","\\\"","\\\'"); + private static final Set ESCAPED_CHARS = Set.of("{{", "}}"); public FStringChannel(LexerState lexerState) { this.lexerState = lexerState; @@ -153,7 +153,7 @@ private static boolean isUnicodeChar(StringBuilder sb ){ } private static boolean isEscapedChar(CodeReader code) { - return ESCAPED_CHARS.contains(String.valueOf(code.peek(2))); + return ESCAPED_CHARS.contains(String.valueOf(code.peek(2))) || code.peek() == '\\'; } private static boolean areClosingQuotes(CodeReader code, FStringState state) { diff --git a/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java b/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java index f96cc031d0..61d1dd92f9 100644 --- a/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java +++ b/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java @@ -527,6 +527,17 @@ void fstring_complex_format_specifier() { hasToken("}", PythonPunctuator.RCURLYBRACE), hasToken("\"", PythonTokenType.FSTRING_END))); } + + @Test + void fstring_double_backslash() { + assertThat(lexer.lex("f\"{a}\\\\\""), allOf( + hasToken("f\"", PythonTokenType.FSTRING_START), + hasToken("{", PythonPunctuator.LCURLYBRACE), + hasToken("a", GenericTokenType.IDENTIFIER), + hasToken("}", PythonPunctuator.RCURLYBRACE), + hasToken("\\\\", PythonTokenType.FSTRING_MIDDLE), + hasToken("\"", PythonTokenType.FSTRING_END))); + } /** * http://docs.python.org/reference/lexical_analysis.html#integer-and-long-integer-literals */ diff --git a/python-frontend/src/test/resources/parser/own/fstring.py b/python-frontend/src/test/resources/parser/own/fstring.py index e6771f659f..ddf1559869 100644 --- a/python-frontend/src/test/resources/parser/own/fstring.py +++ b/python-frontend/src/test/resources/parser/own/fstring.py @@ -24,4 +24,5 @@ }" f"Current value: \"{value}\" (type: {type(value)}). " f'\N{RIGHTWARDS ARROW}' - +f" \\" +F"\\ \"{a}\":\\"