Skip to content

Commit

Permalink
SONARPY-1542: Fix lexer as raw strings consider backslash as a character except before quotes (#1628)
Browse files Browse the repository at this point in the history
  • Loading branch information
joke1196 authored Nov 1, 2023
1 parent 58bed0c commit 75c1690
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,11 @@ private boolean consumeFStringMiddle(List<Token> tokens, StringBuilder sb, FStri
int column = code.getColumnPosition();
FStringState.Mode currentMode = state.getTokenizerMode();
while (code.charAt(0) != EOF) {
// In a raw string we consider \ as a character not as escape so we consume it as is
if (currentMode == Mode.FSTRING_MODE && state.isRawString && code.charAt(0) == '\\') {
// In a raw string we consider \ as a character not as escape so we consume it as is.
// Except for quotes which will be consumed as an escaped char
if (currentMode == Mode.FSTRING_MODE && isRawStringBackSlash(code, state)) {
sb.append((char) code.pop());
// If we encounter an escaped char we can consume the next two chars directly
// If we encounter an escaped char we can consume the next two chars directly
} else if (currentMode == Mode.FSTRING_MODE && isEscapedChar(code)) {
sb.append((char) code.pop());
sb.append((char) code.pop());
Expand Down Expand Up @@ -152,6 +153,10 @@ private static boolean canConsumeFStringPrefix(StringBuilder sb, CodeReader code
return false;
}

/**
 * Tells whether the reader is positioned on a backslash that should be kept as a plain
 * character of a raw string, i.e. a backslash that is not immediately followed by a quote.
 *
 * @param code  reader whose current character (offset 0) and next character (offset 1) are inspected
 * @param state f-string state; only its {@code isRawString} flag is read
 * @return {@code true} when the state is flagged as a raw string, the current character is a
 *         backslash, and the following character is not contained in {@code QUOTES}
 */
private static boolean isRawStringBackSlash(CodeReader code, FStringState state) {
  // Outside a raw string, or when not on a backslash, there is nothing special to report.
  if (!state.isRawString || code.charAt(0) != '\\') {
    return false;
  }
  // A backslash directly before a quote is excluded so the caller can treat the pair as an escaped char.
  // NOTE(review): only the two characters at offsets 0 and 1 are examined; whether the backslash
  // is itself preceded by another backslash is not considered here — confirm the caller covers that case.
  return !QUOTES.contains(code.charAt(1));
}

private static boolean isUnicodeChar(StringBuilder sb) {
int lastIndexOfUnicodeChar = sb.lastIndexOf("\\N");
return lastIndexOfUnicodeChar >= 0 && lastIndexOfUnicodeChar == sb.length() - 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,12 +530,17 @@ void fstring_complex_format_specifier() {

@Test
void fstring_escaped_regex_pattern() {
assertThat(lexer.lex("rf\"\\{{\\n\\}}\""), allOf(
assertThat(lexer.lex("rf\"\\{{\\n\\}}\\\"{a}\\\"\""), allOf(
hasToken("rf\"", PythonTokenType.FSTRING_START),
hasToken("\\{{\\n\\}}", PythonTokenType.FSTRING_MIDDLE),
hasToken("\\{{\\n\\}}\\\"", PythonTokenType.FSTRING_MIDDLE),
hasToken("{", PythonPunctuator.LCURLYBRACE),
hasToken("a", GenericTokenType.IDENTIFIER),
hasToken("}", PythonPunctuator.RCURLYBRACE),
hasToken("\\\"", PythonTokenType.FSTRING_MIDDLE),
hasToken("\"", PythonTokenType.FSTRING_END)));
}


@Test
void fstring_double_backslash() {
assertThat(lexer.lex("f\"{a}\\\\\""), allOf(
Expand Down
1 change: 1 addition & 0 deletions python-frontend/src/test/resources/parser/own/fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@
F"\\ \"{a}\":\\"
fr"""\s*\{{(.+)\}}"""
rf'^add_example\(\s*"[^"]*",\s*{foo()},\s*\d+,\s*async \(client, console\) => \{{\n(.*?)^(?:\}}| *\}},\n)\);$'
fr'\"foo\"\s*{42}'

0 comments on commit 75c1690

Please sign in to comment.