From 75c1690097f3a5b1fa0e04598ea19b48c53d0b43 Mon Sep 17 00:00:00 2001
From: David Kunzmann <david.kunzmann@sonarsource.com>
Date: Wed, 1 Nov 2023 09:40:39 +0100
Subject: [PATCH] SONARPY-1542: Fix lexer as raw strings consider backslash as
 a character except before quotes (#1628)

---
 .../java/org/sonar/python/lexer/FStringChannel.java   | 11 ++++++++---
 .../java/org/sonar/python/lexer/PythonLexerTest.java  |  9 +++++++--
 .../src/test/resources/parser/own/fstring.py          |  1 +
 3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java b/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java
index c569aa2ed5..5f1db38ced 100644
--- a/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java
+++ b/python-frontend/src/main/java/org/sonar/python/lexer/FStringChannel.java
@@ -108,10 +108,11 @@ private boolean consumeFStringMiddle(List<Token> tokens, StringBuilder sb, FStri
     int column = code.getColumnPosition();
     FStringState.Mode currentMode = state.getTokenizerMode();
     while (code.charAt(0) != EOF) {
-      // In a raw string we consider \ as a character not as escape so we consume it as is
-      if (currentMode == Mode.FSTRING_MODE && state.isRawString && code.charAt(0) == '\\') {
+      // In a raw string, \ is a literal character rather than an escape, so it is consumed on its own.
+      // A \ that isRawStringBackSlash rejects (e.g. before a quote) falls through to the escaped-char branch below.
+      if (currentMode == Mode.FSTRING_MODE && isRawStringBackSlash(code, state)) {
         sb.append((char) code.pop());
-      // If we encounter an escaped char we can consume the next two chars directly
+        // If we encounter an escaped char we can consume the next two chars directly
       } else if (currentMode == Mode.FSTRING_MODE && isEscapedChar(code)) {
         sb.append((char) code.pop());
         sb.append((char) code.pop());
@@ -152,6 +153,25 @@ private static boolean canConsumeFStringPrefix(StringBuilder sb, CodeReader code
     return false;
   }
 
+  /**
+   * Tells whether the reader is on a backslash of a raw f-string that must be consumed as a single literal character.
+   * <p>
+   * A backslash followed by a quote (any member of {@code QUOTES}) or by another backslash is excluded, so that
+   * the caller can handle it as a two-character escaped pair. Otherwise the second backslash of {@code rf"\\"} would
+   * be paired with the closing quote and the literal would never terminate.
+   *
+   * @param code reader positioned on the character to inspect
+   * @param state f-string state providing the {@code isRawString} flag
+   * @return {@code true} if the current character is a raw-string backslash to consume alone
+   */
+  private static boolean isRawStringBackSlash(CodeReader code, FStringState state) {
+    if (!state.isRawString || code.charAt(0) != '\\') {
+      return false;
+    }
+    // The character after the backslash decides: a quote or a second backslash forms a pair
+    return code.charAt(1) != '\\' && !QUOTES.contains(code.charAt(1));
+  }
+
   private static boolean isUnicodeChar(StringBuilder sb) {
     int lastIndexOfUnicodeChar = sb.lastIndexOf("\\N");
     return lastIndexOfUnicodeChar >= 0 && lastIndexOfUnicodeChar == sb.length() - 2;
diff --git a/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java b/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java
index 0b9584b9b9..0a78c05e7d 100644
--- a/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java
+++ b/python-frontend/src/test/java/org/sonar/python/lexer/PythonLexerTest.java
@@ -530,12 +530,18 @@ void fstring_complex_format_specifier() {
 
   @Test
   void fstring_escaped_regex_pattern() {
+    // Python source under test: rf"\{{\n\}}\"{a}\""
+    // In a raw f-string \" does not close the literal: backslash and quote both end up in FSTRING_MIDDLE.
-    assertThat(lexer.lex("rf\"\\{{\\n\\}}\""), allOf(
+    assertThat(lexer.lex("rf\"\\{{\\n\\}}\\\"{a}\\\"\""), allOf(
       hasToken("rf\"", PythonTokenType.FSTRING_START),
-      hasToken("\\{{\\n\\}}", PythonTokenType.FSTRING_MIDDLE),
+      hasToken("\\{{\\n\\}}\\\"", PythonTokenType.FSTRING_MIDDLE),
+      hasToken("{", PythonPunctuator.LCURLYBRACE),
+      hasToken("a", GenericTokenType.IDENTIFIER),
+      hasToken("}", PythonPunctuator.RCURLYBRACE),
+      hasToken("\\\"", PythonTokenType.FSTRING_MIDDLE),
       hasToken("\"", PythonTokenType.FSTRING_END)));
   }
 
   @Test
   void fstring_double_backslash() {
     assertThat(lexer.lex("f\"{a}\\\\\""), allOf(
diff --git a/python-frontend/src/test/resources/parser/own/fstring.py b/python-frontend/src/test/resources/parser/own/fstring.py
index 50bd752540..0d1a1efa67 100644
--- a/python-frontend/src/test/resources/parser/own/fstring.py
+++ b/python-frontend/src/test/resources/parser/own/fstring.py
@@ -28,3 +28,4 @@
 F"\\ \"{a}\":\\"
 fr"""\s*\{{(.+)\}}"""
 rf'^add_example\(\s*"[^"]*",\s*{foo()},\s*\d+,\s*async \(client, console\) => \{{\n(.*?)^(?:\}}| *\}},\n)\);$'
+fr'\"foo\"\s*{42}'