From 184444b8f6a52e8888450ef9d9cd4458c4f005b9 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Sat, 17 Feb 2024 15:10:09 -0800
Subject: [PATCH 1/4] [lldb] Call Import_AppendInittab before Py_Initialize
 (#82095)

The Python documentation [1] says that `PyImport_AppendInittab` should
be called before `Py_Initialize()`. Starting with Python 3.12, this is
enforced with a fatal error:

  Fatal Python error: PyImport_AppendInittab: PyImport_AppendInittab()
  may not be called after Py_Initialize()

This commit ensures we only modify the table of built-in modules if
Python hasn't been initialized. For Python embedded in LLDB, that means
this happen exactly once, before the first call to `Py_Initialize`,
which becomes a NO-OP after. However, when lldb is imported in an
existing Python interpreter, Python will have already been initialized,
but by definition, the lldb module will already have been loaded, so
it's safe to skip adding it (again).

This fixes #70453.

[1] https://docs.python.org/3.12/c-api/import.html#c.PyImport_AppendInittab

(cherry picked from commit fbce244299524fc3d736cce9d25b4262303b45d5)
---
 .../Python/ScriptInterpreterPython.cpp        | 32 +++++++++++--------
 1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 751776c14a1c09..6d3e79296ea2f5 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -93,24 +93,28 @@ struct InitializePythonRAII {
   InitializePythonRAII() {
     InitializePythonHome();
 
+    // The table of built-in modules can only be extended before Python is
+    // initialized.
+    if (!Py_IsInitialized()) {
 #ifdef LLDB_USE_LIBEDIT_READLINE_COMPAT_MODULE
-    // Python's readline is incompatible with libedit being linked into lldb.
-    // Provide a patched version local to the embedded interpreter.
-    bool ReadlinePatched = false;
-    for (auto *p = PyImport_Inittab; p->name != nullptr; p++) {
-      if (strcmp(p->name, "readline") == 0) {
-        p->initfunc = initlldb_readline;
-        break;
+      // Python's readline is incompatible with libedit being linked into lldb.
+      // Provide a patched version local to the embedded interpreter.
+      bool ReadlinePatched = false;
+      for (auto *p = PyImport_Inittab; p->name != nullptr; p++) {
+        if (strcmp(p->name, "readline") == 0) {
+          p->initfunc = initlldb_readline;
+          break;
+        }
+      }
+      if (!ReadlinePatched) {
+        PyImport_AppendInittab("readline", initlldb_readline);
+        ReadlinePatched = true;
       }
-    }
-    if (!ReadlinePatched) {
-      PyImport_AppendInittab("readline", initlldb_readline);
-      ReadlinePatched = true;
-    }
 #endif
 
-    // Register _lldb as a built-in module.
-    PyImport_AppendInittab("_lldb", LLDBSwigPyInit);
+      // Register _lldb as a built-in module.
+      PyImport_AppendInittab("_lldb", LLDBSwigPyInit);
+    }
 
 // Python < 3.2 and Python >= 3.2 reversed the ordering requirements for
 // calling `Py_Initialize` and `PyEval_InitThreads`.  < 3.2 requires that you

From 92d51cf5efcc6f214f4bceb0a9f802a64cae4771 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Sat, 17 Feb 2024 20:55:33 -0800
Subject: [PATCH 2/4] [lldb] Don't rely on
 ScriptInterpreterPythonImpl::Initialize in the unit tests (#82096)

The unit tests only test the Python objects and don't actually use
anything from the LLDB module. That means that all the additional
complexity in ScriptInterpreterPythonImpl::Initialize is overkill.

By doing the initialization by hand, we avoid the annoying
ModuleNotFoundError.

  Traceback (most recent call last):
    File "<string>", line 1, in <module>
  ModuleNotFoundError: No module named 'lldb'

The error is the result of us stubbing out the SWIG (specifically
`PyInit__lldb`) because we cannot link against libLLDB from the unit
tests.

The downside of doing the initialization manually is that we do lose a
bit of test coverage. For example, issue #70453 also manifested itself
in the unit tests.

(cherry picked from commit 5c96e71d0d49dd55711ccdb57a22d033fe7a8fae)
---
 .../Python/PythonTestSuite.cpp                | 24 ++++---------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
index d4af4ad3b001af..e2e4161431d5e1 100644
--- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
+++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp
@@ -9,42 +9,26 @@
 #include "gtest/gtest.h"
 
 #include "Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h"
-#include "Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h"
-#include "Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h"
 #include "Plugins/ScriptInterpreter/Python/lldb-python.h"
 
-#include "lldb/Host/FileSystem.h"
-#include "lldb/Host/HostInfo.h"
-
 #include "PythonTestSuite.h"
 
-using namespace lldb_private;
-class TestScriptInterpreterPython : public ScriptInterpreterPythonImpl {
-public:
-  using ScriptInterpreterPythonImpl::Initialize;
-};
-
 void PythonTestSuite::SetUp() {
-  FileSystem::Initialize();
-  HostInfoBase::Initialize();
-  // ScriptInterpreterPython::Initialize() depends on HostInfo being
-  // initializedso it can compute the python directory etc.
-  TestScriptInterpreterPython::Initialize();
-
   // Although we don't care about concurrency for the purposes of running
   // this test suite, Python requires the GIL to be locked even for
   // deallocating memory, which can happen when you call Py_DECREF or
   // Py_INCREF.  So acquire the GIL for the entire duration of this
   // test suite.
+  Py_InitializeEx(0);
   m_gil_state = PyGILState_Ensure();
+  PyRun_SimpleString("import sys");
 }
 
 void PythonTestSuite::TearDown() {
   PyGILState_Release(m_gil_state);
 
-  TestScriptInterpreterPython::Terminate();
-  HostInfoBase::Terminate();
-  FileSystem::Terminate();
+  // We could call Py_FinalizeEx here, but initializing and finalizing Python is
+  // pretty slow, so just keep Python initialized across tests.
 }
 
 // The following functions are the Pythonic implementations of the required

From fbbe75612051a69cbec8128031b2a388c7ba981a Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Sat, 17 Feb 2024 11:37:26 -0800
Subject: [PATCH 3/4] [lldb] Use PyBytes and PyByteArray in Python Data Objects
 unittest (#82098)

Use a Python Bytes and ByteArray object instead of Integers for
TestOwnedReferences and TestBorrowedReferences. These two tests were
failing when building against Python 3.12 because these Integer objects
had a refcount of 4294967296 (-1). I didn't dig into it, but I suspect
the Python runtime has adopted an optimization to decrease refcounting
traffic for these simple objects.

(cherry picked from commit 27f2908fbc8092f3567385a63676d623523b318b)
---
 .../Python/PythonDataObjectsTests.cpp         | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
index 34507123952760..fca964526f2b0f 100644
--- a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
+++ b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
@@ -52,21 +52,24 @@ class PythonDataObjectsTest : public PythonTestSuite {
 
 TEST_F(PythonDataObjectsTest, TestOwnedReferences) {
   // After creating a new object, the refcount should be >= 1
-  PyObject *obj = PyLong_FromLong(3);
-  Py_ssize_t original_refcnt = obj->ob_refcnt;
+  PyObject *obj = PyBytes_FromString("foo");
+  Py_ssize_t original_refcnt = Py_REFCNT(obj);
   EXPECT_LE(1, original_refcnt);
 
   // If we take an owned reference, the refcount should be the same
-  PythonObject owned_long(PyRefType::Owned, obj);
-  EXPECT_EQ(original_refcnt, owned_long.get()->ob_refcnt);
+  PythonObject owned(PyRefType::Owned, obj);
+  Py_ssize_t owned_refcnt = Py_REFCNT(owned.get());
+  EXPECT_EQ(original_refcnt, owned_refcnt);
 
   // Take another reference and verify that the refcount increases by 1
-  PythonObject strong_ref(owned_long);
-  EXPECT_EQ(original_refcnt + 1, strong_ref.get()->ob_refcnt);
+  PythonObject strong_ref(owned);
+  Py_ssize_t strong_refcnt = Py_REFCNT(strong_ref.get());
+  EXPECT_EQ(original_refcnt + 1, strong_refcnt);
 
   // If we reset the first one, the refcount should be the original value.
-  owned_long.Reset();
-  EXPECT_EQ(original_refcnt, strong_ref.get()->ob_refcnt);
+  owned.Reset();
+  strong_refcnt = Py_REFCNT(strong_ref.get());
+  EXPECT_EQ(original_refcnt, strong_refcnt);
 }
 
 TEST_F(PythonDataObjectsTest, TestResetting) {
@@ -83,12 +86,15 @@ TEST_F(PythonDataObjectsTest, TestResetting) {
 }
 
 TEST_F(PythonDataObjectsTest, TestBorrowedReferences) {
-  PythonInteger long_value(PyRefType::Owned, PyLong_FromLong(3));
-  Py_ssize_t original_refcnt = long_value.get()->ob_refcnt;
+  PythonByteArray byte_value(PyRefType::Owned,
+                             PyByteArray_FromStringAndSize("foo", 3));
+  Py_ssize_t original_refcnt = Py_REFCNT(byte_value.get());
   EXPECT_LE(1, original_refcnt);
 
-  PythonInteger borrowed_long(PyRefType::Borrowed, long_value.get());
-  EXPECT_EQ(original_refcnt + 1, borrowed_long.get()->ob_refcnt);
+  PythonByteArray borrowed_byte(PyRefType::Borrowed, byte_value.get());
+  Py_ssize_t borrowed_refcnt = Py_REFCNT(borrowed_byte.get());
+
+  EXPECT_EQ(original_refcnt + 1, borrowed_refcnt);
 }
 
 TEST_F(PythonDataObjectsTest, TestGlobalNameResolutionNoDot) {

From 681e41344b9d6ce42c0a5fae1133e7cbd286b937 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Wed, 21 Feb 2024 22:59:03 -0800
Subject: [PATCH 4/4] [lldb][test] Fix PythonDataObjectsTest

This is using `FileSystem::Instance()` w/o calling `FileSystem::Initialize()`. Use `SubsystemRAII` to do that.

(cherry picked from commit 675791335285fa86434dc46e5c92f543e0e79d19)
---
 .../Python/PythonDataObjectsTests.cpp                  | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
index fca964526f2b0f..b1218748dbcfcd 100644
--- a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
+++ b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp
@@ -11,6 +11,7 @@
 
 #include "Plugins/ScriptInterpreter/Python/PythonDataObjects.h"
 #include "Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h"
+#include "TestingSupport/SubsystemRAII.h"
 #include "lldb/Host/File.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/HostInfo.h"
@@ -27,6 +28,8 @@ using llvm::Error;
 using llvm::Expected;
 
 class PythonDataObjectsTest : public PythonTestSuite {
+  SubsystemRAII<FileSystem> subsystems;
+
 public:
   void SetUp() override {
     PythonTestSuite::SetUp();
@@ -210,8 +213,8 @@ TEST_F(PythonDataObjectsTest, TestPythonBoolean) {
   };
 
   // Test PythonBoolean constructed from long integer values.
-  test_from_long(0); // Test 'false' value.
-  test_from_long(1); // Test 'true' value.
+  test_from_long(0);  // Test 'false' value.
+  test_from_long(1);  // Test 'true' value.
   test_from_long(~0); // Any value != 0 is 'true'.
 }
 
@@ -809,7 +812,8 @@ main = foo
                                 testing::ContainsRegex("line 7, in baz"),
                                 testing::ContainsRegex("ZeroDivisionError")))));
 
-#if !((defined(_WIN32) || defined(_WIN64)) && (defined(__aarch64__) || defined(_M_ARM64)))
+#if !((defined(_WIN32) || defined(_WIN64)) &&                                  \
+      (defined(__aarch64__) || defined(_M_ARM64)))
 
   static const char script2[] = R"(
 class MyError(Exception):