Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate duplicate TLS keys for loader_life_support stack #3275

Merged
merged 3 commits into from
Sep 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,24 @@ jobs:
if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
run: cmake --build build2 --target cpptest

# Third build - C++17 mode with unstable ABI
- name: Configure (unstable ABI)
run: >
cmake -S . -B build3
-DPYBIND11_WERROR=ON
-DDOWNLOAD_CATCH=ON
-DDOWNLOAD_EIGEN=ON
-DCMAKE_CXX_STANDARD=17
-DPYBIND11_INTERNALS_VERSION=10000000
"-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp"
${{ matrix.args }}

- name: Build (unstable ABI)
run: cmake --build build3 -j 2

- name: Python tests (unstable ABI)
run: cmake --build build3 --target pytest

- name: Interface test
run: cmake --build build2 --target test_cmake_build

Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ endif()
option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
set(PYBIND11_INTERNALS_VERSION
""
CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.")

cmake_dependent_option(
USE_PYTHON_INCLUDE_DIR
Expand Down Expand Up @@ -183,6 +186,10 @@ if(NOT TARGET pybind11_headers)

target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
cxx_right_angle_brackets)
if(NOT "${PYBIND11_INTERNALS_VERSION}" STREQUAL "")
target_compile_definitions(
pybind11_headers INTERFACE "PYBIND11_INTERNALS_VERSION=${PYBIND11_INTERNALS_VERSION}")
endif()
else()
# It is invalid to install a target twice, too.
set(PYBIND11_INSTALL OFF)
Expand Down
178 changes: 131 additions & 47 deletions include/pybind11/detail/internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@

#include "../pytypes.h"

/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
///
/// Some portions of the code use an ABI that is conditional depending on this
/// version number. That allows ABI-breaking changes to be "pre-implemented".
/// Once the default version number is incremented, the conditional logic that
/// no longer applies can be removed. Additionally, users that need not
/// maintain ABI compatibility can increase the version number in order to take
/// advantage of any functionality/efficiency improvements that depend on the
/// newer ABI.
///
/// WARNING: If you choose to manually increase the ABI version, note that
/// pybind11 may not be tested as thoroughly with a non-default ABI version, and
/// further ABI-incompatible changes may be made before the ABI is officially
/// changed to the new version.
#ifndef PYBIND11_INTERNALS_VERSION
# define PYBIND11_INTERNALS_VERSION 4
#endif

PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)

using ExceptionTranslator = void (*)(std::exception_ptr);
Expand All @@ -25,30 +43,58 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass);
// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new
// Thread Specific Storage (TSS) API.
#if PY_VERSION_HEX >= 0x03070000
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
// Avoid unnecessary allocation of `Py_tss_t`, since we cannot use
// `Py_LIMITED_API` anyway.
# if PYBIND11_INTERNALS_VERSION > 4
# define PYBIND11_TLS_KEY_REF Py_tss_t &
# ifdef __GNUC__
// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer
// for every field.
# define PYBIND11_TLS_KEY_INIT(var) \
_Pragma("GCC diagnostic push") /**/ \
_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \
Py_tss_t var \
= Py_tss_NEEDS_INIT; \
_Pragma("GCC diagnostic pop")
# else
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT;
# endif
# define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0)
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key))
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value))
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr)
# define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key))
# else
# define PYBIND11_TLS_KEY_REF Py_tss_t *
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr;
# define PYBIND11_TLS_KEY_CREATE(var) \
(((var) = PyThread_tss_alloc()) != nullptr && (PyThread_tss_create((var)) == 0))
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
# endif
#else
// Usually an int but a long on Cygwin64 with Python 3.x
# define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0
// Usually an int but a long on Cygwin64 with Python 3.x
# define PYBIND11_TLS_KEY_REF decltype(PyThread_create_key())
# define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0;
# define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1)
jbms marked this conversation as resolved.
Show resolved Hide resolved
# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key))
# if PY_MAJOR_VERSION < 3
# define PYBIND11_TLS_DELETE_VALUE(key) \
PyThread_delete_key_value(key)
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
do { \
PyThread_delete_key_value((key)); \
PyThread_set_key_value((key), (value)); \
} while (false)
# if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION)
// On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set
// the value if it has already been set. Instead, it must first be deleted and
// then set again.
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_delete_key_value(key)
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
do { \
PyThread_delete_key_value((key)); \
PyThread_set_key_value((key), (value)); \
} while (false)
# else
# define PYBIND11_TLS_DELETE_VALUE(key) \
PyThread_set_key_value((key), nullptr)
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
PyThread_set_key_value((key), (value))
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr)
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value))
# endif
# define PYBIND11_TLS_FREE(key) (void)key
# define PYBIND11_TLS_FREE(key) (void) key
#endif

// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
Expand Down Expand Up @@ -106,22 +152,31 @@ struct internals {
std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
std::forward_list<ExceptionTranslator> registered_exception_translators;
std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
#if PYBIND11_INTERNALS_VERSION == 4
std::vector<PyObject *> unused_loader_patient_stack_remove_at_v5;
#endif
std::forward_list<std::string> static_strings; // Stores the std::strings backing detail::c_str()
PyTypeObject *static_property_type;
PyTypeObject *default_metaclass;
PyObject *instance_base;
#if defined(WITH_THREAD)
PYBIND11_TLS_KEY_INIT(tstate);
PYBIND11_TLS_KEY_INIT(tstate)
# if PYBIND11_INTERNALS_VERSION > 4
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
# endif // PYBIND11_INTERNALS_VERSION > 4
PyInterpreterState *istate = nullptr;
~internals() {
# if PYBIND11_INTERNALS_VERSION > 4
PYBIND11_TLS_FREE(loader_life_support_tls_key);
# endif // PYBIND11_INTERNALS_VERSION > 4

// This destructor is called *after* Py_Finalize() in finalize_interpreter().
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
// PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
// PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
// of those have anything to do with CPython internals.
// PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator.
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is
// called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does
// nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX).
// Neither of those have anything to do with CPython internals. PyMem_RawFree *requires*
// that the `tstate` be allocated with the CPython allocator.
PYBIND11_TLS_FREE(tstate);
}
#endif
Expand Down Expand Up @@ -153,9 +208,6 @@ struct type_info {
bool module_local : 1;
};

/// Tracks the `internals` and `type_info` ABI version independent of the main library version
#define PYBIND11_INTERNALS_VERSION 4

/// On MSVC, debug and release builds are not ABI-compatible!
#if defined(_MSC_VER) && defined(_DEBUG)
# define PYBIND11_BUILD_TYPE "_debug"
Expand Down Expand Up @@ -291,21 +343,21 @@ PYBIND11_NOINLINE internals &get_internals() {
internals_ptr = new internals();
#if defined(WITH_THREAD)

#if PY_VERSION_HEX < 0x03090000
PyEval_InitThreads();
#endif
# if PY_VERSION_HEX < 0x03090000
PyEval_InitThreads();
# endif
PyThreadState *tstate = PyThreadState_Get();
#if PY_VERSION_HEX >= 0x03070000
internals_ptr->tstate = PyThread_tss_alloc();
if (!internals_ptr->tstate || (PyThread_tss_create(internals_ptr->tstate) != 0))
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
PyThread_tss_set(internals_ptr->tstate, tstate);
#else
internals_ptr->tstate = PyThread_create_key();
if (internals_ptr->tstate == -1)
pybind11_fail("get_internals: could not successfully initialize the tstate TLS key!");
PyThread_set_key_value(internals_ptr->tstate, tstate);
#endif
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) {
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
}
PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate);

# if PYBIND11_INTERNALS_VERSION > 4
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) {
pybind11_fail("get_internals: could not successfully initialize the "
"loader_life_support TSS key!");
}
# endif
internals_ptr->istate = tstate->interp;
#endif
builtins[id] = capsule(internals_pp);
Expand All @@ -317,16 +369,48 @@ PYBIND11_NOINLINE internals &get_internals() {
return **internals_pp;
}


// the internals struct (above) is shared between all the modules. local_internals are only
// for a single module. Any changes made to internals may require an update to
// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
// restricted to a single module. Whether a module has local internals or not should not
// impact any other modules, because the only things accessing the local internals is the
// module that contains them.
struct local_internals {
type_map<type_info *> registered_types_cpp;
std::forward_list<ExceptionTranslator> registered_exception_translators;
type_map<type_info *> registered_types_cpp;
std::forward_list<ExceptionTranslator> registered_exception_translators;
#if defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4

// For ABI compatibility, we can't store the loader_life_support TLS key in
// the `internals` struct directly. Instead, we store it in `shared_data` and
// cache a copy in `local_internals`. If we allocated a separate TLS key for
// each instance of `local_internals`, we could end up allocating hundreds of
// TLS keys if hundreds of different pybind11 modules are loaded (which is a
// plausible number).
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)

// Holds the shared TLS key for the loader_life_support stack.
struct shared_loader_life_support_data {
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
shared_loader_life_support_data() {
if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) {
pybind11_fail("local_internals: could not successfully initialize the "
"loader_life_support TLS key!");
}
}
// We can't help but leak the TLS key, because Python never unloads extension modules.
};

local_internals() {
auto &internals = get_internals();
// Get or create the `loader_life_support_stack_key`.
auto &ptr = internals.shared_data["_life_support"];
if (!ptr) {
ptr = new shared_loader_life_support_data;
}
loader_life_support_tls_key
= static_cast<shared_loader_life_support_data *>(ptr)->loader_life_support_tls_key;
}
#endif // defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4
};

/// Works like `get_internals`, but for things which are locally registered.
Expand Down
39 changes: 26 additions & 13 deletions include/pybind11/detail/type_caster_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,38 +35,51 @@ class loader_life_support {
loader_life_support* parent = nullptr;
std::unordered_set<PyObject *> keep_alive;

static loader_life_support** get_stack_pp() {
#if defined(WITH_THREAD)
thread_local static loader_life_support* per_thread_stack = nullptr;
return &per_thread_stack;
// Store stack pointer in thread-local storage.
static PYBIND11_TLS_KEY_REF get_stack_tls_key() {
# if PYBIND11_INTERNALS_VERSION == 4
return get_local_internals().loader_life_support_tls_key;
# else
return get_internals().loader_life_support_tls_key;
# endif
}
static loader_life_support *get_stack_top() {
return static_cast<loader_life_support *>(PYBIND11_TLS_GET_VALUE(get_stack_tls_key()));
}
static void set_stack_top(loader_life_support *value) {
PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value);
}
#else
static loader_life_support* global_stack = nullptr;
return &global_stack;
#endif
// Use single global variable for stack.
static loader_life_support **get_stack_pp() {
static loader_life_support *global_stack = nullptr;
return global_stack;
}
static loader_life_support *get_stack_top() { return *get_stack_pp(); }
static void set_stack_top(loader_life_support *value) { *get_stack_pp() = value; }
#endif

public:
/// A new patient frame is created when a function is entered
loader_life_support() {
loader_life_support** stack = get_stack_pp();
parent = *stack;
*stack = this;
parent = get_stack_top();
set_stack_top(this);
}

/// ... and destroyed after it returns
~loader_life_support() {
loader_life_support** stack = get_stack_pp();
if (*stack != this)
if (get_stack_top() != this)
pybind11_fail("loader_life_support: internal error");
*stack = parent;
set_stack_top(parent);
for (auto* item : keep_alive)
Py_DECREF(item);
}

/// This can only be used inside a pybind11-bound function, either by `argument_loader`
/// at argument preparation time or by `py::cast()` at execution time.
PYBIND11_NOINLINE static void add_patient(handle h) {
loader_life_support* frame = *get_stack_pp();
loader_life_support *frame = get_stack_top();
if (!frame) {
// NOTE: It would be nice to include the stack frames here, as this indicates
// use of pybind11::cast<> outside the normal call framework, finding such
Expand Down
4 changes: 2 additions & 2 deletions include/pybind11/gil.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ PYBIND11_NAMESPACE_END(detail)
class gil_scoped_acquire {
public:
PYBIND11_NOINLINE gil_scoped_acquire() {
auto const &internals = detail::get_internals();
auto &internals = detail::get_internals();
tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);

if (!tstate) {
Expand Down Expand Up @@ -132,7 +132,7 @@ class gil_scoped_release {
// `get_internals()` must be called here unconditionally in order to initialize
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
// initialization race could occur as multiple threads try `gil_scoped_acquire`.
const auto &internals = detail::get_internals();
auto &internals = detail::get_internals();
tstate = PyEval_SaveThread();
if (disassoc) {
auto key = internals.tstate;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,8 @@ def test_issue2361():
assert m.issue2361_str_implicit_copy_none() == "None"
with pytest.raises(TypeError) as excinfo:
assert m.issue2361_dict_implicit_copy_none()
assert "'NoneType' object is not iterable" in str(excinfo.value)
assert "NoneType" in str(excinfo.value)
assert "iterable" in str(excinfo.value)


@pytest.mark.parametrize(
Expand Down