Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-128679: Fix tracemalloc.stop() race condition #128710

Closed
wants to merge 11 commits into from
10 changes: 8 additions & 2 deletions Lib/test/test_tracemalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from test.support.script_helper import (assert_python_ok, assert_python_failure,
interpreter_requires_environment)
from test import support
from test.support import os_helper
from test.support import force_not_colorized
from test.support import os_helper
from test.support import threading_helper

try:
import _testcapi
Expand Down Expand Up @@ -952,7 +953,6 @@ def check_env_var_invalid(self, nframe):
return
self.fail(f"unexpected output: {stderr!a}")


def test_env_var_invalid(self):
for nframe in INVALID_NFRAME:
with self.subTest(nframe=nframe):
Expand Down Expand Up @@ -1101,6 +1101,12 @@ def test_stop_untrack(self):
with self.assertRaises(RuntimeError):
self.untrack()

@unittest.skipIf(_testcapi is None, 'need _testcapi')
@threading_helper.requires_working_threading()
def test_tracemalloc_track_race(self):
# gh-128679: Test fix for tracemalloc.stop() race condition
_testcapi.tracemalloc_track_race()


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :func:`tracemalloc.stop` race condition. Fix :mod:`tracemalloc` to
support calling :func:`tracemalloc.stop` in one thread, while another thread
is tracing memory allocations. Patch by Victor Stinner.
99 changes: 99 additions & 0 deletions Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3435,6 +3435,104 @@ code_offset_to_line(PyObject* self, PyObject* const* args, Py_ssize_t nargsf)
return PyLong_FromInt32(PyCode_Addr2Line(code, offset));
}


static void
tracemalloc_track_race_thread(void *data)
{
PyTraceMalloc_Track(123, 10, 1);

PyThread_type_lock lock = (PyThread_type_lock)data;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a lot easier to use a PyMutex, considering you don't have to heap allocate it. I think it works without a thread state if you use _PyMutex_LockFlags(mutex, _Py_LOCK_DONT_DETACH)

PyThread_release_lock(lock);
}

// gh-128679: Test fix for tracemalloc.stop() race condition
static PyObject *
tracemalloc_track_race(PyObject *self, PyObject *args)
{
#define NTHREAD 50
PyObject *tracemalloc = NULL;
PyObject *stop = NULL;
PyThread_type_lock locks[NTHREAD];
memset(locks, 0, sizeof(locks));

// Call tracemalloc.start()
tracemalloc = PyImport_ImportModule("tracemalloc");
if (tracemalloc == NULL) {
goto error;
}
PyObject *start = PyObject_GetAttrString(tracemalloc, "start");
if (start == NULL) {
goto error;
}
PyObject *res = PyObject_CallNoArgs(start);
Py_DECREF(start);
if (res == NULL) {
goto error;
}
Py_DECREF(res);

stop = PyObject_GetAttrString(tracemalloc, "stop");
Py_CLEAR(tracemalloc);
if (stop == NULL) {
goto error;
}

// Start threads
for (size_t i = 0; i < NTHREAD; i++) {
PyThread_type_lock lock = PyThread_allocate_lock();
if (!lock) {
PyErr_NoMemory();
goto error;
}
locks[i] = lock;
PyThread_acquire_lock(lock, 1);

unsigned long thread;
thread = PyThread_start_new_thread(tracemalloc_track_race_thread,
(void*)lock);
if (thread == (unsigned long)-1) {
PyErr_SetString(PyExc_RuntimeError, "can't start new thread");
goto error;
}
}

// Call tracemalloc.stop() while threads are running
res = PyObject_CallNoArgs(stop);
vstinner marked this conversation as resolved.
Show resolved Hide resolved
Py_CLEAR(stop);
if (res == NULL) {
goto error;
}
Py_DECREF(res);

// Wait until threads complete with the GIL released
Py_BEGIN_ALLOW_THREADS
for (size_t i = 0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
PyThread_acquire_lock(lock, 1);
PyThread_release_lock(lock);
}
Py_END_ALLOW_THREADS

// Free threads locks
for (size_t i=0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
PyThread_free_lock(lock);
}
Py_RETURN_NONE;

error:
Py_CLEAR(tracemalloc);
Py_CLEAR(stop);
for (size_t i=0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
if (lock) {
PyThread_free_lock(lock);
}
}
return NULL;
#undef NTHREAD
}

static PyMethodDef TestMethods[] = {
{"set_errno", set_errno, METH_VARARGS},
{"test_config", test_config, METH_NOARGS},
Expand Down Expand Up @@ -3578,6 +3676,7 @@ static PyMethodDef TestMethods[] = {
{"type_freeze", type_freeze, METH_VARARGS},
{"test_atexit", test_atexit, METH_NOARGS},
{"code_offset_to_line", _PyCFunction_CAST(code_offset_to_line), METH_FASTCALL},
{"tracemalloc_track_race", tracemalloc_track_race, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
67 changes: 56 additions & 11 deletions Python/tracemalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -603,18 +603,39 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size)
static void
tracemalloc_free(void *ctx, void *ptr)
{
if (ptr == NULL)
return;

PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx;

alloc->free(alloc->ctx, ptr);

TABLES_LOCK();
REMOVE_TRACE(ptr);
TABLES_UNLOCK();
}


static void
tracemalloc_raw_free(void *ctx, void *ptr)
{
if (ptr == NULL)
return;

PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx;

/* GIL cannot be locked in PyMem_RawFree() because it would introduce
a deadlock in _PyThreadState_DeleteCurrent(). */

alloc->free(alloc->ctx, ptr);

TABLES_LOCK();
REMOVE_TRACE(ptr);
if (tracemalloc_config.tracing) {
REMOVE_TRACE(ptr);
}
else {
// gh-128679: tracemalloc.stop() was called by another thread
}
Comment on lines +636 to +638
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove the else:

Suggested change
else {
// gh-128679: tracemalloc.stop() was called by another thread
}
// gh-128679: If we're not tracing, then tracemalloc.stop() was called by another thread

TABLES_UNLOCK();
}

Expand Down Expand Up @@ -712,7 +733,18 @@ tracemalloc_raw_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
set_reentrant(1);

gil_state = PyGILState_Ensure();
ptr = tracemalloc_alloc(use_calloc, ctx, nelem, elsize);
if (tracemalloc_config.tracing) {
ptr = tracemalloc_alloc(use_calloc, ctx, nelem, elsize);
}
else {
// gh-128679: tracemalloc.stop() was called by another thread during
// PyGILState_Ensure() call.
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx;
if (use_calloc)
ptr = alloc->calloc(alloc->ctx, nelem, elsize);
else
ptr = alloc->malloc(alloc->ctx, nelem * elsize);
}
PyGILState_Release(gil_state);

set_reentrant(0);
Expand Down Expand Up @@ -779,17 +811,15 @@ tracemalloc_clear_filename(void *value)

/* reentrant flag must be set to call this function and GIL must be held */
static void
tracemalloc_clear_traces(void)
tracemalloc_clear_traces_unlocked(void)
{
/* The GIL protects variables against concurrent access */
assert(PyGILState_Check());

TABLES_LOCK();
_Py_hashtable_clear(tracemalloc_traces);
_Py_hashtable_clear(tracemalloc_domains);
tracemalloc_traced_memory = 0;
tracemalloc_peak_traced_memory = 0;
TABLES_UNLOCK();

_Py_hashtable_clear(tracemalloc_tracebacks);

Expand Down Expand Up @@ -930,7 +960,7 @@ _PyTraceMalloc_Start(int max_nframe)
alloc.malloc = tracemalloc_raw_malloc;
alloc.calloc = tracemalloc_raw_calloc;
alloc.realloc = tracemalloc_raw_realloc;
alloc.free = tracemalloc_free;
alloc.free = tracemalloc_raw_free;

alloc.ctx = &allocators.raw;
PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw);
Expand Down Expand Up @@ -963,6 +993,10 @@ _PyTraceMalloc_Stop(void)
if (!tracemalloc_config.tracing)
return;

// Lock to synchronize with tracemalloc_raw_free() which checks
// 'tracing' while holding the lock.
TABLES_LOCK();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said in the other PR, relying on lock ordering like this is really error prone. If we hold the GIL, then we should be able to use tracemalloc_config.tracing without a lock, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said in the other PR, relying on lock ordering like this is really error prone.

Would you mind to elaborate?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment: #128710 (comment)


/* stop tracing Python memory allocations */
tracemalloc_config.tracing = 0;

Expand All @@ -973,11 +1007,13 @@ _PyTraceMalloc_Stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);

tracemalloc_clear_traces();
tracemalloc_clear_traces_unlocked();

/* release memory */
raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL;

TABLES_UNLOCK();
}


Expand Down Expand Up @@ -1317,9 +1353,16 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's either lock the read above this or remove it entirely, then LGTM for 3.12.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote #128897 to fix Python 3.12 and 3.13.

gil_state = PyGILState_Ensure();

TABLES_LOCK();
res = tracemalloc_add_trace(domain, ptr, size);
TABLES_UNLOCK();
if (tracemalloc_config.tracing) {
TABLES_LOCK();
res = tracemalloc_add_trace(domain, ptr, size);
TABLES_UNLOCK();
}
else {
// gh-128679: tracemalloc.stop() was called by another thread during
// PyGILState_Ensure() call.
res = 0;
}

PyGILState_Release(gil_state);
return res;
Expand Down Expand Up @@ -1418,7 +1461,9 @@ _PyTraceMalloc_ClearTraces(void)
return;
}
set_reentrant(1);
tracemalloc_clear_traces();
TABLES_LOCK();
tracemalloc_clear_traces_unlocked();
TABLES_UNLOCK();
set_reentrant(0);
}

Expand Down
Loading