-
-
Notifications
You must be signed in to change notification settings - Fork 31k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-128679: Fix tracemalloc.stop() race condition #128710
Changes from all commits
8f58d77
1e3eac3
59fd8de
0e6de65
48e172d
146167b
aceff9b
61edb4c
9df058a
ecfa30b
997148b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Fix :func:`tracemalloc.stop` race condition. Fix :mod:`tracemalloc` to | ||
support calling :func:`tracemalloc.stop` in one thread, while another thread | ||
is tracing memory allocations. Patch by Victor Stinner. |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -603,18 +603,39 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size) | |||||||||
static void | ||||||||||
tracemalloc_free(void *ctx, void *ptr) | ||||||||||
{ | ||||||||||
if (ptr == NULL) | ||||||||||
return; | ||||||||||
|
||||||||||
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; | ||||||||||
|
||||||||||
alloc->free(alloc->ctx, ptr); | ||||||||||
|
||||||||||
TABLES_LOCK(); | ||||||||||
REMOVE_TRACE(ptr); | ||||||||||
TABLES_UNLOCK(); | ||||||||||
} | ||||||||||
|
||||||||||
|
||||||||||
static void | ||||||||||
tracemalloc_raw_free(void *ctx, void *ptr) | ||||||||||
{ | ||||||||||
if (ptr == NULL) | ||||||||||
return; | ||||||||||
|
||||||||||
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; | ||||||||||
|
||||||||||
/* GIL cannot be locked in PyMem_RawFree() because it would introduce | ||||||||||
a deadlock in _PyThreadState_DeleteCurrent(). */ | ||||||||||
|
||||||||||
alloc->free(alloc->ctx, ptr); | ||||||||||
|
||||||||||
TABLES_LOCK(); | ||||||||||
REMOVE_TRACE(ptr); | ||||||||||
if (tracemalloc_config.tracing) { | ||||||||||
REMOVE_TRACE(ptr); | ||||||||||
} | ||||||||||
else { | ||||||||||
// gh-128679: tracemalloc.stop() was called by another thread | ||||||||||
} | ||||||||||
Comment on lines
+636
to
+638
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can remove the `else` branch.
Suggested change
|
||||||||||
TABLES_UNLOCK(); | ||||||||||
} | ||||||||||
|
||||||||||
|
@@ -712,7 +733,18 @@ tracemalloc_raw_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) | |||||||||
set_reentrant(1); | ||||||||||
|
||||||||||
gil_state = PyGILState_Ensure(); | ||||||||||
ptr = tracemalloc_alloc(use_calloc, ctx, nelem, elsize); | ||||||||||
if (tracemalloc_config.tracing) { | ||||||||||
ptr = tracemalloc_alloc(use_calloc, ctx, nelem, elsize); | ||||||||||
} | ||||||||||
else { | ||||||||||
// gh-128679: tracemalloc.stop() was called by another thread during | ||||||||||
// PyGILState_Ensure() call. | ||||||||||
PyMemAllocatorEx *alloc = (PyMemAllocatorEx *)ctx; | ||||||||||
if (use_calloc) | ||||||||||
ptr = alloc->calloc(alloc->ctx, nelem, elsize); | ||||||||||
else | ||||||||||
ptr = alloc->malloc(alloc->ctx, nelem * elsize); | ||||||||||
} | ||||||||||
PyGILState_Release(gil_state); | ||||||||||
|
||||||||||
set_reentrant(0); | ||||||||||
|
@@ -779,17 +811,15 @@ tracemalloc_clear_filename(void *value) | |||||||||
|
||||||||||
/* reentrant flag must be set to call this function and GIL must be held */ | ||||||||||
static void | ||||||||||
tracemalloc_clear_traces(void) | ||||||||||
tracemalloc_clear_traces_unlocked(void) | ||||||||||
{ | ||||||||||
/* The GIL protects variables against concurrent access */ | ||||||||||
assert(PyGILState_Check()); | ||||||||||
|
||||||||||
TABLES_LOCK(); | ||||||||||
_Py_hashtable_clear(tracemalloc_traces); | ||||||||||
_Py_hashtable_clear(tracemalloc_domains); | ||||||||||
tracemalloc_traced_memory = 0; | ||||||||||
tracemalloc_peak_traced_memory = 0; | ||||||||||
TABLES_UNLOCK(); | ||||||||||
|
||||||||||
_Py_hashtable_clear(tracemalloc_tracebacks); | ||||||||||
|
||||||||||
|
@@ -930,7 +960,7 @@ _PyTraceMalloc_Start(int max_nframe) | |||||||||
alloc.malloc = tracemalloc_raw_malloc; | ||||||||||
alloc.calloc = tracemalloc_raw_calloc; | ||||||||||
alloc.realloc = tracemalloc_raw_realloc; | ||||||||||
alloc.free = tracemalloc_free; | ||||||||||
alloc.free = tracemalloc_raw_free; | ||||||||||
|
||||||||||
alloc.ctx = &allocators.raw; | ||||||||||
PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &allocators.raw); | ||||||||||
|
@@ -963,6 +993,10 @@ _PyTraceMalloc_Stop(void) | |||||||||
if (!tracemalloc_config.tracing) | ||||||||||
return; | ||||||||||
|
||||||||||
// Lock to synchronize with tracemalloc_raw_free() which checks | ||||||||||
// 'tracing' while holding the lock. | ||||||||||
TABLES_LOCK(); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As I said in the other PR, relying on lock ordering like this is really error-prone. If we hold the GIL, then we should be able to use … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Would you mind elaborating? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See my comment: #128710 (comment) |
||||||||||
|
||||||||||
/* stop tracing Python memory allocations */ | ||||||||||
tracemalloc_config.tracing = 0; | ||||||||||
|
||||||||||
|
@@ -973,11 +1007,13 @@ _PyTraceMalloc_Stop(void) | |||||||||
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); | ||||||||||
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); | ||||||||||
|
||||||||||
tracemalloc_clear_traces(); | ||||||||||
tracemalloc_clear_traces_unlocked(); | ||||||||||
|
||||||||||
/* release memory */ | ||||||||||
raw_free(tracemalloc_traceback); | ||||||||||
tracemalloc_traceback = NULL; | ||||||||||
|
||||||||||
TABLES_UNLOCK(); | ||||||||||
} | ||||||||||
|
||||||||||
|
||||||||||
|
@@ -1317,9 +1353,16 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr, | |||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's either lock the read above this or remove it entirely, then LGTM for 3.12. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wrote #128897 to fix Python 3.12 and 3.13. |
||||||||||
gil_state = PyGILState_Ensure(); | ||||||||||
|
||||||||||
TABLES_LOCK(); | ||||||||||
res = tracemalloc_add_trace(domain, ptr, size); | ||||||||||
TABLES_UNLOCK(); | ||||||||||
if (tracemalloc_config.tracing) { | ||||||||||
TABLES_LOCK(); | ||||||||||
res = tracemalloc_add_trace(domain, ptr, size); | ||||||||||
TABLES_UNLOCK(); | ||||||||||
} | ||||||||||
else { | ||||||||||
// gh-128679: tracemalloc.stop() was called by another thread during | ||||||||||
// PyGILState_Ensure() call. | ||||||||||
res = 0; | ||||||||||
} | ||||||||||
|
||||||||||
PyGILState_Release(gil_state); | ||||||||||
return res; | ||||||||||
|
@@ -1418,7 +1461,9 @@ _PyTraceMalloc_ClearTraces(void) | |||||||||
return; | ||||||||||
} | ||||||||||
set_reentrant(1); | ||||||||||
tracemalloc_clear_traces(); | ||||||||||
TABLES_LOCK(); | ||||||||||
tracemalloc_clear_traces_unlocked(); | ||||||||||
TABLES_UNLOCK(); | ||||||||||
set_reentrant(0); | ||||||||||
} | ||||||||||
|
||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a lot easier to use a `PyMutex`, considering you don't have to heap-allocate it. I think it works without a thread state if you use `_PyMutex_LockFlags(mutex, _Py_LOCK_DONT_DETACH)`.