From 138fc0ef7caaffd5885c93c73133e81e60e637fc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Aug 2023 12:09:32 -0700 Subject: [PATCH 01/17] Add function-by-version cache --- Include/internal/pycore_function.h | 9 +++++++ Objects/funcobject.c | 41 ++++++++++++++++++++++++++++-- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 3 ++- Python/generated_cases.c.h | 3 ++- Python/sysmodule.c | 2 ++ 6 files changed, 56 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index e844d323ec7927..d059c3731b435f 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -16,13 +16,22 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_MAX_WATCHERS 8 +#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ struct _py_func_state { uint32_t next_version; + // Function objects whose func_version % FUNC_VERSION_CACHE_SIZE + // once equaled the index in the table. The references are owned: + // Call _PyFunction_ClearByVersionCache() to clear. + PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE]; }; extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); +extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); +PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); +void _PyFunction_ClearByVersionCache(void); + extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 8c0bface3ac710..10655fd3def4f3 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -223,7 +223,44 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname return NULL; } -uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) +void +_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) +{ + func->func_version = version; + if (version != 0) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyFunctionObject **slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + Py_XSETREF(*slot, (PyFunctionObject *)Py_NewRef(func)); + } +} + +PyFunctionObject * +_PyFunction_LookupByVersion(uint32_t version) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyFunctionObject **slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + if (*slot != NULL && (*slot)->func_version == version) { + return (PyFunctionObject *)Py_NewRef(*slot); + } + return NULL; +} + +void +_PyFunction_ClearByVersionCache(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + for (int i = 0; i < FUNC_VERSION_CACHE_SIZE; i++) { + PyFunctionObject **slot = interp->func_state.func_version_cache + i; + Py_CLEAR(*slot); + } +} + +uint32_t +_PyFunction_GetVersionForCurrentState(PyFunctionObject *func) { if (func->func_version != 0) { return func->func_version; @@ -236,7 +273,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) return 0; } uint32_t v = interp->func_state.next_version++; - func->func_version = v; + _PyFunction_SetVersion(func, v); return v; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ae2923c65b3308..c585269716f569 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3549,7 +3549,8 @@ dummy_func( goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b3dd3133530562..022cdba45f72ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2608,7 +2608,8 @@ goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; stack_pointer[-1] = func; break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 11d560a6e77adf..eab55aba806ad0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4578,7 +4578,8 @@ goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; stack_pointer[-1] = func; DISPATCH(); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index be026d95ba7e77..f6511fd7b0c9dc 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2079,6 +2079,8 @@ sys__clear_type_cache_impl(PyObject *module) /*[clinic end generated code: output=20e48ca54a6f6971 input=127f3e04a8d9b555]*/ { PyType_ClearCache(); + // Also clear the function-by-version cache + _PyFunction_ClearByVersionCache(); Py_RETURN_NONE; } From 6e3143159ba83036ba54d17e815b304c776b8629 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Aug 2023 14:54:46 -0700 Subject: [PATCH 02/17] Trace into function calls - This uses the function-by-version cache I just added - There's not yet a way to trace back via `RETURN_VALUE` --- Python/optimizer.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index 559c4ae987263e..dd30d53063b4c6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -448,6 +448,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); +top: // Jump here after _PUSH_FRAME for (;;) { RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0); @@ -621,6 +622,34 @@ translate_bytecode_to_trace( ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); if (expansion->uops[i].uop == _PUSH_FRAME) { assert(i + 1 == nuops); + int func_version_offset = + offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT) + // Add one to account for the actual opcode/oparg pair: + + 1; + uint32_t func_version = read_u32(&instr[func_version_offset].cache); + PyFunctionObject *func = _PyFunction_LookupByVersion(func_version); + DPRINTF(3, "Function object: %p\n", func); + if (func != NULL) { + PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func); + if (new_code == code) { + // Recursive call, bail (we could be here forever). + DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n", + PyUnicode_AsUTF8(new_code->co_qualname), + PyUnicode_AsUTF8(new_code->co_filename), + new_code->co_firstlineno); + ADD_TO_TRACE(SAVE_IP, 0, 0); + goto done; + } + code = new_code; + initial_instr = instr = _PyCode_CODE(code); + DPRINTF(2, + "Continuing in %s (%s:%d) at byte offset %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(initial_instr, code)); + goto top; + } ADD_TO_TRACE(SAVE_IP, 0, 0); goto done; } From e2c8fa7e79420f74932aa90e7fc660d9e64482b6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Aug 2023 18:40:41 -0700 Subject: [PATCH 03/17] Make RESUME a viable uop --- Include/internal/pycore_opcode_metadata.h | 1 + Lib/test/test_capi/test_misc.py | 1 + Python/bytecodes.c | 5 ++++- Python/executor_cases.c.h | 18 ++++++++++++++++++ Python/generated_cases.c.h | 5 ++++- 5 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index afe8aa172b703b..f771b616c0965a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1400,6 +1400,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACR #ifdef NEED_OPCODE_METADATA const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE] = { [NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } }, + [RESUME] = { .nuops = 1, .uops = { { RESUME, 0, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } }, [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } }, diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 3dfbfdc26e7416..18a0476122dabf 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2633,6 +2633,7 @@ def dummy(x): self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c585269716f569..f8a020b2e38cf9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -134,6 +134,7 @@ dummy_func( } inst(RESUME, (--)) { + #if TIER_ONE assert(tstate->cframe == &cframe); assert(frame == cframe.current_frame); /* Possibly combine this with eval breaker */ @@ -142,7 +143,9 @@ dummy_func( ERROR_IF(err, error); next_instr--; } - else if (oparg < 2) { + else + #endif + if (oparg < 2) { CHECK_EVAL_BREAKER(); } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 022cdba45f72ef..87debe1e1a6815 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7,6 +7,24 @@ break; } + case RESUME: { + #if TIER_ONE + assert(tstate->cframe == &cframe); + assert(frame == cframe.current_frame); + /* Possibly combine this with eval breaker */ + if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + if (err) goto error; + next_instr--; + } + else + #endif + if (oparg < 2) { + CHECK_EVAL_BREAKER(); + } + break; + } + case LOAD_FAST_CHECK: { PyObject *value; value = GETLOCAL(oparg); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index eab55aba806ad0..2479e27ef45838 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8,6 +8,7 @@ } TARGET(RESUME) { + #if TIER_ONE assert(tstate->cframe == &cframe); assert(frame == cframe.current_frame); /* Possibly combine this with eval breaker */ @@ -16,7 +17,9 @@ if (err) goto error; next_instr--; } - else if (oparg < 2) { + else + #endif + if (oparg < 2) { CHECK_EVAL_BREAKER(); } DISPATCH(); From 5aeb1ea3e63229ac4973890056baf182a6303035 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Aug 2023 12:57:13 -0700 Subject: [PATCH 04/17] Clear func_version_cache in interpreter_clear() This should fix leaks and hopefully most failing tests. --- Include/internal/pycore_function.h | 2 +- Objects/funcobject.c | 3 +-- Python/pystate.c | 3 +++ Python/sysmodule.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index d059c3731b435f..cb3f856410d0d1 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -30,7 +30,7 @@ extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr) extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); -void _PyFunction_ClearByVersionCache(void); +void _PyFunction_ClearByVersionCache(PyInterpreterState *interp); extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 10655fd3def4f3..82c358301aa8bf 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -250,9 +250,8 @@ _PyFunction_LookupByVersion(uint32_t version) } void -_PyFunction_ClearByVersionCache(void) +_PyFunction_ClearByVersionCache(PyInterpreterState *interp) { - PyInterpreterState *interp = _PyInterpreterState_GET(); for (int i = 0; i < FUNC_VERSION_CACHE_SIZE; i++) { PyFunctionObject **slot = interp->func_state.func_version_cache + i; Py_CLEAR(*slot); diff --git a/Python/pystate.c b/Python/pystate.c index 3a05cb0fa7988d..321abe7a6c633b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -840,6 +840,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) */ // XXX Make sure we properly deal with problematic finalizers. + interp->func_state.next_version = 0; // No more new versions + _PyFunction_ClearByVersionCache(interp); + Py_CLEAR(interp->audit_hooks); for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index f6511fd7b0c9dc..f8cb8e6d61a048 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2080,7 +2080,8 @@ sys__clear_type_cache_impl(PyObject *module) { PyType_ClearCache(); // Also clear the function-by-version cache - _PyFunction_ClearByVersionCache(); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyFunction_ClearByVersionCache(interp); Py_RETURN_NONE; } From 129dd396dc0e9aa10f58822636c3d3476f0fbf00 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Aug 2023 14:12:04 -0700 Subject: [PATCH 05/17] Add a small essay on function and code versions --- Objects/funcobject.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 82c358301aa8bf..6a1d38da169163 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -223,6 +223,48 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname return NULL; } +/* +Function versions +----------------- + +Function versions are used to detect when a function object has been +updated, invalidating inline cache data used by the `CALL` bytecode +(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations). + +They are also used by the Tier 2 superblock creation code to find +the function being called (and from there the code object). + +How does a function's `func_version` field get initialized? + +- `PyFunction_New` and friends initialize it to 0. +- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`. +- It is reset to 0 when various attributes like `__code__` are set. +- A new version is allocated by `_PyFunction_GetVersionForCurrentState` + when the specializer needs a version and the version is 0. + +The latter allocates versions using a counter in the interpreter state; +when the counter wraps around to 0, no more versions are allocated. +There is one other special case: functions with a non-standard +`vectorcall` field are not given a version. + +When the function version is 0, the `CALL` bytecode is not specialized. + +Code object versions +-------------------- + +So where to code objects get their `co_version`? There is a single +static global counter, `_Py_next_func_version`. This is initialized in +the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the +number of deep-frozen function objects in that file. +(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.) + +Code objects get a new `co_version` allocated from this counter upon +creation. Since code objects are nominally immutable, `co_version` can +not be invalidated. The only way it can be 0 is when 2**32 or more +code objects have been created during the process's lifetime. +(The counter isn't reset by `fork()`, extending the lifetime.) +*/ + void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) { From 7579ff7d1e61c1f5bc1da39f25bc93bd13f96039 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Aug 2023 14:49:36 -0700 Subject: [PATCH 06/17] Move function cache clearing earlier in finalization --- Python/pylifecycle.c | 5 +++++ Python/pystate.c | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0de3abf9407899..b3319e81c89e96 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1767,6 +1767,11 @@ Py_FinalizeEx(void) // XXX assert(_Py_IsMainInterpreter(tstate->interp)); // XXX assert(_Py_IsMainThread()); + // The function version cache keeps functions alive, so clear it. + // It's used for optimizations, which we don't need in this stage. + tstate->interp->func_state.next_version = 0; // No more new versions + _PyFunction_ClearByVersionCache(tstate->interp); + // Block some operations. tstate->interp->finalizing = 1; diff --git a/Python/pystate.c b/Python/pystate.c index 321abe7a6c633b..3a05cb0fa7988d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -840,9 +840,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) */ // XXX Make sure we properly deal with problematic finalizers. - interp->func_state.next_version = 0; // No more new versions - _PyFunction_ClearByVersionCache(interp); - Py_CLEAR(interp->audit_hooks); for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { From 60967905dce381e377eb81d6ea7f587bb40990af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Aug 2023 18:56:14 -0700 Subject: [PATCH 07/17] Cache borrowed references, cleared in func_dealloc --- Include/internal/pycore_function.h | 8 ++++---- Objects/funcobject.c | 33 ++++++++++++++---------------- Python/abstract_interp_cases.c.h | 4 ++++ Python/pylifecycle.c | 5 ----- Python/sysmodule.c | 3 --- 5 files changed, 23 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index cb3f856410d0d1..3f3da8a44b77e4 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -19,9 +19,10 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ struct _py_func_state { uint32_t next_version; - // Function objects whose func_version % FUNC_VERSION_CACHE_SIZE - // once equaled the index in the table. The references are owned: - // Call _PyFunction_ClearByVersionCache() to clear. + // Borrowed references to function objects whose + // func_version % FUNC_VERSION_CACHE_SIZE + // once was equal to the index in the table. + // They are cleared when the function is deallocated. PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE]; }; @@ -30,7 +31,6 @@ extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr) extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); -void _PyFunction_ClearByVersionCache(PyInterpreterState *interp); extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 6a1d38da169163..33191d23f18230 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -271,10 +271,8 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) func->func_version = version; if (version != 0) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyFunctionObject **slot = - interp->func_state.func_version_cache - + (version % FUNC_VERSION_CACHE_SIZE); - Py_XSETREF(*slot, (PyFunctionObject *)Py_NewRef(func)); + interp->func_state.func_version_cache[ + version % FUNC_VERSION_CACHE_SIZE] = func; } } @@ -282,24 +280,14 @@ PyFunctionObject * _PyFunction_LookupByVersion(uint32_t version) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyFunctionObject **slot = - interp->func_state.func_version_cache - + (version % FUNC_VERSION_CACHE_SIZE); - if (*slot != NULL && (*slot)->func_version == version) { - return (PyFunctionObject *)Py_NewRef(*slot); + PyFunctionObject *func = interp->func_state.func_version_cache[ + version % FUNC_VERSION_CACHE_SIZE]; + if (func != NULL && func->func_version == version) { + return (PyFunctionObject *)Py_NewRef(func); } return NULL; } -void -_PyFunction_ClearByVersionCache(PyInterpreterState *interp) -{ - for (int i = 0; i < FUNC_VERSION_CACHE_SIZE; i++) { - PyFunctionObject **slot = interp->func_state.func_version_cache + i; - Py_CLEAR(*slot); - } -} - uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) { @@ -929,6 +917,15 @@ func_dealloc(PyFunctionObject *op) if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); } + if (op->func_version != 0) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyFunctionObject **slot = + interp->func_state.func_version_cache + + (op->func_version % FUNC_VERSION_CACHE_SIZE); + if (*slot == op) { + *slot = NULL; + } + } (void)func_clear(op); // These aren't cleared by func_clear(). Py_DECREF(op->func_code); diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index eef071119bcd84..9db631651f154a 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -7,6 +7,10 @@ break; } + case RESUME: { + break; + } + case POP_TOP: { STACK_SHRINK(1); break; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b3319e81c89e96..0de3abf9407899 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1767,11 +1767,6 @@ Py_FinalizeEx(void) // XXX assert(_Py_IsMainInterpreter(tstate->interp)); // XXX assert(_Py_IsMainThread()); - // The function version cache keeps functions alive, so clear it. - // It's used for optimizations, which we don't need in this stage. - tstate->interp->func_state.next_version = 0; // No more new versions - _PyFunction_ClearByVersionCache(tstate->interp); - // Block some operations. tstate->interp->finalizing = 1; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index f8cb8e6d61a048..be026d95ba7e77 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2079,9 +2079,6 @@ sys__clear_type_cache_impl(PyObject *module) /*[clinic end generated code: output=20e48ca54a6f6971 input=127f3e04a8d9b555]*/ { PyType_ClearCache(); - // Also clear the function-by-version cache - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyFunction_ClearByVersionCache(interp); Py_RETURN_NONE; } From 36a3e868e65ff4f44f0f771c9303df33bf36d2fd Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Aug 2023 11:45:42 -0700 Subject: [PATCH 08/17] Correctly set instr_fmt metadata for macros Macros not using oparg should have a format starting with IX, not IB. This corrects the metadata for a bunch of specializations, e.g. BINARY_OP_MULTIPLY_INT (which never looks at oparg). --- Include/internal/pycore_opcode_metadata.h | 20 ++++++++++---------- Tools/cases_generator/analysis.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index f771b616c0965a..d8d411b91c72d6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1191,7 +1191,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [POP_TOP] = { true, INSTR_FMT_IX, 0 }, [PUSH_NULL] = { true, INSTR_FMT_IX, 0 }, - [END_FOR] = { true, INSTR_FMT_IB, 0 }, + [END_FOR] = { true, INSTR_FMT_IX, 0 }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, 0 }, [END_SEND] = { true, INSTR_FMT_IX, 0 }, [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, 0 }, @@ -1205,14 +1205,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, 0 }, [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, 0 }, [UNARY_INVERT] = { true, INSTR_FMT_IX, 0 }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IBC, 0 }, - [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IB, HAS_LOCAL_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, 0 }, + [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX, HAS_LOCAL_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, 0 }, [BINARY_SLICE] = { true, INSTR_FMT_IX, 0 }, [STORE_SLICE] = { true, INSTR_FMT_IX, 0 }, @@ -1259,7 +1259,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG }, [STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG }, [DELETE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG }, - [LOAD_LOCALS] = { true, INSTR_FMT_IB, 0 }, + [LOAD_LOCALS] = { true, INSTR_FMT_IX, 0 }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG }, diff --git a/Tools/cases_generator/analysis.py b/Tools/cases_generator/analysis.py index 2db1cd01c19ae5..593cabbe919740 100644 --- a/Tools/cases_generator/analysis.py +++ b/Tools/cases_generator/analysis.py @@ -367,7 +367,7 @@ def analyze_macro(self, macro: parsing.Macro) -> MacroInstruction: flags.add(instr.instr_flags) case _: typing.assert_never(component) - format = "IB" + format = "IB" if flags.HAS_ARG_FLAG else "IX" if offset: format += "C" + "0" * (offset - 1) return MacroInstruction(macro.name, format, flags, macro, parts, offset) From d31197dbdb76f38fec04b4ea0acf5c1f076040f5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Aug 2023 13:14:44 -0700 Subject: [PATCH 09/17] Split RETURN_{VALUE,CONST} into uops (mostly works) --- Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_opcode_metadata.h | 66 ++++++++++++--------- Python/abstract_interp_cases.c.h | 5 ++ Python/bytecodes.c | 46 ++++++++------- Python/ceval.c | 8 +-- Python/executor_cases.c.h | 31 ++++++++++ Python/generated_cases.c.h | 72 +++++++++++++++++------ Python/optimizer.c | 6 ++ 8 files changed, 162 insertions(+), 73 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 05b7380597812b..0e3a99be8c36aa 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -171,6 +171,7 @@ void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject * PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp); +void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); #ifdef __cplusplus diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d8d411b91c72d6..f075c85a4c6c01 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -33,35 +33,36 @@ #define _BINARY_OP_SUBTRACT_FLOAT 309 #define _GUARD_BOTH_UNICODE 310 #define _BINARY_OP_ADD_UNICODE 311 -#define _LOAD_LOCALS 312 -#define _LOAD_FROM_DICT_OR_GLOBALS 313 -#define _GUARD_GLOBALS_VERSION 314 -#define _GUARD_BUILTINS_VERSION 315 -#define _LOAD_GLOBAL_MODULE 316 -#define _LOAD_GLOBAL_BUILTINS 317 -#define _GUARD_TYPE_VERSION 318 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 319 -#define _LOAD_ATTR_INSTANCE_VALUE 320 -#define IS_NONE 321 -#define _ITER_CHECK_LIST 322 -#define _IS_ITER_EXHAUSTED_LIST 323 -#define _ITER_NEXT_LIST 324 -#define _ITER_CHECK_TUPLE 325 -#define _IS_ITER_EXHAUSTED_TUPLE 326 -#define _ITER_NEXT_TUPLE 327 -#define _ITER_CHECK_RANGE 328 -#define _IS_ITER_EXHAUSTED_RANGE 329 -#define _ITER_NEXT_RANGE 330 -#define _CHECK_PEP_523 331 -#define _CHECK_FUNCTION_EXACT_ARGS 332 -#define _CHECK_STACK_SPACE 333 -#define _INIT_CALL_PY_EXACT_ARGS 334 -#define _PUSH_FRAME 335 -#define _POP_JUMP_IF_FALSE 336 -#define _POP_JUMP_IF_TRUE 337 -#define JUMP_TO_TOP 338 -#define SAVE_CURRENT_IP 339 -#define INSERT 340 +#define _POP_FRAME 312 +#define _LOAD_LOCALS 313 +#define _LOAD_FROM_DICT_OR_GLOBALS 314 +#define _GUARD_GLOBALS_VERSION 315 +#define _GUARD_BUILTINS_VERSION 316 +#define _LOAD_GLOBAL_MODULE 317 +#define _LOAD_GLOBAL_BUILTINS 318 +#define _GUARD_TYPE_VERSION 319 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320 +#define _LOAD_ATTR_INSTANCE_VALUE 321 +#define IS_NONE 322 +#define _ITER_CHECK_LIST 323 +#define _IS_ITER_EXHAUSTED_LIST 324 +#define _ITER_NEXT_LIST 325 +#define _ITER_CHECK_TUPLE 326 +#define _IS_ITER_EXHAUSTED_TUPLE 327 +#define _ITER_NEXT_TUPLE 328 +#define _ITER_CHECK_RANGE 329 +#define _IS_ITER_EXHAUSTED_RANGE 330 +#define _ITER_NEXT_RANGE 331 +#define _CHECK_PEP_523 332 +#define _CHECK_FUNCTION_EXACT_ARGS 333 +#define _CHECK_STACK_SPACE 334 +#define _INIT_CALL_PY_EXACT_ARGS 335 +#define _PUSH_FRAME 336 +#define _POP_JUMP_IF_FALSE 337 +#define _POP_JUMP_IF_TRUE 338 +#define JUMP_TO_TOP 339 +#define SAVE_CURRENT_IP 340 +#define INSERT 341 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -197,6 +198,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return oparg; case INTERPRETER_EXIT: return 1; + case _POP_FRAME: + return 1; case RETURN_VALUE: return 1; case INSTRUMENTED_RETURN_VALUE: @@ -723,6 +726,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case INTERPRETER_EXIT: return 0; + case _POP_FRAME: + return 0; case RETURN_VALUE: return 0; case INSTRUMENTED_RETURN_VALUE: @@ -1445,6 +1450,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } }, [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } }, [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } }, + [RETURN_VALUE] = { .nuops = 1, .uops = { { _POP_FRAME, 0, 0 } } }, + [RETURN_CONST] = { .nuops = 2, .uops = { { LOAD_CONST, 0, 0 }, { _POP_FRAME, 0, 0 } } }, [GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } }, [GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } }, [GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } }, @@ -1546,6 +1553,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT", [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE", + [_POP_FRAME] = "_POP_FRAME", [_LOAD_LOCALS] = "_LOAD_LOCALS", [_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 9db631651f154a..1b99b929fa8014 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -195,6 +195,11 @@ break; } + case _POP_FRAME: { + STACK_SHRINK(1); + break; + } + case GET_AITER: { PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f8a020b2e38cf9..d1ad6d9ae0d370 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -763,21 +763,39 @@ dummy_func( return retval; } - inst(RETURN_VALUE, (retval --)) { - STACK_SHRINK(1); + // The stack effect here is ambiguous. + // We definitely pop the return value off the stack on entry. + // We also push it onto the stack on exit, but that's a + // different frame, and it's accounted for by _PUSH_FRAME. + op(_POP_FRAME, (retval --)) { assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + SAVE_FRAME_STATE(); // Signals to the code generator + #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); assert(frame != &entry_frame); + #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; + #if TIER_ONE frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = dying->previous; + #endif + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); + #if TIER_ONE goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif } + macro(RETURN_VALUE) = _POP_FRAME; + inst(INSTRUMENTED_RETURN_VALUE, (retval --)) { int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, @@ -791,27 +809,13 @@ dummy_func( // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; } - inst(RETURN_CONST, (--)) { - PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); - Py_INCREF(retval); - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); - frame->prev_instr += frame->return_offset; - _PyFrame_StackPush(frame, retval); - goto resume_frame; - } + macro(RETURN_CONST) = LOAD_CONST + _POP_FRAME; inst(INSTRUMENTED_RETURN_CONST, (--)) { PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); @@ -827,7 +831,7 @@ dummy_func( // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; diff --git a/Python/ceval.c b/Python/ceval.c index 26e741ed7c7547..3887bfdb76cd11 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -222,8 +222,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, static _PyInterpreterFrame * _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs); -static void -_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); #ifdef HAVE_ERRNO_H #include @@ -925,7 +923,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->return_offset = 0; if (frame == &entry_frame) { /* Restore previous cframe and exit */ @@ -1495,8 +1493,8 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame) frame->previous = NULL; } -static void -_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) +void +_PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) { if (frame->owner == FRAME_OWNED_BY_THREAD) { clear_thread_frame(tstate, frame); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 87debe1e1a6815..b9b0670558c9ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -684,6 +684,37 @@ break; } + case _POP_FRAME: { + PyObject *retval; + retval = stack_pointer[-1]; + assert(EMPTY()); + SAVE_FRAME_STATE(); // Signals to the code generator + #if TIER_ONE + _Py_LeaveRecursiveCallPy(tstate); + assert(frame != &entry_frame); + #endif + // GH-99729: We need to unlink the frame *before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + frame = cframe.current_frame = dying->previous; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = dying->previous; + #endif + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + STACK_SHRINK(1); + break; + } + case GET_AITER: { PyObject *obj; PyObject *iter; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2479e27ef45838..098182232cc79d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -976,19 +976,32 @@ TARGET(RETURN_VALUE) { PyObject *retval; retval = stack_pointer[-1]; - STACK_SHRINK(1); assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + SAVE_FRAME_STATE(); // Signals to the code generator + #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); assert(frame != &entry_frame); + #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; + #if TIER_ONE frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = dying->previous; + #endif + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); + #if TIER_ONE goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif STACK_SHRINK(1); + DISPATCH(); } TARGET(INSTRUMENTED_RETURN_VALUE) { @@ -1006,7 +1019,7 @@ // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; @@ -1014,19 +1027,42 @@ } TARGET(RETURN_CONST) { - PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); - Py_INCREF(retval); - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); - frame->prev_instr += frame->return_offset; - _PyFrame_StackPush(frame, retval); - goto resume_frame; + PyObject *value; + PyObject *retval; + // LOAD_CONST + { + value = GETITEM(FRAME_CO_CONSTS, oparg); + Py_INCREF(value); + } + // _POP_FRAME + retval = value; + { + assert(EMPTY()); + SAVE_FRAME_STATE(); // Signals to the code generator + #if TIER_ONE + _Py_LeaveRecursiveCallPy(tstate); + assert(frame != &entry_frame); + #endif + // GH-99729: We need to unlink the frame *before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + frame = cframe.current_frame = dying->previous; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = dying->previous; + #endif + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + } + DISPATCH(); } TARGET(INSTRUMENTED_RETURN_CONST) { @@ -1043,7 +1079,7 @@ // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = cframe.current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; diff --git a/Python/optimizer.c b/Python/optimizer.c index dd30d53063b4c6..36bf035b0b1f4a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -619,6 +619,12 @@ translate_bytecode_to_trace( expansion->uops[i].offset); Py_FatalError("garbled expansion"); } + if (expansion->uops[i].uop == _POP_FRAME) { + // TODO: Move this code *after* adding it to the trace + // TODO: Continue in the previous code object, if any + ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0); + goto done; + } ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); if (expansion->uops[i].uop == _PUSH_FRAME) { assert(i + 1 == nuops); From 01c1cdb080c85e2bfc874560ac8695d09c0912b7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Aug 2023 16:34:26 -0700 Subject: [PATCH 10/17] Change LLTRACE debug to trigger on PYTHONUOPSDEBUG >= 5, not 4 --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 3887bfdb76cd11..af557ba532e5de 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -736,7 +736,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // When tracing executed uops, also trace bytecode char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 4; // TODO: Parse an int and all that + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that } } } From 41338800ef1238f9f3085722c66369bc59de563f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Aug 2023 17:08:53 -0700 Subject: [PATCH 11/17] Handle _POP_FRAME in superblock properly --- Python/optimizer.c | 53 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 36bf035b0b1f4a..c678346d5b477f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = { .tp_as_sequence = &uop_as_sequence, }; +#define TRACE_STACK_SIZE 5 + static int translate_bytecode_to_trace( PyCodeObject *code, @@ -380,10 +382,16 @@ translate_bytecode_to_trace( _PyUOpInstruction *trace, int buffer_size) { + PyCodeObject *initial_code = code; _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; int max_length = buffer_size; int reserved = 0; + struct { + PyCodeObject *code; + _Py_CODEUNIT *instr; + } trace_stack[TRACE_STACK_SIZE]; + int trace_stack_depth = 0; #ifdef Py_DEBUG char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); @@ -441,6 +449,24 @@ translate_bytecode_to_trace( // Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode)) +// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME) +#define TRACE_STACK_PUSH() \ + if (trace_stack_depth >= TRACE_STACK_SIZE) { \ + DPRINTF(2, "Trace stack overflow\n"); \ + ADD_TO_TRACE(SAVE_IP, 0, 0); \ + goto done; \ + } \ + trace_stack[trace_stack_depth].code = code; \ + trace_stack[trace_stack_depth].instr = instr; \ + trace_stack_depth++; +#define TRACE_STACK_POP() \ + if (trace_stack_depth <= 0) { \ + Py_FatalError("Trace stack underflow\n"); \ + } \ + trace_stack_depth--; \ + code = trace_stack[trace_stack_depth].code; \ + instr = trace_stack[trace_stack_depth].instr; + DPRINTF(4, "Optimizing %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -509,7 +535,7 @@ translate_bytecode_to_trace( case JUMP_BACKWARD: { - if (instr + 2 - oparg == initial_instr) { + if (instr + 2 - oparg == initial_instr && code == initial_code) { RESERVE(1, 0); ADD_TO_TRACE(JUMP_TO_TOP, 0, 0); } @@ -619,13 +645,17 @@ translate_bytecode_to_trace( expansion->uops[i].offset); Py_FatalError("garbled expansion"); } + ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); if (expansion->uops[i].uop == _POP_FRAME) { - // TODO: Move this code *after* adding it to the trace - // TODO: Continue in the previous code object, if any - ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0); - goto done; + TRACE_STACK_POP(); + DPRINTF(2, + "Returning to %s (%s:%d) at byte offset %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(instr, code)); + goto top; } - ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); if (expansion->uops[i].uop == _PUSH_FRAME) { assert(i + 1 == nuops); int func_version_offset = @@ -646,14 +676,17 @@ translate_bytecode_to_trace( ADD_TO_TRACE(SAVE_IP, 0, 0); goto done; } + // Increment IP to the return address + instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; + TRACE_STACK_PUSH(); code = new_code; - initial_instr = instr = _PyCode_CODE(code); + instr = _PyCode_CODE(code); DPRINTF(2, "Continuing in %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, - 2 * INSTR_IP(initial_instr, code)); + 2 * INSTR_IP(instr, code)); goto top; } ADD_TO_TRACE(SAVE_IP, 0, 0); @@ -674,6 +707,10 @@ translate_bytecode_to_trace( } // End for (;;) done: + while (trace_stack_depth > 0) { + TRACE_STACK_POP(); + } + assert(code == initial_code); // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE if (trace_length > 3) { ADD_TO_TRACE(EXIT_TRACE, 0, 0); From 78929408a44c06b0960cfa3b6dc9d249801d8060 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Aug 2023 17:37:04 -0700 Subject: [PATCH 12/17] Handle trace stack underflow --- Python/optimizer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index c678346d5b477f..f282c041a4c59f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -600,6 +600,14 @@ translate_bytecode_to_trace( // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE) int nuops = expansion->nuops; RESERVE(nuops, 0); + if (expansion->uops[nuops-1].uop == _POP_FRAME) { + // Check for trace stack underflow now: + // We can't bail e.g. in the middle of + // LOAD_CONST + _POP_FRAME. + if (trace_stack_depth == 0) { + DPRINTF(2, "Trace stack underflow\n"); + goto done;} + } uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM for (int i = 0; i < nuops; i++) { oparg = orig_oparg; From 7760028dbd6a89712a42330c1bf54531105903f9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Aug 2023 13:58:59 -0700 Subject: [PATCH 13/17] Add _Py_LeaveRecursiveCallPy to _POP_FRAME --- Python/bytecodes.c | 4 +--- Python/ceval.c | 4 ---- Python/ceval_macros.h | 4 ++++ Python/executor_cases.c.h | 4 +--- Python/generated_cases.c.h | 8 ++------ 5 files changed, 8 insertions(+), 16 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d1ad6d9ae0d370..bdd42430048ca9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -770,13 +770,11 @@ dummy_func( op(_POP_FRAME, (retval --)) { assert(EMPTY()); SAVE_FRAME_STATE(); // Signals to the code generator - #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; #if TIER_ONE + assert(frame != &entry_frame); frame = cframe.current_frame = dying->previous; #endif #if TIER_TWO diff --git a/Python/ceval.c b/Python/ceval.c index af557ba532e5de..e2b1f808c6d217 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -601,10 +601,6 @@ int _Py_CheckRecursiveCallPy( } -static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { - tstate->py_recursion_remaining++; -} - static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = { /* Put a NOP at the start, so that the IP points into * the code, rather than before it */ diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 5e2db1e0b394e6..43e24feec5da1d 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -369,3 +369,7 @@ static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { return (tstate->py_recursion_remaining-- <= 0) && _Py_CheckRecursiveCallPy(tstate); } + +static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { + tstate->py_recursion_remaining++; +} diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b9b0670558c9ef..25168909882d1a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -689,13 +689,11 @@ retval = stack_pointer[-1]; assert(EMPTY()); SAVE_FRAME_STATE(); // Signals to the code generator - #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; #if TIER_ONE + assert(frame != &entry_frame); frame = cframe.current_frame = dying->previous; #endif #if TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 098182232cc79d..73dd119ba94d8c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -978,13 +978,11 @@ retval = stack_pointer[-1]; assert(EMPTY()); SAVE_FRAME_STATE(); // Signals to the code generator - #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; #if TIER_ONE + assert(frame != &entry_frame); frame = cframe.current_frame = dying->previous; #endif #if TIER_TWO @@ -1039,13 +1037,11 @@ { assert(EMPTY()); SAVE_FRAME_STATE(); // Signals to the code generator - #if TIER_ONE _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - #endif // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; #if TIER_ONE + assert(frame != &entry_frame); frame = cframe.current_frame = dying->previous; #endif #if TIER_TWO From 976f86751403ad7e9b3837a7410aeef3578cc404 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Aug 2023 16:56:12 -0700 Subject: [PATCH 14/17] Ensure co_stacksize >= 1, else RETURN_CONST may crash --- Objects/codeobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2c9c8cec77ff9f..4d6efe938f45d6 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -396,6 +396,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) int nlocals, ncellvars, nfreevars; get_localsplus_counts(con->localsplusnames, con->localspluskinds, &nlocals, &ncellvars, &nfreevars); + if (con->stacksize == 0) { + con->stacksize = 1; + } co->co_filename = Py_NewRef(con->filename); co->co_name = Py_NewRef(con->name); From 20786b28761c4153cc11c2aa1766fbf9769c2e60 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Aug 2023 17:52:06 -0700 Subject: [PATCH 15/17] Fix failing test_code (co_stacksize is never 0) --- Lib/test/test_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index ca06a39f5df142..e056c16466e8c4 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -264,7 +264,7 @@ def func2(): ("co_posonlyargcount", 0), ("co_kwonlyargcount", 0), ("co_nlocals", 1), - ("co_stacksize", 0), + ("co_stacksize", 1), ("co_flags", code.co_flags | inspect.CO_COROUTINE), ("co_firstlineno", 100), ("co_code", code2.co_code), From 23e98957ef332bd6370950edd038135ddc5409ed Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Aug 2023 18:23:05 -0700 Subject: [PATCH 16/17] Don't trace into functions if func_version != co_version Seems to fix test_opcache, playing games with func.__code__ = func.__code__.replace(). --- Python/optimizer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index f282c041a4c59f..57518404c3f19d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -684,6 +684,14 @@ translate_bytecode_to_trace( ADD_TO_TRACE(SAVE_IP, 0, 0); goto done; } + if (new_code->co_version != func_version) { + // func.__code__ was updated. + // Perhaps it may happen again, so don't bother tracing. + // TODO: Reason about this -- is it better to bail or not? + DPRINTF(2, "Bailing because co_version != func_version\n"); + ADD_TO_TRACE(SAVE_IP, 0, 0); + goto done; + } // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); From bde65470da9c0d679751802a00b57ba98866242b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 16 Aug 2023 15:23:15 -0700 Subject: [PATCH 17/17] Fix RETURN_VALUE/CONST with hacks --- Include/internal/pycore_opcode_metadata.h | 4 +- Python/bytecodes.c | 13 ++++- Python/executor_cases.c.h | 4 +- Python/generated_cases.c.h | 71 +++++++++++++++-------- Tools/cases_generator/analysis.py | 4 +- Tools/cases_generator/stacking.py | 2 +- 6 files changed, 64 insertions(+), 34 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index f075c85a4c6c01..396d194ed2734e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1450,8 +1450,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } }, [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } }, [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } }, - [RETURN_VALUE] = { .nuops = 1, .uops = { { _POP_FRAME, 0, 0 } } }, - [RETURN_CONST] = { .nuops = 2, .uops = { { LOAD_CONST, 0, 0 }, { _POP_FRAME, 0, 0 } } }, + [RETURN_VALUE] = { .nuops = 3, .uops = { { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } }, + [RETURN_CONST] = { .nuops = 4, .uops = { { LOAD_CONST, 0, 0 }, { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } }, [GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } }, [GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } }, [GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bdd42430048ca9..4ba938ed71d4cf 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -769,7 +769,7 @@ dummy_func( // different frame, and it's accounted for by _PUSH_FRAME. op(_POP_FRAME, (retval --)) { assert(EMPTY()); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; @@ -792,7 +792,10 @@ dummy_func( #endif } - macro(RETURN_VALUE) = _POP_FRAME; + macro(RETURN_VALUE) = + SAVE_IP + // Tier 2 only; special-cased oparg + SAVE_CURRENT_IP + // Sets frame->prev_instr + _POP_FRAME; inst(INSTRUMENTED_RETURN_VALUE, (retval --)) { int err = _Py_call_instrumentation_arg( @@ -813,7 +816,11 @@ dummy_func( goto resume_frame; } - macro(RETURN_CONST) = LOAD_CONST + _POP_FRAME; + macro(RETURN_CONST) = + LOAD_CONST + + SAVE_IP + // Tier 2 only; special-cased oparg + SAVE_CURRENT_IP + // Sets frame->prev_instr + _POP_FRAME; inst(INSTRUMENTED_RETURN_CONST, (--)) { PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 25168909882d1a..ac97cd5e855fad 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -687,8 +687,9 @@ case _POP_FRAME: { PyObject *retval; retval = stack_pointer[-1]; + STACK_SHRINK(1); assert(EMPTY()); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; @@ -709,7 +710,6 @@ stack_pointer = _PyFrame_GetStackPointer(frame); ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif - STACK_SHRINK(1); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 73dd119ba94d8c..96ec30c1c225c6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -975,31 +975,43 @@ TARGET(RETURN_VALUE) { PyObject *retval; + // SAVE_CURRENT_IP + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } + // _POP_FRAME retval = stack_pointer[-1]; - assert(EMPTY()); - SAVE_FRAME_STATE(); // Signals to the code generator - _Py_LeaveRecursiveCallPy(tstate); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - #if TIER_ONE - assert(frame != &entry_frame); - frame = cframe.current_frame = dying->previous; - #endif - #if TIER_TWO - frame = tstate->cframe->current_frame = dying->previous; - #endif - _PyEval_FrameClearAndPop(tstate, dying); - frame->prev_instr += frame->return_offset; - _PyFrame_StackPush(frame, retval); - #if TIER_ONE - goto resume_frame; - #endif - #if TIER_TWO - stack_pointer = _PyFrame_GetStackPointer(frame); - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; - #endif STACK_SHRINK(1); - DISPATCH(); + { + assert(EMPTY()); + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_LeaveRecursiveCallPy(tstate); + // GH-99729: We need to unlink the frame *before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + assert(frame != &entry_frame); + frame = cframe.current_frame = dying->previous; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = dying->previous; + #endif + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + } } TARGET(INSTRUMENTED_RETURN_VALUE) { @@ -1032,11 +1044,21 @@ value = GETITEM(FRAME_CO_CONSTS, oparg); Py_INCREF(value); } + // SAVE_CURRENT_IP + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } // _POP_FRAME retval = value; { assert(EMPTY()); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; @@ -1058,7 +1080,6 @@ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif } - DISPATCH(); } TARGET(INSTRUMENTED_RETURN_CONST) { diff --git a/Tools/cases_generator/analysis.py b/Tools/cases_generator/analysis.py index 593cabbe919740..48f2db981c95b6 100644 --- a/Tools/cases_generator/analysis.py +++ b/Tools/cases_generator/analysis.py @@ -364,7 +364,9 @@ def analyze_macro(self, macro: parsing.Macro) -> MacroInstruction: case Instruction() as instr: part, offset = self.analyze_instruction(instr, offset) parts.append(part) - flags.add(instr.instr_flags) + if instr.name != "SAVE_IP": + # SAVE_IP in a macro is a no-op in Tier 1 + flags.add(instr.instr_flags) case _: typing.assert_never(component) format = "IB" if flags.HAS_ARG_FLAG else "IX" diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 8361eb99f88a7c..632298a567dd40 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -380,7 +380,7 @@ def write_components( poke.as_stack_effect(lax=True), ) - if mgr.instr.name == "_PUSH_FRAME": + if mgr.instr.name in ("_PUSH_FRAME", "_POP_FRAME"): # Adjust stack to min_offset (input effects materialized) out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset