From 4759ba6eec9f0b36b24b8eb7e7b120d471c67e82 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 22 Nov 2024 17:55:33 +0000 Subject: [PATCH] gh-127022: Simplify `PyStackRef_FromPyObjectSteal` (#127024) This gets rid of the immortal check in `PyStackRef_FromPyObjectSteal()`. Overall, this improves performance about 2% in the free threading build. This also renames `PyStackRef_Is()` to `PyStackRef_IsExactly()` because the macro requires that the tag bits of the arguments match, which is only true in certain special cases. --- Include/internal/pycore_stackref.h | 14 +++++++--- Python/bytecodes.c | 42 ++++++++++++------------------ Python/executor_cases.c.h | 28 +++++++------------- Python/generated_cases.c.h | 40 ++++++++++++---------------- Tools/cases_generator/analyzer.py | 5 +++- 5 files changed, 57 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 588e57f6cd97e0..90a3118352f7ae 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -99,8 +99,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) assert(obj != NULL); // Make sure we don't take an already tagged value. assert(((uintptr_t)obj & Py_TAG_BITS) == 0); - unsigned int tag = _Py_IsImmortal(obj) ? (Py_TAG_DEFERRED) : Py_TAG_PTR; - return ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + return (_PyStackRef){ .bits = (uintptr_t)obj }; } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) @@ -190,9 +189,16 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #endif // Py_GIL_DISABLED -// Note: this is a macro because MSVC (Windows) has trouble inlining it. +// Check if a stackref is exactly the same as another stackref, including the +// the deferred bit. This can only be used safely if you know that the deferred +// bits of `a` and `b` match. +#define PyStackRef_IsExactly(a, b) \ + (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) +// Checks that mask out the deferred bit in the free threading build. +#define PyStackRef_IsNone(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_None) +#define PyStackRef_IsTrue(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_True) +#define PyStackRef_IsFalse(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_False) // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 83240d5b95be92..88e96afe4151f5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -376,7 +376,7 @@ dummy_func( pure inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; DEAD(value); } @@ -441,7 +441,7 @@ dummy_func( inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) { // This one is a bit weird, because we expect *some* failures: - EXIT_IF(!PyStackRef_Is(value, PyStackRef_None)); + EXIT_IF(!PyStackRef_IsNone(value)); DEAD(value); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; @@ -651,9 +651,7 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; @@ -664,7 +662,7 @@ dummy_func( next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left)); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. * @@ -1141,7 +1139,7 @@ dummy_func( gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); } else { @@ -1249,7 +1247,7 @@ dummy_func( inst(POP_EXCEPT, (exc_value -- )) { _PyErr_StackItem *exc_info = tstate->exc_info; Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); } @@ -2502,13 +2500,7 @@ dummy_func( } inst(IS_OP, (left, right -- b)) { -#ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; -#else - int res = PyStackRef_Is(left, right) ^ oparg; -#endif DECREF_INPUTS(); b = res ? PyStackRef_True : PyStackRef_False; } @@ -2715,7 +2707,7 @@ dummy_func( replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); @@ -2723,14 +2715,14 @@ dummy_func( replaced op(_POP_JUMP_IF_TRUE, (cond -- )) { assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); DEAD(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } op(_IS_NONE, (value -- b)) { - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; DEAD(value); } @@ -3774,7 +3766,7 @@ dummy_func( inst(EXIT_INIT_CHECK, (should_be_none -- )) { assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", Py_TYPE(PyStackRef_AsPyObjectBorrow(should_be_none))->tp_name); @@ -4734,7 +4726,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_TRUE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4743,7 +4735,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_FALSE, (unused/1 -- )) { _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4751,7 +4743,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4767,7 +4759,7 @@ dummy_func( inst(INSTRUMENTED_POP_JUMP_IF_NOT_NONE, (unused/1 -- )) { _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4802,21 +4794,21 @@ dummy_func( ///////// Tier-2 only opcodes ///////// op (_GUARD_IS_TRUE_POP, (flag -- )) { - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_true); } op (_GUARD_IS_FALSE_POP, (flag -- )) { - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); DEAD(flag); SYNC_SP(); EXIT_IF(!is_false); } op (_GUARD_IS_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); SYNC_SP(); @@ -4826,7 +4818,7 @@ dummy_func( } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); SYNC_SP(); EXIT_IF(is_none); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 22de7ca11cd553..5af970ec4ae219 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -445,7 +445,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; break; @@ -519,7 +519,7 @@ _PyStackRef res; value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - if (!PyStackRef_Is(value, PyStackRef_None)) { + if (!PyStackRef_IsNone(value)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -822,9 +822,7 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -834,7 +832,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - if (!PyStackRef_Is(*target_local, left)) { + if (PyStackRef_AsPyObjectBorrow(*target_local) != left_o) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -1522,7 +1520,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -3110,13 +3108,7 @@ oparg = CURRENT_OPARG(); right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? PyStackRef_True : PyStackRef_False; @@ -3320,7 +3312,7 @@ _PyStackRef value; _PyStackRef b; value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -4562,7 +4554,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -5643,7 +5635,7 @@ case _GUARD_IS_TRUE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_true = PyStackRef_Is(flag, PyStackRef_True); + int is_true = PyStackRef_IsTrue(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_true) { @@ -5656,7 +5648,7 @@ case _GUARD_IS_FALSE_POP: { _PyStackRef flag; flag = stack_pointer[-1]; - int is_false = PyStackRef_Is(flag, PyStackRef_False); + int is_false = PyStackRef_IsFalse(flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (!is_false) { @@ -5669,7 +5661,7 @@ case _GUARD_IS_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); if (!is_none) { PyStackRef_CLOSE(val); stack_pointer += -1; @@ -5687,7 +5679,7 @@ case _GUARD_IS_NOT_NONE_POP: { _PyStackRef val; val = stack_pointer[-1]; - int is_none = PyStackRef_Is(val, PyStackRef_None); + int is_none = PyStackRef_IsNone(val); PyStackRef_CLOSE(val); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 182c9cf9919845..36ec727eed3fc9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,9 +183,7 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { - #ifndef NDEBUG PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - #endif PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE @@ -195,7 +193,7 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - DEOPT_IF(!PyStackRef_Is(*target_local, left), BINARY_OP); + DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o, BINARY_OP); STAT_INC(BINARY_OP, hit); /* Handle `left = left + right` or `left += right` for str. * @@ -3824,7 +3822,7 @@ _PyStackRef should_be_none; should_be_none = stack_pointer[-1]; assert(STACK_LEVEL() == 2); - if (!PyStackRef_Is(should_be_none, PyStackRef_None)) { + if (!PyStackRef_IsNone(should_be_none)) { _PyFrame_SetStackPointer(frame, stack_pointer); PyErr_Format(PyExc_TypeError, "__init__() should return None, not '%.200s'", @@ -4760,7 +4758,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -4774,7 +4772,7 @@ INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NONE); /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); - int flag = PyStackRef_Is(value_stackref, PyStackRef_None); + int flag = PyStackRef_IsNone(value_stackref); int offset; if (flag) { offset = oparg; @@ -4796,7 +4794,7 @@ /* Skip 1 cache entry */ _PyStackRef value_stackref = POP(); int offset; - int nflag = PyStackRef_Is(value_stackref, PyStackRef_None); + int nflag = PyStackRef_IsNone(value_stackref); if (nflag) { offset = 0; } @@ -4819,7 +4817,7 @@ /* Skip 1 cache entry */ _PyStackRef cond = POP(); assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); int offset = flag * oparg; RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); @@ -5040,13 +5038,7 @@ _PyStackRef b; right = stack_pointer[-1]; left = stack_pointer[-2]; - #ifdef Py_GIL_DISABLED - // On free-threaded builds, objects are conditionally immortalized. - // So their bits don't always compare equally. int res = Py_Is(PyStackRef_AsPyObjectBorrow(left), PyStackRef_AsPyObjectBorrow(right)) ^ oparg; - #else - int res = PyStackRef_Is(left, right) ^ oparg; - #endif PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); b = res ? PyStackRef_True : PyStackRef_False; @@ -6663,7 +6655,7 @@ _PyErr_StackItem *exc_info = tstate->exc_info; _PyFrame_SetStackPointer(frame, stack_pointer); Py_XSETREF(exc_info->exc_value, - PyStackRef_Is(exc_value, PyStackRef_None) + PyStackRef_IsNone(exc_value) ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; @@ -6680,7 +6672,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -6700,7 +6692,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6712,7 +6704,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6733,7 +6725,7 @@ // _IS_NONE { value = stack_pointer[-1]; - if (PyStackRef_Is(value, PyStackRef_None)) { + if (PyStackRef_IsNone(value)) { b = PyStackRef_True; } else { @@ -6745,7 +6737,7 @@ { cond = b; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_False); + int flag = PyStackRef_IsFalse(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -6763,7 +6755,7 @@ /* Skip 1 cache entry */ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); - int flag = PyStackRef_Is(cond, PyStackRef_True); + int flag = PyStackRef_IsTrue(cond); RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; @@ -7100,7 +7092,7 @@ gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); } - if (PyStackRef_Is(v, PyStackRef_None) && PyIter_Check(receiver_o)) { + if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7880,7 +7872,7 @@ /* Skip 2 cache entries */ value = stack_pointer[-1]; // This one is a bit weird, because we expect *some* failures: - DEOPT_IF(!PyStackRef_Is(value, PyStackRef_None), TO_BOOL); + DEOPT_IF(!PyStackRef_IsNone(value), TO_BOOL); STAT_INC(TO_BOOL, hit); res = PyStackRef_False; stack_pointer[-1] = res; @@ -7955,7 +7947,7 @@ _PyStackRef res; value = stack_pointer[-1]; assert(PyStackRef_BoolCheck(value)); - res = PyStackRef_Is(value, PyStackRef_False) + res = PyStackRef_IsFalse(value) ? PyStackRef_True : PyStackRef_False; stack_pointer[-1] = res; DISPATCH(); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index e02e07ec748231..eca851e6de87ae 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -548,7 +548,10 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", - "PyStackRef_Is", + "PyStackRef_IsExactly", + "PyStackRef_IsNone", + "PyStackRef_IsTrue", + "PyStackRef_IsFalse", "PyStackRef_IsNull", "PyStackRef_None", "PyStackRef_TYPE",