From 1a18d22f7de9cbcd71634d6a8782bcf850a2dc85 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 20 Nov 2023 10:08:53 -0800 Subject: [PATCH] gh-106529: Make FOR_ITER a viable uop (#112134) This uses the new mechanism whereby certain uops are replaced by others during translation, using the `_PyUop_Replacements` table. We further special-case the `_FOR_ITER_TIER_TWO` uop to update the deoptimization target to point just past the corresponding `END_FOR` opcode. Two tiny code cleanups are also part of this PR. --- Include/internal/pycore_opcode_metadata.h | 86 ++++++++++--------- Lib/test/test_capi/test_misc.py | 30 +++++++ ...-11-15-16-14-10.gh-issue-106529.Y48ax9.rst | 1 + Python/abstract_interp_cases.c.h | 6 ++ Python/bytecodes.c | 23 ++++- Python/ceval.c | 4 +- Python/executor_cases.c.h | 25 ++++++ Python/optimizer.c | 6 ++ 8 files changed, 138 insertions(+), 43 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4d98b23df5d927c..4e45725d3934796 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -81,45 +81,46 @@ #define _IS_NONE 353 #define _SPECIALIZE_FOR_ITER 354 #define _FOR_ITER 355 -#define _ITER_CHECK_LIST 356 -#define _ITER_JUMP_LIST 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_TUPLE 361 -#define _GUARD_NOT_EXHAUSTED_TUPLE 362 -#define _ITER_NEXT_TUPLE 363 -#define _ITER_CHECK_RANGE 364 -#define _ITER_JUMP_RANGE 365 -#define _GUARD_NOT_EXHAUSTED_RANGE 366 -#define _ITER_NEXT_RANGE 367 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 -#define _GUARD_KEYS_VERSION 369 -#define _LOAD_ATTR_METHOD_WITH_VALUES 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 -#define _CHECK_ATTR_METHOD_LAZY_DICT 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _SPECIALIZE_CALL 376 -#define _CALL 377 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 -#define _CHECK_PEP_523 380 -#define _CHECK_FUNCTION_EXACT_ARGS 381 -#define _CHECK_STACK_SPACE 382 -#define _INIT_CALL_PY_EXACT_ARGS 383 -#define _PUSH_FRAME 384 -#define _SPECIALIZE_BINARY_OP 385 -#define _BINARY_OP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_IS_FALSE_POP 388 -#define _GUARD_IS_NONE_POP 389 -#define _GUARD_IS_NOT_NONE_POP 390 -#define _JUMP_TO_TOP 391 -#define _SAVE_RETURN_OFFSET 392 -#define _INSERT 393 -#define _CHECK_VALIDITY 394 +#define _FOR_ITER_TIER_TWO 356 +#define _ITER_CHECK_LIST 357 +#define _ITER_JUMP_LIST 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _ITER_NEXT_LIST 360 +#define _ITER_CHECK_TUPLE 361 +#define _ITER_JUMP_TUPLE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _ITER_NEXT_TUPLE 364 +#define _ITER_CHECK_RANGE 365 +#define _ITER_JUMP_RANGE 366 +#define _GUARD_NOT_EXHAUSTED_RANGE 367 +#define _ITER_NEXT_RANGE 368 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369 +#define _GUARD_KEYS_VERSION 370 +#define _LOAD_ATTR_METHOD_WITH_VALUES 371 +#define _LOAD_ATTR_METHOD_NO_DICT 372 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _CHECK_ATTR_METHOD_LAZY_DICT 375 +#define _LOAD_ATTR_METHOD_LAZY_DICT 376 +#define _SPECIALIZE_CALL 377 +#define _CALL 378 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380 +#define _CHECK_PEP_523 381 +#define _CHECK_FUNCTION_EXACT_ARGS 382 +#define _CHECK_STACK_SPACE 383 +#define _INIT_CALL_PY_EXACT_ARGS 384 +#define _PUSH_FRAME 385 +#define _SPECIALIZE_BINARY_OP 386 +#define _BINARY_OP 387 +#define _GUARD_IS_TRUE_POP 388 +#define _GUARD_IS_FALSE_POP 389 +#define _GUARD_IS_NONE_POP 390 +#define _GUARD_IS_NOT_NONE_POP 391 +#define _JUMP_TO_TOP 392 +#define _SAVE_RETURN_OFFSET 393 +#define _INSERT 394 +#define _CHECK_VALIDITY 395 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 1; + case _FOR_ITER_TIER_TWO: + return 1; case FOR_ITER: return 1; case INSTRUMENTED_FOR_ITER: @@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 2; + case _FOR_ITER_TIER_TWO: + return 2; case FOR_ITER: return 2; case INSTRUMENTED_FOR_ITER: @@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, @@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe5c36c0c0dec9f..21a5cd3326d7073 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2808,6 +2808,36 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertIn("_GUARD_IS_FALSE_POP", uops) + def test_for_iter_tier_two(self): + class MyIter: + def __init__(self, n): + self.n = n + def __iter__(self): + return self + def __next__(self): + self.n -= 1 + if self.n < 0: + raise StopIteration + return self.n + + def testfunc(n, m): + x = 0 + for i in range(m): + for j in MyIter(n): + x += 1000*i + j + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + x = testfunc(10, 10) + + self.assertEqual(x, sum(range(10)) * 10010) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_FOR_ITER_TIER_TWO", uops) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst new file mode 100644 index 000000000000000..b2a34ac735cdeba --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst @@ -0,0 +1 @@ +Enable translating unspecialized ``FOR_ITER`` to Tier 2. diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a2f6aa8def8f693..0d7fbe8a39a5d46 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -624,6 +624,12 @@ break; } + case _FOR_ITER_TIER_TWO: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case _ITER_CHECK_LIST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index da98630f53943a0..a1ca66e457e47d9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2369,7 +2369,7 @@ dummy_func( goto enter_tier_one; } - replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2513,7 +2513,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ next = (*Py_TYPE(iter)->tp_iternext)(iter); if (next == NULL) { @@ -2536,6 +2536,25 @@ dummy_func( // Common case: no jump, leave it to the code generator } + op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true); + } + // Common case: no jump, leave it to the code generator + } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { diff --git a/Python/ceval.c b/Python/ceval.c index ba234dc57744729..b5e85930c81c9ea 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1074,7 +1074,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_STAT_INC(opcode, miss); frame->return_offset = 0; // Dispatch to frame->instr_ptr _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); // Fall through // Jump here from ENTER_EXECUTOR @@ -1085,7 +1085,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4e29fb9f0fa93d2..ae662b20e4403f2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2101,6 +2101,31 @@ break; } + case _FOR_ITER_TIER_TWO: { + PyObject *iter; + PyObject *next; + iter = stack_pointer[-1]; + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true, _FOR_ITER_TIER_TWO); + } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_LIST: { PyObject *iter; iter = stack_pointer[-1]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 64a15e0dd6696ec..261a5ffd1e22e0e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, + [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; static const uint16_t @@ -620,6 +621,11 @@ translate_bytecode_to_trace( } if (_PyUop_Replacements[uop]) { uop = _PyUop_Replacements[uop]; + if (uop == _FOR_ITER_TIER_TWO) { + target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || + _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); + } } break; case OPARG_CACHE_1: