Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-113710: Backedge counter improvements. #115166

Merged
merged 2 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ typedef struct {

PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);

_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);

PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);

PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
Expand All @@ -79,8 +81,6 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o
int
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);

extern _PyOptimizerObject _PyOptimizer_Default;

void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
void _Py_ExecutorClear(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
Expand All @@ -95,7 +95,11 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);

#define OPTIMIZER_BITS_IN_COUNTER 4
/* Minimum of 16 additional executions before retry */
#define MINIMUM_TIER2_BACKOFF 4
#define MIN_TIER2_BACKOFF 4
#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX

#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
Expand Down
6 changes: 4 additions & 2 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,10 @@ struct _is {
struct callable_cache callable_cache;
_PyOptimizerObject *optimizer;
_PyExecutorObject *executor_list_head;
uint16_t optimizer_resume_threshold;
uint16_t optimizer_backedge_threshold;
/* These values are shifted and offset to speed up check in JUMP_BACKWARD */
uint32_t optimizer_resume_threshold;
uint32_t optimizer_backedge_threshold;

uint32_t next_func_version;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;
Expand Down
29 changes: 16 additions & 13 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2318,13 +2318,16 @@ dummy_func(
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
uint16_t counter = this_instr[1].cache;
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
* do the 2s complement comparison manually */
uint16_t ucounter = this_instr[1].cache + (1 << 15);
uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
* do the 2s complement adjustment manually */
uint32_t offset_counter = counter ^ (1 << 15);
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
Expand All @@ -2338,18 +2341,18 @@ dummy_func(
// Rewind and enter the executor:
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
}
else {
int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
if (backoff < MINIMUM_TIER2_BACKOFF) {
backoff = MINIMUM_TIER2_BACKOFF;
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
backoff++;
if (backoff < MIN_TIER2_BACKOFF) {
backoff = MIN_TIER2_BACKOFF;
}
else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
backoff++;
else if (backoff > MAX_TIER2_BACKOFF) {
backoff = MAX_TIER2_BACKOFF;
}
assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
}
}
#endif /* ENABLE_SPECIALIZATION */
Expand Down
29 changes: 16 additions & 13 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 33 additions & 15 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ never_optimize(
_PyExecutorObject **exec,
int Py_UNUSED(stack_entries))
{
/* Although it should be benign for this to be called,
* it shouldn't happen, so fail in debug builds. */
assert(0 && "never optimize should never be called");
return 0;
}

Expand All @@ -126,38 +129,53 @@ PyTypeObject _PyDefaultOptimizer_Type = {
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
};

_PyOptimizerObject _PyOptimizer_Default = {
static _PyOptimizerObject _PyOptimizer_Default = {
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
.optimize = never_optimize,
.resume_threshold = INT16_MAX,
.backedge_threshold = INT16_MAX,
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};

static uint32_t
shift_and_offset_threshold(uint16_t threshold)
{
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
}

_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
assert(interp->optimizer_backedge_threshold ==
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
assert(interp->optimizer_resume_threshold ==
shift_and_offset_threshold(interp->optimizer->resume_threshold));
Py_INCREF(interp->optimizer);
return interp->optimizer;
}

void
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
_PyOptimizerObject *
_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (optimizer == NULL) {
optimizer = &_PyOptimizer_Default;
}
_PyOptimizerObject *old = interp->optimizer;
Py_INCREF(optimizer);
interp->optimizer = optimizer;
interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
interp->optimizer_resume_threshold = optimizer->resume_threshold;
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
return old;
}

void
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
Py_DECREF(old);
}

Expand Down Expand Up @@ -865,10 +883,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return NULL;
}
opt->optimize = uop_optimize;
opt->resume_threshold = INT16_MAX;
// Need at least 3 iterations to settle specializations.
// A few lower bits of the counter are reserved for other flags.
opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
// Need a few iterations to settle specializations,
// and to ammortize the cost of optimization.
opt->backedge_threshold = 16;
return (PyObject *)opt;
}

Expand Down Expand Up @@ -955,7 +973,7 @@ PyUnstable_Optimizer_NewCounter(void)
return NULL;
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = INT16_MAX;
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
Expand Down
4 changes: 2 additions & 2 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)

// Invalidate all executors and turn off tier 2 optimizer
_Py_Executors_InvalidateAll(interp);
Py_XDECREF(interp->optimizer);
interp->optimizer = &_PyOptimizer_Default;
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
Py_XDECREF(old);

// Stop watching __builtin__ modifications
PyDict_Unwatch(0, interp->builtins);
Expand Down
10 changes: 3 additions & 7 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,9 +622,7 @@ init_interpreter(PyInterpreterState *interp,
}
interp->sys_profile_initialized = false;
interp->sys_trace_initialized = false;
interp->optimizer = &_PyOptimizer_Default;
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
(void)_Py_SetOptimizer(interp, NULL);
interp->next_func_version = 1;
interp->executor_list_head = NULL;
if (interp != &runtime->_main_interpreter) {
Expand Down Expand Up @@ -777,10 +775,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
tstate->_status.cleared = 0;
}

Py_CLEAR(interp->optimizer);
interp->optimizer = &_PyOptimizer_Default;
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
Py_DECREF(old);

/* It is possible that any of the objects below have a finalizer
that runs Python code or otherwise relies on a thread state
Expand Down
Loading