-
-
Notifications
You must be signed in to change notification settings - Fork 30.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-106485: Dematerialize instance dictionaries when possible #106539
Changes from 21 commits
a4e456f
716cc5a
b9ec16f
c5f2067
0ab8274
c3d076b
2b20a5b
ebcad24
5d76456
d00eefe
2c4f262
912e12e
222469a
94dd38f
fe19772
7f4fd05
25202d9
d6d2045
125a15b
b2495de
d42d2e6
a4e8fbf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Reduce the number of materialized instance dictionaries by dematerializing | ||
them when possible. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5464,6 +5464,35 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values) | |
return make_dict_from_instance_attributes(interp, keys, values); | ||
} | ||
|
||
// Return 1 if the dict was dematerialized, 0 otherwise. | ||
int | ||
_PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) | ||
{ | ||
assert(_PyObject_DictOrValuesPointer(obj) == dorv); | ||
assert(!_PyDictOrValues_IsValues(*dorv)); | ||
PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv); | ||
if (dict == NULL) { | ||
return 0; | ||
} | ||
// It's likely that this dict still shares its keys (if it was materialized | ||
// on request and not heavily modified): | ||
assert(PyDict_CheckExact(dict)); | ||
assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE)); | ||
if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) { | ||
return 0; | ||
} | ||
assert(dict->ma_values); | ||
// We have an opportunity to do something *really* cool: dematerialize it! | ||
_PyDictKeys_DecRef(dict->ma_keys); | ||
_PyDictOrValues_SetValues(dorv, dict->ma_values); | ||
OBJECT_STAT_INC(dict_dematerialized); | ||
// Don't try this at home, kids: | ||
dict->ma_keys = NULL; | ||
dict->ma_values = NULL; | ||
Py_DECREF(dict); | ||
return 1; | ||
} | ||
|
||
int | ||
_PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, | ||
PyObject *name, PyObject *value) | ||
|
@@ -5688,6 +5717,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) | |
dict = _PyDictOrValues_GetDict(*dorv_ptr); | ||
if (dict == NULL) { | ||
dictkeys_incref(CACHED_KEYS(tp)); | ||
OBJECT_STAT_INC(dict_materialized_on_request); | ||
dict = new_dict_with_shared_keys(interp, CACHED_KEYS(tp)); | ||
dorv_ptr->dict = dict; | ||
} | ||
|
@@ -5731,6 +5761,9 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, | |
dict = *dictptr; | ||
if (dict == NULL) { | ||
dictkeys_incref(cached); | ||
if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) { | ||
OBJECT_STAT_INC(dict_materialized_on_request); | ||
} | ||
Comment on lines
+5764
to
+5766
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@markshannon BTW, this was the missing […].
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Turns out, this is a major source of dict materializations in the benchmarks (about 4 million dicts that we weren't counting before). It looks like we're creating all of these just to dematerialize them almost immediately. |
||
dict = new_dict_with_shared_keys(interp, cached); | ||
if (dict == NULL) | ||
return -1; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1817,8 +1817,10 @@ dummy_func( | |
op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) { | ||
assert(Py_TYPE(owner)->tp_dictoffset < 0); | ||
assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); | ||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); | ||
DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); | ||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This is turning: […]
into: […]
or, which is even worse: […]
I think this will need to be DEOPT_IF(!_PyDictOrValues_IsValues(*dorv), LOAD_ATTR_DEMATERIALIZE) to keep the slow path from messing up the code.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This really complicates the code (and probably needs ugly special-casing in the resulting uop trace), since […]. Can we leave that to another PR, if we determine that the compiler indeed hasn't just moved this cold branch with the register spills out-of-line (like it should, under PGO)?
|
||
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && | ||
!_PyObject_MakeInstanceAttributesFromDict(owner, dorv), | ||
LOAD_ATTR); | ||
} | ||
|
||
op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- res2 if (oparg & 1), res)) { | ||
|
@@ -2719,8 +2721,10 @@ dummy_func( | |
assert(type_version != 0); | ||
DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); | ||
assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); | ||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(self); | ||
DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); | ||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(self); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as for |
||
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && | ||
!_PyObject_MakeInstanceAttributesFromDict(self, dorv), | ||
LOAD_ATTR); | ||
PyHeapTypeObject *self_heap_type = (PyHeapTypeObject *)self_cls; | ||
DEOPT_IF(self_heap_type->ht_cached_keys->dk_version != | ||
keys_version, LOAD_ATTR); | ||
|
@@ -2749,8 +2753,10 @@ dummy_func( | |
assert(type_version != 0); | ||
DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); | ||
assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); | ||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(self); | ||
DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); | ||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(self); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && | ||
!_PyObject_MakeInstanceAttributesFromDict(self, dorv), | ||
LOAD_ATTR); | ||
PyHeapTypeObject *self_heap_type = (PyHeapTypeObject *)self_cls; | ||
DEOPT_IF(self_heap_type->ht_cached_keys->dk_version != | ||
keys_version, LOAD_ATTR); | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -192,6 +192,7 @@ print_object_stats(FILE *out, ObjectStats *stats) | |
fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key); | ||
fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big); | ||
fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass); | ||
fprintf(out, "Object dematerialize dict: %" PRIu64 "\n", stats->dict_dematerialized); | ||
fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits); | ||
fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses); | ||
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions); | ||
|
@@ -686,8 +687,10 @@ specialize_dict_access( | |
return 0; | ||
} | ||
_PyAttrCache *cache = (_PyAttrCache *)(instr + 1); | ||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); | ||
if (_PyDictOrValues_IsValues(dorv)) { | ||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); | ||
if (_PyDictOrValues_IsValues(*dorv) || | ||
_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) | ||
{ | ||
// Virtual dictionary | ||
PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; | ||
assert(PyUnicode_CheckExact(name)); | ||
|
@@ -705,12 +708,16 @@ specialize_dict_access( | |
instr->op.code = values_op; | ||
} | ||
else { | ||
PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); | ||
PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv); | ||
if (dict == NULL || !PyDict_CheckExact(dict)) { | ||
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT); | ||
return 0; | ||
} | ||
// We found an instance with a __dict__. | ||
if (dict->ma_values) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is quite a complex condition. The goto makes it even harder to follow. |
||
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NON_STRING_OR_SPLIT); | ||
return 0; | ||
} | ||
Py_ssize_t index = | ||
_PyDict_LookupIndex(dict, name); | ||
if (index != (uint16_t)index) { | ||
|
@@ -1093,9 +1100,11 @@ PyObject *descr, DescriptorClassification kind, bool is_method) | |
assert(descr != NULL); | ||
assert((is_method && kind == METHOD) || (!is_method && kind == NON_DESCRIPTOR)); | ||
if (owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT) { | ||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); | ||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); | ||
PyDictKeysObject *keys = ((PyHeapTypeObject *)owner_cls)->ht_cached_keys; | ||
if (!_PyDictOrValues_IsValues(dorv)) { | ||
if (!_PyDictOrValues_IsValues(*dorv) && | ||
!_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) | ||
{ | ||
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_HAS_MANAGED_DICT); | ||
return 0; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this should allow dict subclasses?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can't