diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 26a913435ef9cf..6ae81c033c43aa 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -10,6 +10,7 @@ extern "C" { #endif #include "pycore_dict_state.h" +#include "pycore_object.h" #include "pycore_runtime.h" // _PyRuntime // Unsafe flavor of PyDict_GetItemWithError(): no error checking @@ -62,6 +63,8 @@ extern uint32_t _PyDictKeys_GetVersionForCurrentState( extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); +extern void _PyDictKeys_DecRef(PyDictKeysObject *keys); + /* _Py_dict_lookup() returns index of entry which can be used like DK_ENTRIES(dk)[index]. * -1 when no entry found, -3 when compare raises error. */ @@ -196,6 +199,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp, } extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values); +extern int _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); extern PyObject *_PyDict_FromItems( PyObject *const *keys, Py_ssize_t keys_offset, PyObject *const *values, Py_ssize_t values_offset, diff --git a/Include/pystats.h b/Include/pystats.h index e24aef5fe8072b..b1957596745f00 100644 --- a/Include/pystats.h +++ b/Include/pystats.h @@ -65,6 +65,7 @@ typedef struct _object_stats { uint64_t dict_materialized_new_key; uint64_t dict_materialized_too_big; uint64_t dict_materialized_str_subclass; + uint64_t dict_dematerialized; uint64_t type_cache_hits; uint64_t type_cache_misses; uint64_t type_cache_dunder_hits; diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 564dc4745ae64e..7d317c012a304e 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -865,8 +865,10 @@ class C: items = [] for _ in range(self.ITEMS): item = C() - item.__dict__ item.a = None + # Resize into a combined unicode dict: + for i in range(29): + setattr(item, f"_{i}", None) items.append(item) return items @@ -932,7 +934,9 @@ class C: items = [] for _ in range(self.ITEMS): item = C() - item.__dict__ + # Resize into a combined unicode dict: + for i in range(29): + setattr(item, f"_{i}", None) items.append(item) return items diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst b/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst new file mode 100644 index 00000000000000..1f80082821edac --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-07-16-07-55-19.gh-issue-106485.wPb1bH.rst @@ -0,0 +1,2 @@ +Reduce the number of materialized instances dictionaries by dematerializing +them when possible. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 41ae1fca90b468..931103cdc1a064 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5464,6 +5464,35 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values) return make_dict_from_instance_attributes(interp, keys, values); } +// Return 1 if the dict was dematerialized, 0 otherwise. +int +_PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) +{ + assert(_PyObject_DictOrValuesPointer(obj) == dorv); + assert(!_PyDictOrValues_IsValues(*dorv)); + PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv); + if (dict == NULL) { + return 0; + } + // It's likely that this dict still shares its keys (if it was materialized + // on request and not heavily modified): + assert(PyDict_CheckExact(dict)); + assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE)); + if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) { + return 0; + } + assert(dict->ma_values); + // We have an opportunity to do something *really* cool: dematerialize it! + _PyDictKeys_DecRef(dict->ma_keys); + _PyDictOrValues_SetValues(dorv, dict->ma_values); + OBJECT_STAT_INC(dict_dematerialized); + // Don't try this at home, kids: + dict->ma_keys = NULL; + dict->ma_values = NULL; + Py_DECREF(dict); + return 1; +} + int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject *name, PyObject *value) @@ -5688,6 +5717,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) dict = _PyDictOrValues_GetDict(*dorv_ptr); if (dict == NULL) { dictkeys_incref(CACHED_KEYS(tp)); + OBJECT_STAT_INC(dict_materialized_on_request); dict = new_dict_with_shared_keys(interp, CACHED_KEYS(tp)); dorv_ptr->dict = dict; } @@ -5731,6 +5761,9 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, dict = *dictptr; if (dict == NULL) { dictkeys_incref(cached); + if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) { + OBJECT_STAT_INC(dict_materialized_on_request); + } dict = new_dict_with_shared_keys(interp, cached); if (dict == NULL) return -1; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 76809494dd8817..71e96f5af4e419 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4965,9 +4965,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) return res; } -extern void -_PyDictKeys_DecRef(PyDictKeysObject *keys); - static void type_dealloc_common(PyTypeObject *type) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b2281abc6663da..5efa36fcf5c629 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1827,8 +1827,10 @@ dummy_func( op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) { assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); } op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) { @@ -2727,8 +2729,10 @@ dummy_func( assert(type_version != 0); DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR); assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR); @@ -2757,8 +2761,10 @@ dummy_func( assert(type_version != 0); DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR); assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d6d541a3b61ab4..a7b5054417ed8f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1623,8 +1623,10 @@ owner = stack_pointer[-1]; assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cf20b869b8182f..ccf43c727b9e0f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2366,8 +2366,10 @@ { assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); } // _LOAD_ATTR_INSTANCE_VALUE { @@ -3507,8 +3509,10 @@ assert(type_version != 0); DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR); assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR); @@ -3559,8 +3563,10 @@ assert(type_version != 0); DEOPT_IF(owner_cls->tp_version_tag != type_version, LOAD_ATTR); assert(owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - DEOPT_IF(!_PyDictOrValues_IsValues(dorv), LOAD_ATTR); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv), + LOAD_ATTR); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version, LOAD_ATTR); diff --git a/Python/specialize.c b/Python/specialize.c index 98455ae3efc60d..2d514c0dc476d3 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -192,6 +192,7 @@ print_object_stats(FILE *out, ObjectStats *stats) fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key); fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big); fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass); + fprintf(out, "Object dematerialize dict: %" PRIu64 "\n", stats->dict_dematerialized); fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits); fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses); fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions); @@ -685,8 +686,10 @@ specialize_dict_access( return 0; } _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); - if (_PyDictOrValues_IsValues(dorv)) { + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); + if (_PyDictOrValues_IsValues(*dorv) || + _PyObject_MakeInstanceAttributesFromDict(owner, dorv)) + { // Virtual dictionary PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); @@ -704,12 +707,16 @@ specialize_dict_access( instr->op.code = values_op; } else { - PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(dorv); + PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv); if (dict == NULL || !PyDict_CheckExact(dict)) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT); return 0; } // We found an instance with a __dict__. + if (dict->ma_values) { + SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NON_STRING_OR_SPLIT); + return 0; + } Py_ssize_t index = _PyDict_LookupIndex(dict, name); if (index != (uint16_t)index) { @@ -1100,9 +1107,11 @@ PyObject *descr, DescriptorClassification kind, bool is_method) assert(descr != NULL); assert((is_method && kind == METHOD) || (!is_method && kind == NON_DESCRIPTOR)); if (owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); PyDictKeysObject *keys = ((PyHeapTypeObject *)owner_cls)->ht_cached_keys; - if (!_PyDictOrValues_IsValues(dorv)) { + if (!_PyDictOrValues_IsValues(*dorv) && + !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)) + { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_HAS_MANAGED_DICT); return 0; }