From 699f4e957ee091382bccd6dcee70046c6907d0f5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 17 Dec 2024 12:50:10 -0800 Subject: [PATCH] Add locking for split dict keys. For `specialize_dict_access_inline()`, we need to lock the keys object. * Add `_PyDictKeys_StringLookupSplit` which does required locking and use in place of `_PyDictKeys_StringLookup`. * Change `_PyObject_TryGetInstanceAttribute` to use that function in the case of split keys. * Add `unicodekeys_lookup_split` helper which allows code sharing between `_Py_dict_lookup` and `_PyDictKeys_StringLookupSplit`. --- Include/internal/pycore_dict.h | 1 + Objects/dictobject.c | 74 ++++++++++++++++++++++++++-------- Python/specialize.c | 2 +- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 6e4a308226f3fe..71927006d1cd48 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -114,6 +114,7 @@ extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *); extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key); +extern Py_ssize_t _PyDictKeys_StringLookupSplit(PyDictKeysObject* dictkeys, PyObject *key); PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); PyAPI_FUNC(void) _PyDict_LoadGlobalStackRef(PyDictObject *, PyDictObject *, PyObject *, _PyStackRef *); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 05c93a3e448181..db9dca154ddd72 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1129,6 +1129,38 @@ dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, P return do_lookup(mp, dk, key, hash, compare_generic); } +#ifdef Py_GIL_DISABLED +static Py_ssize_t +unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, + Py_hash_t hash); +#endif + +static Py_ssize_t +unicodekeys_lookup_split(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + Py_ssize_t ix; + assert(dk->dk_kind == DICT_KEYS_SPLIT); + assert(PyUnicode_CheckExact(key)); + +#ifdef Py_GIL_DISABLED + // A split dictionaries keys can be mutated by other dictionaries + // but if we have a unicode key we can avoid locking the shared + // keys. + ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); + if (ix == DKIX_KEY_CHANGED) { + LOCK_KEYS(dk); + ix = unicodekeys_lookup_unicode(dk, key, hash); + UNLOCK_KEYS(dk); + } + else { + ix = unicodekeys_lookup_unicode(dk, key, hash); + } +#else + ix = unicodekeys_lookup_unicode(dk, key, hash); +#endif + return ix; +} + /* Lookup a string in a (all unicode) dict keys. * Returns DKIX_ERROR if key is not a string, * or if the dict keys is not all strings. @@ -1153,13 +1185,24 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) return unicodekeys_lookup_unicode(dk, key, hash); } -#ifdef Py_GIL_DISABLED - -static Py_ssize_t -unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, - Py_hash_t hash); - -#endif +/* Like _PyDictKeys_StringLookup() but only works on split keys. Note + * that in free-threaded builds this locks the keys object as required. + */ +Py_ssize_t +_PyDictKeys_StringLookupSplit(PyDictKeysObject* dk, PyObject *key) +{ + assert(dk->dk_kind == DICT_KEYS_SPLIT); + assert(PyUnicode_CheckExact(key)); + Py_hash_t hash = unicode_get_hash(key); + if (hash == -1) { + hash = PyUnicode_Type.tp_hash(key); + if (hash == -1) { + PyErr_Clear(); + return DKIX_ERROR; + } + } + return unicodekeys_lookup_split(dk, key, hash); +} /* The basic lookup function used by all operations. @@ -1192,15 +1235,7 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu if (PyUnicode_CheckExact(key)) { #ifdef Py_GIL_DISABLED if (kind == DICT_KEYS_SPLIT) { - // A split dictionaries keys can be mutated by other - // dictionaries but if we have a unicode key we can avoid - // locking the shared keys. - ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); - if (ix == DKIX_KEY_CHANGED) { - LOCK_KEYS(dk); - ix = unicodekeys_lookup_unicode(dk, key, hash); - UNLOCK_KEYS(dk); - } + ix = unicodekeys_lookup_split(dk, key, hash); } else { ix = unicodekeys_lookup_unicode(dk, key, hash); @@ -6967,7 +7002,12 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); assert(keys != NULL); - Py_ssize_t ix = _PyDictKeys_StringLookup(keys, name); + Py_ssize_t ix; + if (keys->dk_kind == DICT_KEYS_SPLIT) { + ix = _PyDictKeys_StringLookupSplit(keys, name); + } else { + ix = _PyDictKeys_StringLookup(keys, name); + } if (ix == DKIX_EMPTY) { *attr = NULL; return true; diff --git a/Python/specialize.c b/Python/specialize.c index 385ab7517f0d8f..fd029a690c2c21 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -967,7 +967,7 @@ specialize_dict_access_inline( _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); - Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); + Py_ssize_t index = _PyDictKeys_StringLookupSplit(keys, name); assert (index != DKIX_ERROR); if (index == DKIX_EMPTY) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_IN_KEYS);