Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-128137: Update PyASCIIObject to handle interned field with the atomic operation #128196

Merged
merged 19 commits into from
Jan 5, 2025
Merged
28 changes: 16 additions & 12 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,17 @@ typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
uint8_t interned;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should remain in the state struct. It's okay for a struct to contain both non-bitfield and bitfield members:

  • It avoids a potential unnecessary breakage from moving the field
  • Keeping it in state will make it easier to keep state at 32 bits, because of alignment.

struct {
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
unsigned int interned:2;
/* Character size:

- PyUnicode_1BYTE_KIND (1):
Expand Down Expand Up @@ -146,7 +146,7 @@ typedef struct {
unsigned int statically_allocated:1;
/* Padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue #19537 on m68k). */
unsigned int :24;
unsigned int :10;
corona10 marked this conversation as resolved.
Show resolved Hide resolved
} state;
} PyASCIIObject;

Expand Down Expand Up @@ -195,7 +195,11 @@ typedef struct {

/* Return the `interned` value stored on the string object `op`.
   Use only if you know it's a string: `op` is cast with
   _PyASCIIObject_CAST without any further check in this function. */
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
/* NOTE(review): this text comes from an unmarked diff view. The next line
   looks like the pre-change read of the `state.interned` bit-field, and the
   #ifdef block below it looks like its replacement -- confirm against the
   raw diff before treating both as live code. */
return _PyASCIIObject_CAST(op)->state.interned;
#ifdef Py_GIL_DISABLED
/* When Py_GIL_DISABLED is defined, read the top-level `interned` member
   through a relaxed atomic uint8 load instead of a plain read. */
return _Py_atomic_load_uint8_relaxed(&(_PyASCIIObject_CAST(op)->interned));
#else
/* Otherwise a plain read of the same member is used. */
return _PyASCIIObject_CAST(op)->interned;
#endif
}
#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))

Expand Down
20 changes: 10 additions & 10 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
data = unicode + 1;
_PyUnicode_LENGTH(unicode) = size;
_PyUnicode_HASH(unicode) = -1;
_PyUnicode_STATE(unicode).interned = 0;
_PyASCIIObject_CAST(unicode)->interned = 0;
_PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
Expand Down Expand Up @@ -1711,7 +1711,7 @@ unicode_dealloc(PyObject *unicode)
_Py_SetImmortal(unicode);
return;
}
switch (_PyUnicode_STATE(unicode).interned) {
switch (_PyASCIIObject_CAST(unicode)->interned) {
case SSTATE_NOT_INTERNED:
break;
case SSTATE_INTERNED_MORTAL:
Expand Down Expand Up @@ -1739,7 +1739,7 @@ unicode_dealloc(PyObject *unicode)
// so it can't cause trouble (except wasted memory)
// - if it wasn't popped, it'll remain interned
_Py_SetImmortal(unicode);
_PyUnicode_STATE(unicode).interned = SSTATE_INTERNED_IMMORTAL;
_PyASCIIObject_CAST(unicode)->interned = SSTATE_INTERNED_IMMORTAL;
return;
}
if (r == 0) {
Expand Down Expand Up @@ -15470,7 +15470,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
#else
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
#endif
_PyUnicode_STATE(self).interned = 0;
_PyASCIIObject_CAST(self)->interned = 0;
_PyUnicode_STATE(self).kind = kind;
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
Expand Down Expand Up @@ -15689,7 +15689,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
assert(r == NULL);
/* but just in case (for the non-debug build), handle this */
if (r != NULL && r != s) {
assert(_PyUnicode_STATE(r).interned == SSTATE_INTERNED_IMMORTAL_STATIC);
assert(_PyASCIIObject_CAST(r)->interned == SSTATE_INTERNED_IMMORTAL_STATIC);
assert(_PyUnicode_CHECK(r));
Py_DECREF(s);
return Py_NewRef(r);
Expand All @@ -15699,7 +15699,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
Py_FatalError("failed to intern static string");
}

_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
_PyASCIIObject_CAST(s)->interned = SSTATE_INTERNED_IMMORTAL_STATIC;
return s;
}

Expand All @@ -15726,7 +15726,7 @@ immortalize_interned(PyObject *s)
_Py_DecRefTotal(_PyThreadState_GET());
}
#endif
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
FT_ATOMIC_STORE_UINT8_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_IMMORTAL);
_Py_SetImmortal(s);
}

Expand Down Expand Up @@ -15833,7 +15833,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,

/* NOT_INTERNED -> INTERNED_MORTAL */

assert(_PyUnicode_STATE(s).interned == SSTATE_NOT_INTERNED);
assert(_PyASCIIObject_CAST(s)->interned == SSTATE_NOT_INTERNED);

if (!_Py_IsImmortal(s)) {
/* The two references in interned dict (key and value) are not counted.
Expand All @@ -15845,7 +15845,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
_Py_DecRefTotal(_PyThreadState_GET());
#endif
}
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
FT_ATOMIC_STORE_UINT8_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_MORTAL);

/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */

Expand Down Expand Up @@ -15981,7 +15981,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_UNREACHABLE();
}
if (!shared) {
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
FT_ATOMIC_STORE_UINT8_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_NOT_INTERNED);
}
}
#ifdef INTERNED_STATS
Expand Down
Loading