From f4fdbf229946aaeb816b31fed7923a7dcf2390e7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 2 Jul 2024 17:42:36 +0200 Subject: [PATCH] gh-102471: Add PyLong import and export API Co-authored-by: Sergey B Kirpichev --- Doc/c-api/long.rst | 153 ++++++++++++++++++ Doc/conf.py | 2 + Doc/using/configure.rst | 3 +- Doc/whatsnew/3.14.rst | 10 ++ Include/cpython/longintrepr.h | 52 +++++- Lib/test/test_capi/test_long.py | 66 ++++++++ ...-07-03-17-26-53.gh-issue-102471.XpmKYk.rst | 9 ++ Modules/_testcapi/long.c | 144 +++++++++++++++++ Objects/longobject.c | 75 +++++++++ Tools/c-analyzer/cpython/ignored.tsv | 1 + 10 files changed, 512 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 9f2c48d98b8344..c2014c7905196c 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -540,6 +540,9 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Exactly what values are considered compact is an implementation detail and is subject to change. + .. versionadded:: 3.12 + + .. c:function:: Py_ssize_t PyUnstable_Long_CompactValue(const PyLongObject* op) If *op* is compact, as determined by :c:func:`PyUnstable_Long_IsCompact`, @@ -547,3 +550,153 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Otherwise, the return value is undefined. + .. versionadded:: 3.12 + + +Export API +^^^^^^^^^^ + +.. versionadded:: 3.14 + +.. c:type:: Py_digit + + A single unsigned digit in the range [``0``; ``PyLong_BASE - 1``]. + + It is usually used in an *array of digits*, such as the + :c:member:`PyLong_DigitArray.digits` array. + + Its size depends on the :c:macro:`!PYLONG_BITS_IN_DIGIT` macro: + see the ``configure`` :option:`--enable-big-digits` option. + + See :c:member:`PyLong_LAYOUT.bits_per_digit` for the number of bits per + digit and :c:member:`PyLong_LAYOUT.digit_size` for the size of a digit (in + bytes).
+ + +.. c:struct:: PyLong_LAYOUT + + Layout of an array of digits, used by Python :class:`int` object. + + See also :attr:`sys.int_info` which exposes similar information to Python. + + .. c:member:: uint8_t bits_per_digit + + Bits per digit. + + .. c:member:: uint8_t digit_size + + Digit size in bytes. + + .. c:member:: int8_t word_endian + + Word endian (byte order within a digit): + + - ``1`` for most significant byte first (big endian) + - ``-1`` for least significant first (little endian) + + .. c:member:: int8_t array_endian + + Array endian (order of the digits in the array): + + - ``1`` for most significant digit first (big endian) + - ``-1`` for least significant first (little endian) + + +.. c:struct:: PyLong_DigitArray + + A Python :class:`int` object exported as an array of digits. + + See :c:struct:`PyLong_LAYOUT` for the :c:member:`digits` layout. + + .. c:member:: PyObject *obj + + Strong reference to the Python :class:`int` object. + + .. c:member:: int negative + + 1 if the number is negative, 0 otherwise. + + .. c:member:: Py_ssize_t ndigits + + Number of digits in :c:member:`digits` array. + + .. c:member:: const Py_digit *digits + + Read-only array of unsigned digits. + + +.. c:function:: int PyLong_AsDigitArray(PyObject *obj, PyLong_DigitArray *array) + + Export a Python :class:`int` object as an array of digits. + + On success, set *\*array* and return 0. + On error, set an exception and return -1. + + This function always succeeds if *obj* is a Python :class:`int` object or a + subclass. + + :c:func:`PyLong_FreeDigitArray` must be called once done using + *array*. + + +.. c:function:: void PyLong_FreeDigitArray(PyLong_DigitArray *array) + + Release the export *array* created by :c:func:`PyLong_AsDigitArray`. + + +PyLongWriter API +^^^^^^^^^^^^^^^^ + +The :c:type:`PyLongWriter` API can be used to import an integer. + +.. versionadded:: 3.14 + +.. c:struct:: PyLongWriter + + A Python :class:`int` writer instance. + + The instance must be destroyed by :c:func:`PyLongWriter_Finish`. + + +..
c:function:: PyLongWriter* PyLongWriter_Create(int negative, Py_ssize_t ndigits, Py_digit **digits) + + Create a :c:type:`PyLongWriter`. + + On success, set *\*digits* and return a writer. + On error, set an exception and return ``NULL``. + + *negative* is ``1`` if the number is negative, or ``0`` otherwise. + + *ndigits* is the number of digits in the *digits* array. It must be + positive. + + The caller must initialize the array of digits *digits* and then call + :c:func:`PyLongWriter_Finish` to get a Python :class:`int`. Digits must be + in the range [``0``; ``PyLong_BASE - 1``]. Unused digits must be set to + ``0``. + + See :c:struct:`PyLong_LAYOUT` for the layout of an array of digits. + + +.. c:function:: PyObject* PyLongWriter_Finish(PyLongWriter *writer) + + Finish a :c:type:`PyLongWriter` created by :c:func:`PyLongWriter_Create`. + + On success, return a Python :class:`int` object. + On error, set an exception and return ``NULL``. + + +Example creating an integer from an array of digits:: + + PyObject * + long_import(int negative, Py_ssize_t ndigits, Py_digit *digits) + { + Py_digit *writer_digits; + PyLongWriter *writer = PyLongWriter_Create(negative, ndigits, + &writer_digits); + if (writer == NULL) { + return NULL; + } + memcpy(writer_digits, digits, ndigits * sizeof(Py_digit)); + return PyLongWriter_Finish(writer); + } diff --git a/Doc/conf.py b/Doc/conf.py index 3860d146a27e85..ef4477027b424c 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -141,6 +141,8 @@ ('c:type', 'size_t'), ('c:type', 'ssize_t'), ('c:type', 'time_t'), + ('c:type', 'int8_t'), + ('c:type', 'uint8_t'), ('c:type', 'uint64_t'), ('c:type', 'uintmax_t'), ('c:type', 'uintptr_t'), diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 6a4a52bb6e8b12..d1265ff6da1dbb 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -129,7 +129,8 @@ General Options Define the ``PYLONG_BITS_IN_DIGIT`` to ``15`` or ``30``. - See :data:`sys.int_info.bits_per_digit <sys.int_info>`.
+ See :data:`sys.int_info.bits_per_digit <sys.int_info>` and the + :c:type:`Py_digit` type. .. option:: --with-suffix=SUFFIX diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index aecc7cabd0d1f5..d857d6d5caea2d 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -405,6 +405,16 @@ New Features (Contributed by Victor Stinner in :gh:`119182`.) +* Add a new import and export API for Python :class:`int` objects: + + * :c:func:`PyLong_AsDigitArray`; + * :c:func:`PyLong_FreeDigitArray`; + * :c:func:`PyLongWriter_Create`; + * :c:func:`PyLongWriter_Finish`; + * :c:struct:`PyLong_LAYOUT`. + + (Contributed by Victor Stinner in :gh:`102471`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index c60ccc463653f9..a36d823bbeb44f 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -58,8 +58,10 @@ typedef long stwodigits; /* signed variant of twodigits */ #else #error "PYLONG_BITS_IN_DIGIT should be 15 or 30" #endif -#define PyLong_BASE ((digit)1 << PyLong_SHIFT) -#define PyLong_MASK ((digit)(PyLong_BASE - 1)) +#define PyLong_BASE ((Py_digit)1 << PyLong_SHIFT) +#define PyLong_MASK ((Py_digit)(PyLong_BASE - 1)) + +typedef digit Py_digit; /* Long integer representation.
@@ -139,6 +141,52 @@ _PyLong_CompactValue(const PyLongObject *op) #define PyUnstable_Long_CompactValue _PyLong_CompactValue +/* --- Import/Export API -------------------------------------------------- */ + +typedef struct PyLongLayout { + // Bits per digit + uint8_t bits_per_digit; + + // Digit size in bytes + uint8_t digit_size; + + // Word endian (byte order within a digit): + // * 1 for most significant byte first (big endian) + // * -1 for least significant first (little endian) + int8_t word_endian; + + // Array endian (order of the digits in the array): + // * 1 for most significant digit first (big endian) + // * -1 for least significant first (little endian) + int8_t array_endian; +} PyLongLayout; + +PyAPI_DATA(const PyLongLayout) PyLong_LAYOUT; + +typedef struct PyLong_DigitArray { + PyObject *obj; + int negative; + Py_ssize_t ndigits; + const Py_digit *digits; +} PyLong_DigitArray; + +PyAPI_FUNC(int) PyLong_AsDigitArray( + PyObject *obj, + PyLong_DigitArray *array); +PyAPI_FUNC(void) PyLong_FreeDigitArray( + PyLong_DigitArray *array); + + +/* --- PyLongWriter API --------------------------------------------------- */ + +typedef struct PyLongWriter PyLongWriter; + +PyAPI_FUNC(PyLongWriter*) PyLongWriter_Create( + int negative, + Py_ssize_t ndigits, + Py_digit **digits); +PyAPI_FUNC(PyObject*) PyLongWriter_Finish(PyLongWriter *writer); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index bdbdd7bcfe0f2a..b5c713b0d16620 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -631,6 +631,72 @@ def test_long_getsign(self): # CRASHES getsign(NULL) + def test_long_layout(self): + # Test PyLong_LAYOUT + int_info = sys.int_info + layout = _testcapi.get_pylong_layout() + expected = { + 'array_endian': -1, + 'bits_per_digit': int_info.bits_per_digit, + 'digit_size': int_info.sizeof_digit, + 'word_endian': -1 if sys.byteorder == 'little' else 1, + } + self.assertEqual(layout, expected) + + def test_long_export(self): + # Test
PyLong_AsDigitArray() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylong_export = _testcapi.pylong_export + self.assertEqual(pylong_export(0), (0, [0])) + self.assertEqual(pylong_export(123), (0, [123])) + self.assertEqual(pylong_export(-123), (1, [123])) + self.assertEqual(pylong_export(base**2 * 3 + base * 2 + 1), + (0, [1, 2, 3])) + + with self.assertRaises(TypeError): + pylong_export(1.0) + with self.assertRaises(TypeError): + pylong_export(0+1j) + with self.assertRaises(TypeError): + pylong_export("abc") + + def test_longwriter_create(self): + # Test PyLongWriter_Create() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylongwriter_create = _testcapi.pylongwriter_create + self.assertEqual(pylongwriter_create(0, []), 0) + self.assertEqual(pylongwriter_create(0, [0]), 0) + self.assertEqual(pylongwriter_create(0, [123]), 123) + self.assertEqual(pylongwriter_create(1, [123]), -123) + self.assertEqual(pylongwriter_create(1, [1, 2]), + -(base * 2 + 1)) + self.assertEqual(pylongwriter_create(0, [1, 2, 3]), + base**2 * 3 + base * 2 + 1) + max_digit = base - 1 + self.assertEqual(pylongwriter_create(0, [max_digit, max_digit, max_digit]), + base**2 * max_digit + base * max_digit + max_digit) + + # normalize + self.assertEqual(pylongwriter_create(0, [123, 0, 0]), 123) + + # test singletons + normalize + for num in (-2, 0, 1, 5, 42, 100): + self.assertIs(pylongwriter_create(bool(num < 0), [abs(num), 0]), + num) + + # round trip: Python int -> export -> Python int + pylong_export = _testcapi.pylong_export + numbers = [*range(0, 10), 12345, 0xdeadbeef, 2**100, 2**100-1] + numbers.extend(-num for num in list(numbers)) + for num in numbers: + with self.subTest(num=num): + export = pylong_export(num) + self.assertEqual(pylongwriter_create(*export), num, export) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst b/Misc/NEWS.d/next/C
API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst new file mode 100644 index 00000000000000..c29cb25833181c --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst @@ -0,0 +1,9 @@ +Add a new import and export API for Python :class:`int` objects: + +* :c:func:`PyLong_AsDigitArray`; +* :c:func:`PyLong_FreeDigitArray`; +* :c:func:`PyLongWriter_Create`; +* :c:func:`PyLongWriter_Finish`; +* :c:struct:`PyLong_LAYOUT`. + +Patch by Victor Stinner. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 2b5e85d5707522..ec08e8d2396dc8 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -117,6 +117,147 @@ pylong_aspid(PyObject *module, PyObject *arg) } +static PyObject * +pylong_export(PyObject *module, PyObject *obj) +{ + PyLong_DigitArray array; + if (PyLong_AsDigitArray(obj, &array) < 0) { + return NULL; + } + + PyObject *digits = PyList_New(0); + for (Py_ssize_t i=0; i < array.ndigits; i++) { + PyObject *digit = PyLong_FromUnsignedLong(array.digits[i]); + if (digit == NULL) { + Py_DECREF(digits); + goto error; + } + + if (PyList_Append(digits, digit) < 0) { + Py_DECREF(digits); + Py_DECREF(digit); + goto error; + } + Py_DECREF(digit); + } + + PyObject *res = Py_BuildValue("(iN)", array.negative, digits); + PyLong_FreeDigitArray(&array); + return res; + +error: + PyLong_FreeDigitArray(&array); + return NULL; +} + + +static PyObject * +pylongwriter_create(PyObject *module, PyObject *args) +{ + int negative; + PyObject *list; + if (!PyArg_ParseTuple(args, "iO!", &negative, &PyList_Type, &list)) { + return NULL; + } + Py_ssize_t ndigits = PyList_GET_SIZE(list); + + Py_digit *digits = PyMem_Malloc(ndigits * sizeof(Py_digit)); + if (digits == NULL) { + PyErr_NoMemory(); + return NULL; + } + + for (Py_ssize_t i=0; i < ndigits; i++) { + PyObject *item = PyList_GET_ITEM(list, i); + + long digit = PyLong_AsLong(item); + if (digit == -1 && PyErr_Occurred()) { + goto error; + } + + if (digit < 0 || digit >= 
PyLong_BASE) { + PyErr_SetString(PyExc_ValueError, "digit doesn't fit into Py_digit"); + goto error; + } + digits[i] = (Py_digit)digit; + } + + Py_digit *writer_digits; + PyLongWriter *writer = PyLongWriter_Create(negative, ndigits, + &writer_digits); + if (writer == NULL) { + goto error; + } + memcpy(writer_digits, digits, ndigits * sizeof(digit)); + PyObject *res = PyLongWriter_Finish(writer); + PyMem_Free(digits); + + return res; + +error: + PyMem_Free(digits); + return NULL; +} + + +static PyObject * +get_pylong_layout(PyObject *module, PyObject *Py_UNUSED(args)) +{ + PyLongLayout layout = PyLong_LAYOUT; + + PyObject *dict = PyDict_New(); + if (dict == NULL) { + goto error; + } + + PyObject *value = PyLong_FromUnsignedLong(layout.bits_per_digit); + if (value == NULL) { + goto error; + } + int res = PyDict_SetItemString(dict, "bits_per_digit", value); + Py_DECREF(value); + if (res < 0) { + goto error; + } + + value = PyLong_FromUnsignedLong(layout.digit_size); + if (value == NULL) { + goto error; + } + res = PyDict_SetItemString(dict, "digit_size", value); + Py_DECREF(value); + if (res < 0) { + goto error; + } + + value = PyLong_FromLong(layout.word_endian); + if (value == NULL) { + goto error; + } + res = PyDict_SetItemString(dict, "word_endian", value); + Py_DECREF(value); + if (res < 0) { + goto error; + } + + value = PyLong_FromLong(layout.array_endian); + if (value == NULL) { + goto error; + } + res = PyDict_SetItemString(dict, "array_endian", value); + Py_DECREF(value); + if (res < 0) { + goto error; + } + + return dict; + +error: + Py_XDECREF(dict); + return NULL; +} + + static PyMethodDef test_methods[] = { _TESTCAPI_CALL_LONG_COMPACT_API_METHODDEF {"pylong_fromunicodeobject", pylong_fromunicodeobject, METH_VARARGS}, @@ -124,6 +265,9 @@ static PyMethodDef test_methods[] = { {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {"pylong_getsign", pylong_getsign, METH_O}, {"pylong_aspid", pylong_aspid, METH_O}, + {"pylong_export", 
pylong_export, METH_O}, + {"pylongwriter_create", pylongwriter_create, METH_VARARGS}, + {"get_pylong_layout", get_pylong_layout, METH_NOARGS}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 050ce1a7303842..a7ffeffa90a4fc 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6727,3 +6727,78 @@ Py_ssize_t PyUnstable_Long_CompactValue(const PyLongObject* op) { return _PyLong_CompactValue((PyLongObject*)op); } + +const PyLongLayout PyLong_LAYOUT = { + .bits_per_digit = PyLong_SHIFT, + .word_endian = PY_LITTLE_ENDIAN ? -1 : 1, + .array_endian = -1, // least significant first + .digit_size = sizeof(digit), +}; + + +int +PyLong_AsDigitArray(PyObject *obj, PyLong_DigitArray *array) +{ + if (!PyLong_Check(obj)) { + PyErr_Format(PyExc_TypeError, "expect int, got %T", obj); + return -1; + } + PyLongObject *self = (PyLongObject*)obj; + + array->obj = Py_NewRef(obj); + array->negative = _PyLong_IsNegative(self); + array->ndigits = _PyLong_DigitCount(self); + if (array->ndigits == 0) { + array->ndigits = 1; + } + array->digits = self->long_value.ob_digit; + return 0; +} + + +void +PyLong_FreeDigitArray(PyLong_DigitArray *array) +{ + Py_CLEAR(array->obj); + array->negative = 0; + array->ndigits = 0; + array->digits = NULL; +} + + +/* --- PyLongWriter API --------------------------------------------------- */ + +PyLongWriter* PyLongWriter_Create(int negative, Py_ssize_t ndigits, Py_digit **digits) +{ + if (ndigits < 0) { + PyErr_SetString(PyExc_ValueError, "ndigits must be positive"); + return NULL; + } + assert(digits != NULL); + + PyLongObject *obj = _PyLong_New(ndigits); + if (obj == NULL) { + return NULL; + } + if (ndigits == 0) { + assert(obj->long_value.ob_digit[0] == 0); + } + if (negative) { + _PyLong_FlipSign(obj); + } + + *digits = obj->long_value.ob_digit; + return (PyLongWriter*)obj; +} + + +PyObject* PyLongWriter_Finish(PyLongWriter *writer) +{ + PyLongObject *obj = (PyLongObject *)writer; + assert(Py_REFCNT(obj) == 1); + + // 
Normalize and get singleton if possible + obj = maybe_small_long(long_normalize(obj)); + + return (PyObject*)obj; +} diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 63b640e465ac6b..f17f8cfa0faa44 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -318,6 +318,7 @@ Objects/exceptions.c - static_exceptions - Objects/genobject.c - ASYNC_GEN_IGNORED_EXIT_MSG - Objects/genobject.c - NON_INIT_CORO_MSG - Objects/longobject.c - _PyLong_DigitValue - +Objects/longobject.c - PyLong_LAYOUT - Objects/object.c - _Py_SwappedOp - Objects/object.c - _Py_abstract_hack - Objects/object.c - last_final_reftotal -