diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 4dc121d7fbaa9b4..97270c434145677 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -5,12 +5,16 @@ PyHash API See also the :c:member:`PyTypeObject.tp_hash` member. +Types +^^^^^ + .. c:type:: Py_hash_t Hash value type: signed integer. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. @@ -41,8 +45,28 @@ See also the :c:member:`PyTypeObject.tp_hash` member. .. versionadded:: 3.4 +Functions +^^^^^^^^^ + +.. c:function:: Py_hash_t PyHash_Double(double value, PyObject *obj) + + Hash a C double number. + + If *value* is not-a-number (NaN): + + * If *obj* is not ``NULL``, return the hash of the *obj* pointer. + * Otherwise, return :data:`sys.hash_info.nan <sys.hash_info>` (``0``). + + The function cannot fail: it cannot return ``-1``. + + .. versionadded:: 3.13 + + .. c:function:: PyHash_FuncDef* PyHash_GetFuncDef(void) Get the hash function definition. + .. seealso:: + :pep:`456` "Secure and interchangeable hash algorithm". + .. versionadded:: 3.4 diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 4d24606a1653e52..421c94abcb43a81 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1034,7 +1034,13 @@ always available. .. attribute:: hash_info.nan - (This attribute is no longer used) + The hash value returned for not-a-number (NaN). + + This hash value is only used by the :c:func:`PyHash_Double` C function + when the *obj* argument is ``NULL``. + + .. versionchanged:: 3.10 + This hash value is no longer used to hash numbers in Python. .. attribute:: hash_info.imag diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 136fe901ce39fb8..54724a6fca794ea 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1181,6 +1181,9 @@ New Features :exc:`KeyError` if the key missing. (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) +* Add :c:func:`PyHash_Double` function to hash a C double number. 
+ (Contributed by Victor Stinner in :gh:`111545`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 62ae6084bbcf533..8e3b5a3fc3f516f 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -11,3 +11,5 @@ typedef struct { } PyHash_FuncDef; PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); + +PyAPI_FUNC(Py_hash_t) PyHash_Double(double value, PyObject *obj); diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h index 78bf0c7d07eb105..729ff89cb21a04f 100644 --- a/Include/internal/pycore_pyhash.h +++ b/Include/internal/pycore_pyhash.h @@ -32,6 +32,7 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) #define _PyHASH_INF 314159 +#define _PyHASH_NAN 0 #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* Hash secret diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index 59dec15bc21445f..3bf1965d8e08766 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -1,9 +1,11 @@ +import math import sys import unittest from test.support import import_helper _testcapi = import_helper.import_module('_testcapi') +NULL = None SIZEOF_PY_HASH_T = _testcapi.SIZEOF_VOID_P @@ -31,3 +33,52 @@ def test_hash_getfuncdef(self): self.assertEqual(func_def.name, hash_info.algorithm) self.assertEqual(func_def.hash_bits, hash_info.hash_bits) self.assertEqual(func_def.seed_bits, hash_info.seed_bits) + + def test_hash_double(self): + # Test PyHash_Double() + hash_double = _testcapi.hash_double + marker = object() + marker_hash = hash(marker) + + # test integers + integers = [ + *range(1, 30), + 2**30 - 1, + 2 ** 233, + int(sys.float_info.max), + ] + for x in integers: + for obj in (NULL, marker): + with self.subTest(x=x, obj=obj): + self.assertEqual(hash_double(float(x), obj), hash(x)) + self.assertEqual(hash_double(float(-x), obj), hash(-x)) + + # test positive and 
negative zeros + for obj in (NULL, marker): + with self.subTest(x=x, obj=obj): + self.assertEqual(hash_double(float(0.0), obj), 0) + self.assertEqual(hash_double(float(-0.0), obj), 0) + + # test +inf and -inf + inf = float("inf") + for obj in (NULL, marker): + with self.subTest(obj=obj): + self.assertEqual(hash_double(inf), sys.hash_info.inf) + self.assertEqual(hash_double(-inf), -sys.hash_info.inf) + + # test not-a-number (NaN) + self.assertEqual(hash_double(float('nan'), marker), marker_hash) + self.assertEqual(hash_double(float('nan'), NULL), sys.hash_info.nan) + + # special float values: compare with Python hash() function + special_values = ( + math.nextafter(0.0, 1.0), # smallest positive subnormal number + sys.float_info.min, # smallest positive normal number + sys.float_info.epsilon, + sys.float_info.max, # largest positive finite number + ) + for x in special_values: + for obj in (NULL, marker): + with self.subTest(x=x, obj=obj): + self.assertEqual(hash_double(x, obj), hash(x)) + self.assertEqual(hash_double(-x, obj), hash(-x)) diff --git a/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst new file mode 100644 index 000000000000000..dc06d282a7bff81 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyHash_Double` function to hash a C double number. Patch by +Victor Stinner. 
diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index d0b8127020c5c14..b668704650aa9d7 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -1,6 +1,7 @@ #include "parts.h" #include "util.h" + static PyObject * hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) { @@ -44,8 +45,26 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) return result; } + +static PyObject * +hash_double(PyObject *Py_UNUSED(module), PyObject *args) +{ + double value; + PyObject *obj = NULL; + if (!PyArg_ParseTuple(args, "d|O", &value, &obj)) { + return NULL; + } + NULLABLE(obj); + Py_hash_t hash = PyHash_Double(value, obj); + assert(hash != -1); + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return PyLong_FromLongLong(hash); +} + + static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, + {"hash_double", hash_double, METH_VARARGS}, {NULL}, }; diff --git a/Python/pyhash.c b/Python/pyhash.c index f9060b8003a0a7d..b58ec6136f8e02c 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -86,7 +86,7 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; Py_hash_t _Py_HashPointer(const void *); Py_hash_t -_Py_HashDouble(PyObject *inst, double v) +PyHash_Double(double v, PyObject *obj) { int e, sign; double m; @@ -95,8 +95,15 @@ _Py_HashDouble(PyObject *inst, double v) if (!Py_IS_FINITE(v)) { if (Py_IS_INFINITY(v)) return v > 0 ? 
_PyHASH_INF : -_PyHASH_INF; - else - return _Py_HashPointer(inst); + else { + assert(Py_IS_NAN(v)); + if (obj != NULL) { + return _Py_HashPointer(obj); + } + else { + return _PyHASH_NAN; + } + } } m = frexp(v, &e); @@ -131,6 +138,12 @@ _Py_HashDouble(PyObject *inst, double v) return (Py_hash_t)x; } +Py_hash_t +_Py_HashDouble(PyObject *obj, double v) +{ + return PyHash_Double(v, obj); +} + Py_hash_t _Py_HashPointerRaw(const void *p) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c17de44731b7030..11113e21bfb6b7f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1497,7 +1497,7 @@ get_hash_info(PyThreadState *tstate) PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_INF)); PyStructSequence_SET_ITEM(hash_info, field++, - PyLong_FromLong(0)); // This is no longer used + PyLong_FromLong(_PyHASH_NAN)); PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_IMAG)); PyStructSequence_SET_ITEM(hash_info, field++,