Skip to content

Commit

Permalink
gh-111545: Add PyHash_Pointer() function
Browse files Browse the repository at this point in the history
* Keep _Py_HashPointer() function as an alias to PyHash_Pointer().
* Add _Py_rotateright_uintptr() function with tests.
* Add PyHash_Pointer() tests to test_capi.test_hash.
* Remove _Py_HashPointerRaw() function: inline code in
  _Py_hashtable_hash_ptr().
  • Loading branch information
vstinner committed Nov 15, 2023
1 parent d4f83e1 commit 8bb71fe
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 22 deletions.
9 changes: 9 additions & 0 deletions Doc/c-api/hash.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,12 @@ See also the :c:member:`PyTypeObject.tp_hash` member.
Get the hash function definition.
.. versionadded:: 3.4
.. c:function:: Py_hash_t PyHash_Pointer(const void *ptr)
Hash a pointer.
The function cannot fail (cannot return ``-1``).
.. versionadded:: 3.13
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,9 @@ New Features
:exc:`KeyError` if the key is missing.
(Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.)

* Add :c:func:`PyHash_Pointer` function to hash a pointer.
(Contributed by Victor Stinner in :gh:`111545`.)


Porting to Python 3.13
----------------------
Expand Down
2 changes: 2 additions & 0 deletions Include/cpython/pyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ typedef struct {
} PyHash_FuncDef;

PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);

// Hash a pointer.
// The function cannot fail: it never returns -1.
PyAPI_FUNC(Py_hash_t) PyHash_Pointer(const void *ptr);
16 changes: 16 additions & 0 deletions Include/internal/pycore_bitutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,22 @@ _Py_bit_length(unsigned long x)
}


// Rotate the bits of x to the right by 'bits' positions.
// 'bits' must be in the range [0; 8 * SIZEOF_UINTPTR_T - 1].
// Function used by PyHash_Pointer() and _Py_hashtable_hash_ptr().
static inline uintptr_t
_Py_rotateright_uintptr(uintptr_t x, const unsigned int bits)
{
    assert(bits < (8 * SIZEOF_UINTPTR_T));
#if _Py__has_builtin(__builtin_rotateright64) && SIZEOF_UINTPTR_T == 8
    return __builtin_rotateright64(x, bits);
#elif _Py__has_builtin(__builtin_rotateright32) && SIZEOF_UINTPTR_T == 4
    return __builtin_rotateright32(x, bits);
#else
    // Mask the left shift count: when bits == 0, "width - bits" would be the
    // full width of the type and shifting by that amount is undefined
    // behavior in C. With the mask, bits == 0 shifts by 0 and returns x.
    const unsigned int width = 8 * SIZEOF_UINTPTR_T;
    return (x >> bits) | (x << ((width - bits) & (width - 1)));
#endif
}


#ifdef __cplusplus
}
#endif
Expand Down
7 changes: 2 additions & 5 deletions Include/internal/pycore_pyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@
/* Helpers for hash functions */
extern Py_hash_t _Py_HashDouble(PyObject *, double);

// Kept for backward compatibility: _Py_HashPointer() is an alias to the
// public PyHash_Pointer() function. _Py_HashPointerRaw() has been removed.
#define _Py_HashPointer PyHash_Pointer

// Export for '_datetime' shared extension
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
Expand Down
27 changes: 27 additions & 0 deletions Lib/test/test_capi/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,30 @@ def test_hash_getfuncdef(self):
self.assertEqual(func_def.name, hash_info.algorithm)
self.assertEqual(func_def.hash_bits, hash_info.hash_bits)
self.assertEqual(func_def.seed_bits, hash_info.seed_bits)

def test_hash_pointer(self):
    # Test PyHash_Pointer()
    hash_pointer = _testcapi.hash_pointer

    HASH_BITS = 8 * _testcapi.SIZEOF_VOID_P
    UHASH_T_MASK = ((2 ** HASH_BITS) - 1)
    HASH_T_MAX = (2 ** (HASH_BITS - 1) - 1)
    MAX_PTR = UHASH_T_MASK

    def uhash_to_hash(x):
        # Convert unsigned Py_uhash_t to signed Py_hash_t
        # (two's complement interpretation of the same bits)
        if HASH_T_MAX < x:
            x -= UHASH_T_MASK + 1
        return x

    # Known values which do not depend on the pointer size
    self.assertEqual(hash_pointer(0), 0)
    # All bits set: the rotation gives (Py_hash_t)-1 which is replaced
    # with -2, since -1 is reserved to report errors.
    self.assertEqual(hash_pointer(MAX_PTR), -2)

    # (pointer, pointer rotated by 4 bits to the right) pairs.
    # The pointers must fit into void*: PyLong_AsVoidPtr() fails with an
    # overflow error otherwise, so pick the values from the pointer size.
    if HASH_BITS == 64:
        values = (
            (0xABCDEF1234567890, 0x0ABCDEF123456789),
            (0x1234567890ABCDEF, 0xF1234567890ABCDE),
            (0xFEE4ABEDD1CECA5E, 0xEFEE4ABEDD1CECA5),
        )
    elif HASH_BITS == 32:
        values = (
            (0x12345678, 0x81234567),
            (0xDEADCAFE, 0xEDEADCAF),
        )
    else:
        self.fail(f"unsupported pointer size: {HASH_BITS} bits")

    for ptr, rotated in values:
        with self.subTest(ptr=hex(ptr)):
            self.assertEqual(hash_pointer(ptr), uhash_to_hash(rotated))
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add :c:func:`PyHash_Pointer` function to hash a pointer. Patch by Victor
Stinner.
16 changes: 16 additions & 0 deletions Modules/_testcapi/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,24 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
return result;
}


// Test wrapper for PyHash_Pointer(): convert the Python int 'arg' to a
// pointer, hash it and return the hash as a Python int.
static PyObject *
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
{
    void *address = PyLong_AsVoidPtr(arg);
    // A NULL result is only an error if an exception has been set.
    if (address == NULL && PyErr_Occurred()) {
        return NULL;
    }

    // The hash must fit into long long to be converted without loss.
    Py_hash_t value = PyHash_Pointer(address);
    Py_BUILD_ASSERT(sizeof(long long) >= sizeof(value));
    return PyLong_FromLongLong(value);
}


// Method table of the hash tests: each entry gives the Python-visible name,
// the C implementation and the calling convention flag.
static PyMethodDef test_methods[] = {
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
// hash_pointer() receives its single argument directly (METH_O)
{"hash_pointer", hash_pointer, METH_O},
// Sentinel entry terminating the table
{NULL},
};

Expand Down
75 changes: 75 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,80 @@ test_bit_length(PyObject *self, PyObject *Py_UNUSED(args))
}


// Check that _Py_rotateright_uintptr(ptr, bits) is equal to 'expected'.
// Return 0 on success. On mismatch, set an AssertionError describing the
// inputs and both values, and return -1.
static int
check_rotateright_uintptr(uintptr_t ptr, unsigned int bits, uintptr_t expected)
{
    // Use volatile to prevent the compiler from optimizing out the whole test
    volatile uintptr_t x = ptr;
    uintptr_t y = _Py_rotateright_uintptr(x, bits);
    if (y != expected) {
        // uintptr_t is not guaranteed to be "unsigned long long" or
        // "unsigned long": widen every value to unsigned long long so the
        // variadic arguments always match the "%llx" conversions.
        PyErr_Format(PyExc_AssertionError,
                     "_Py_rotateright_uintptr(0x%llx, %u) returns 0x%llx, "
                     "expected 0x%llx",
                     (unsigned long long)x, bits,
                     (unsigned long long)y,
                     (unsigned long long)expected);
        return -1;
    }
    return 0;
}


// Test _Py_rotateright_uintptr() against a table of known rotations.
// Return None on success, or NULL with an exception set by
// check_rotateright_uintptr() on the first mismatch.
static PyObject*
test_rotateright_uintptr(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Each entry: rotating 'value' by 'bits' to the right gives 'expected'.
    static const struct {
        uintptr_t value;
        unsigned int bits;
        uintptr_t expected;
    } cases[] = {
#if SIZEOF_UINTPTR_T == 8
        {UINT64_C(0x1234567890ABCDEF), 4, UINT64_C(0xF1234567890ABCDE)},
        {UINT64_C(0x1234567890ABCDEF), 8, UINT64_C(0xEF1234567890ABCD)},
        {UINT64_C(0x1234567890ABCDEF), 12, UINT64_C(0xDEF1234567890ABC)},
        {UINT64_C(0x1234567890ABCDEF), 16, UINT64_C(0xCDEF1234567890AB)},
        {UINT64_C(0x1234567890ABCDEF), 20, UINT64_C(0xBCDEF1234567890A)},
        {UINT64_C(0x1234567890ABCDEF), 24, UINT64_C(0xABCDEF1234567890)},
        {UINT64_C(0x1234567890ABCDEF), 28, UINT64_C(0x0ABCDEF123456789)},
        {UINT64_C(0x1234567890ABCDEF), 32, UINT64_C(0x90ABCDEF12345678)},
        {UINT64_C(0x1234567890ABCDEF), 36, UINT64_C(0x890ABCDEF1234567)},
        {UINT64_C(0x1234567890ABCDEF), 40, UINT64_C(0x7890ABCDEF123456)},
        {UINT64_C(0x1234567890ABCDEF), 44, UINT64_C(0x67890ABCDEF12345)},
        {UINT64_C(0x1234567890ABCDEF), 48, UINT64_C(0x567890ABCDEF1234)},
        {UINT64_C(0x1234567890ABCDEF), 52, UINT64_C(0x4567890ABCDEF123)},
        {UINT64_C(0x1234567890ABCDEF), 56, UINT64_C(0x34567890ABCDEF12)},
        {UINT64_C(0x1234567890ABCDEF), 60, UINT64_C(0x234567890ABCDEF1)},

        {UINT64_C(0xFEE4ABEDD1CECA5E), 4, UINT64_C(0xEFEE4ABEDD1CECA5)},
        {UINT64_C(0xFEE4ABEDD1CECA5E), 32, UINT64_C(0xD1CECA5EFEE4ABED)},
#elif SIZEOF_UINTPTR_T == 4
        {UINT32_C(0x12345678), 4, UINT32_C(0x81234567)},
        {UINT32_C(0x12345678), 8, UINT32_C(0x78123456)},
        {UINT32_C(0x12345678), 12, UINT32_C(0x67812345)},
        {UINT32_C(0x12345678), 16, UINT32_C(0x56781234)},
        {UINT32_C(0x12345678), 20, UINT32_C(0x45678123)},
        {UINT32_C(0x12345678), 24, UINT32_C(0x34567812)},
        {UINT32_C(0x12345678), 28, UINT32_C(0x23456781)},

        {UINT32_C(0xDEADCAFE), 4, UINT32_C(0xEDEADCAF)},
        {UINT32_C(0xDEADCAFE), 16, UINT32_C(0xCAFEDEAD)},
#else
#  error "unsupported uintptr_t size"
#endif
    };

    // Run the vectors in order and stop at the first failure.
    const size_t ncases = sizeof(cases) / sizeof(cases[0]);
    for (size_t i = 0; i < ncases; i++) {
        int res = check_rotateright_uintptr(cases[i].value,
                                            cases[i].bits,
                                            cases[i].expected);
        if (res < 0) {
            return NULL;
        }
    }
    Py_RETURN_NONE;
}


// Convert an integer (such as a character) to a pointer and back.
// Arguments are parenthesized so that expressions such as TO_PTR(a | b)
// are converted as a whole rather than having the cast bind to 'a' only.
#define TO_PTR(ch) ((void*)(uintptr_t)(ch))
#define FROM_PTR(ptr) ((uintptr_t)(ptr))
// Map 'a' to 1, 'b' to 2, etc.
#define VALUE(key) (1 + ((int)(key) - 'a'))
Expand Down Expand Up @@ -1614,6 +1688,7 @@ static PyMethodDef module_functions[] = {
{"test_bswap", test_bswap, METH_NOARGS},
{"test_popcount", test_popcount, METH_NOARGS},
{"test_bit_length", test_bit_length, METH_NOARGS},
{"test_rotateright_uintptr", test_rotateright_uintptr, METH_NOARGS},
{"test_hashtable", test_hashtable, METH_NOARGS},
{"get_config", test_get_config, METH_NOARGS},
{"set_config", test_set_config, METH_O},
Expand Down
2 changes: 2 additions & 0 deletions PC/pyconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,8 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
# define ALIGNOF_MAX_ALIGN_T 8
#endif

// Size of uintptr_t in bytes, defined as the size of void*.
// Used by _Py_rotateright_uintptr() to select the rotation width.
#define SIZEOF_UINTPTR_T SIZEOF_VOID_P

#ifdef _DEBUG
# define Py_DEBUG
#endif
Expand Down
17 changes: 14 additions & 3 deletions Python/hashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
*/

#include "Python.h"
#include "pycore_hashtable.h"
#include "pycore_pyhash.h" // _Py_HashPointerRaw()
#include "pycore_bitutils.h" // _Py_rotateright_uintptr()
#include "pycore_hashtable.h" // export _Py_hashtable_new()

#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
Expand Down Expand Up @@ -89,10 +89,21 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
}


// Hash a pointer key for _Py_hashtable_t.
//
// Similar to PyHash_Pointer() but avoid "if (x == -1) x = -2;" for best
// performance. The value (Py_uhash_t)-1 is not special for
// _Py_hashtable_t.hash_func function, there is no need to replace it with -2.
Py_uhash_t
_Py_hashtable_hash_ptr(const void *key)
{
    // Only the pointer value is hashed: the pointer is not dereferenced.
    uintptr_t x = (uintptr_t)key;
    Py_BUILD_ASSERT(sizeof(x) == sizeof(key));

    // Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right
    // to avoid excessive hash collisions.
    x = _Py_rotateright_uintptr(x, 4);

    Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t));
    return (Py_uhash_t)x;
}


Expand Down
27 changes: 13 additions & 14 deletions Python/pyhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
#include "Python.h"
#include "pycore_bitutils.h" // _Py_rotateright_uintptr()
#include "pycore_pyhash.h" // _Py_HashSecret_t

#ifdef __APPLE__
Expand Down Expand Up @@ -132,23 +133,21 @@ _Py_HashDouble(PyObject *inst, double v)
}

Py_hash_t
_Py_HashPointerRaw(const void *p)
PyHash_Pointer(const void *ptr)
{
size_t y = (size_t)p;
/* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
excessive hash collisions for dicts and sets */
y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
return (Py_hash_t)y;
}
uintptr_t x = (uintptr_t)ptr;
Py_BUILD_ASSERT(sizeof(x) == sizeof(ptr));

Py_hash_t
_Py_HashPointer(const void *p)
{
Py_hash_t x = _Py_HashPointerRaw(p);
if (x == -1) {
x = -2;
// Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right
// to avoid excessive hash collisions for dicts and sets.
x = _Py_rotateright_uintptr(x, 4);

Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t));
Py_hash_t result = (Py_hash_t)x;
if (result == -1) {
result = -2;
}
return x;
return result;
}

Py_hash_t
Expand Down

0 comments on commit 8bb71fe

Please sign in to comment.