Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-128150: improve performance of uuid.uuid* constructor functions. #128151

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0d49ccb
improve performance of UUIDs creation
picnixz Dec 19, 2024
603335f
add What's New entry
picnixz Dec 19, 2024
154ff8b
blurb
picnixz Dec 21, 2024
b965887
fix issue number
picnixz Dec 21, 2024
a8a1894
fix typos
picnixz Dec 21, 2024
c8aa752
ensure 14-bit clock sequence
picnixz Dec 21, 2024
8c9d5cf
Merge branch 'main' into perf/uuid/init-128150
picnixz Dec 21, 2024
a2278b8
add dedicated private fast constructor
picnixz Dec 21, 2024
0710549
revert UUIDv1 construction
picnixz Dec 21, 2024
5b6922f
change eager check into an assertion check for internal constructor
picnixz Dec 22, 2024
e631593
update performance results
picnixz Dec 22, 2024
1c10901
describe constants
picnixz Dec 23, 2024
0bc7321
revert UUIDv1 optimizations to reduce the diff
picnixz Dec 23, 2024
26b1eb1
simplify `_from_int` private constructor as per Pieter's review
picnixz Dec 23, 2024
df50a7a
revert micro-optimization of `not a <= x <= b`
picnixz Dec 23, 2024
c1ffa7d
use built-in `int` when it is not shadowed
picnixz Dec 23, 2024
cff86e9
remove rationale comment for HACL* MD5
picnixz Dec 23, 2024
7095aa4
remove rationale comment for OpenSSL SHA-1
picnixz Dec 23, 2024
4af1535
clear variant and version bits using dedicated mask
picnixz Dec 23, 2024
0d4c008
fix typos
picnixz Dec 25, 2024
9854f69
update benchmarks
picnixz Dec 25, 2024
897902b
remove un-necessary assertions
picnixz Dec 26, 2024
a8a19e1
use `object.__new__` instead of `cls.__new__`
picnixz Dec 26, 2024
ccb972b
Merge branch 'main' into perf/uuid/init-128150
picnixz Dec 27, 2024
e2b8b08
remove dedicated constant folding
picnixz Dec 27, 2024
1d4216a
update benchmarks
picnixz Dec 27, 2024
5c87adf
Always use `hashlib.md5` for consistency as per Petr's comment.
picnixz Jan 12, 2025
ea23629
update benchmarks
picnixz Jan 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,25 @@ io
file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in
:gh:`120754` and :gh:`90102`.)


uuid
----

* Improve the speed of generating :class:`~uuid.UUID` objects via their
dedicated functions:

* :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster
for 1024-byte names. Performance for longer names remains unchanged.
picnixz marked this conversation as resolved.
Show resolved Hide resolved
picnixz marked this conversation as resolved.
Show resolved Hide resolved
* :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster
for 1024-byte names. Performance for longer names remains unchanged.
picnixz marked this conversation as resolved.
Show resolved Hide resolved
* :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster.

Overall, dedicated generation of version 3, 4, 5, and 8 UUID objects is
picnixz marked this conversation as resolved.
Show resolved Hide resolved
roughly 20% faster.

(Contributed by Bénédikt Tran in :gh:`128150`.)


Deprecated
==========

Expand Down
82 changes: 58 additions & 24 deletions Lib/uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ class SafeUUID:
unknown = None


_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff
_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000
_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000
_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000
_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000
_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000


class UUID:
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
UUID objects are immutable, hashable, and usable as dictionary keys.
Expand Down Expand Up @@ -174,45 +182,49 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
if [hex, bytes, bytes_le, fields, int].count(None) != 4:
raise TypeError('one of the hex, bytes, bytes_le, fields, '
'or int arguments must be given')
if hex is not None:
if int is not None:
pass
elif hex is not None:
hex = hex.replace('urn:', '').replace('uuid:', '')
hex = hex.strip('{}').replace('-', '')
if len(hex) != 32:
raise ValueError('badly formed hexadecimal UUID string')
int = int_(hex, 16)
if bytes_le is not None:
elif bytes_le is not None:
if len(bytes_le) != 16:
raise ValueError('bytes_le is not a 16-char string')
assert isinstance(bytes_le, bytes_), repr(bytes_le)
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
bytes_le[8-1:6-1:-1] + bytes_le[8:])
if bytes is not None:
int = int_.from_bytes(bytes)
elif bytes is not None:
if len(bytes) != 16:
raise ValueError('bytes is not a 16-char string')
assert isinstance(bytes, bytes_), repr(bytes)
int = int_.from_bytes(bytes) # big endian
if fields is not None:
elif fields is not None:
if len(fields) != 6:
raise ValueError('fields is not a 6-tuple')
(time_low, time_mid, time_hi_version,
clock_seq_hi_variant, clock_seq_low, node) = fields
if not 0 <= time_low < 1<<32:
if time_low < 0 or time_low > 0xffff_ffff:
raise ValueError('field 1 out of range (need a 32-bit value)')
if not 0 <= time_mid < 1<<16:
if time_mid < 0 or time_mid > 0xffff:
raise ValueError('field 2 out of range (need a 16-bit value)')
if not 0 <= time_hi_version < 1<<16:
if time_hi_version < 0 or time_hi_version > 0xffff:
raise ValueError('field 3 out of range (need a 16-bit value)')
if not 0 <= clock_seq_hi_variant < 1<<8:
if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff:
raise ValueError('field 4 out of range (need an 8-bit value)')
if not 0 <= clock_seq_low < 1<<8:
if clock_seq_low < 0 or clock_seq_low > 0xff:
raise ValueError('field 5 out of range (need an 8-bit value)')
if not 0 <= node < 1<<48:
if node < 0 or node > 0xffff_ffff_ffff:
raise ValueError('field 6 out of range (need a 48-bit value)')
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
int = ((time_low << 96) | (time_mid << 80) |
(time_hi_version << 64) | (clock_seq << 48) | node)
if int is not None:
if not 0 <= int < 1<<128:
raise ValueError('int is out of range (need a 128-bit value)')
# "x < a or x > b" is slightly faster than "not (a <= x <= b)"
if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff:
raise ValueError('int is out of range (need a 128-bit value)')
if version is not None:
if not 1 <= version <= 8:
raise ValueError('illegal version number')
Expand All @@ -225,6 +237,15 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
object.__setattr__(self, 'int', int)
object.__setattr__(self, 'is_safe', is_safe)

@classmethod
def _from_int(cls, int, *, is_safe=SafeUUID.unknown):
picnixz marked this conversation as resolved.
Show resolved Hide resolved
"""Internal use only."""
assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff
self = cls.__new__(cls)
object.__setattr__(self, 'int', int)
object.__setattr__(self, 'is_safe', is_safe)
picnixz marked this conversation as resolved.
Show resolved Hide resolved
return self

def __getstate__(self):
d = {'int': self.int}
if self.is_safe != SafeUUID.unknown:
Expand Down Expand Up @@ -700,24 +721,35 @@ def uuid3(namespace, name):
"""Generate a UUID from the MD5 hash of a namespace UUID and a name."""
if isinstance(name, str):
name = bytes(name, "utf-8")
from hashlib import md5
digest = md5(
namespace.bytes + name,
usedforsecurity=False
).digest()
return UUID(bytes=digest[:16], version=3)
# HACL*-based MD5 is slightly faster than its OpenSSL version,
picnixz marked this conversation as resolved.
Show resolved Hide resolved
# and 'import X; X.Y' is slightly faster than 'from X import Y'.
import _md5
picnixz marked this conversation as resolved.
Show resolved Hide resolved
h = _md5.md5(namespace.bytes + name, usedforsecurity=False)
assert len(h.digest()) == 16
picnixz marked this conversation as resolved.
Show resolved Hide resolved
int_uuid_3 = int_.from_bytes(h.digest())
int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK
int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS
return UUID._from_int(int_uuid_3)

def uuid4():
"""Generate a random UUID."""
return UUID(bytes=os.urandom(16), version=4)
int_uuid_4 = int_.from_bytes(os.urandom(16))
int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK
int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS
return UUID._from_int(int_uuid_4)

def uuid5(namespace, name):
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
if isinstance(name, str):
name = bytes(name, "utf-8")
from hashlib import sha1
hash = sha1(namespace.bytes + name).digest()
return UUID(bytes=hash[:16], version=5)
# OpenSSL-based SHA-1 is slightly faster than its HACL* version,
# and 'import X; X.Y' is slightly faster than 'from X import Y'.
import hashlib
h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False)
int_uuid_5 = int_.from_bytes(h.digest()[:16])
int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
return UUID._from_int(int_uuid_5)

def uuid8(a=None, b=None, c=None):
"""Generate a UUID from three custom blocks.
Expand All @@ -740,7 +772,9 @@ def uuid8(a=None, b=None, c=None):
int_uuid_8 = (a & 0xffff_ffff_ffff) << 80
int_uuid_8 |= (b & 0xfff) << 64
int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff
return UUID(int=int_uuid_8, version=8)
# by construction, the variant and version bits are already cleared
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
picnixz marked this conversation as resolved.
Show resolved Hide resolved
return UUID._from_int(int_uuid_8)

def main():
"""Run the uuid command line interface."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve the speed of generating version 3, 4, 5, and 8 :class:`~uuid.UUID`
objects via their dedicated functions by roughly 20%. Patch by Bénédikt Tran.
Loading