-
-
Notifications
You must be signed in to change notification settings - Fork 30.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-128150: improve performance of uuid.uuid*
constructor functions.
#128151
base: main
Are you sure you want to change the base?
Changes from all commits
0d49ccb
603335f
154ff8b
b965887
a8a1894
c8aa752
8c9d5cf
a2278b8
0710549
5b6922f
e631593
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -668,6 +668,25 @@ io | |||||
file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in | ||||||
:gh:`120754` and :gh:`90102`.) | ||||||
|
||||||
|
||||||
uuid | ||||||
---- | ||||||
|
||||||
* Improve generation of :class:`~uuid.UUID` objects via their dedicated | ||||||
functions: | ||||||
|
||||||
* :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster | ||||||
for 1024-byte names. Performance for longer names remains unchanged. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster | ||||||
for 1024-byte names. Performance for longer names remains unchanged. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. | ||||||
|
||||||
Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since you already have the specific improvements, I would remove this paragraph (or the other way around). |
||||||
roughly 20% faster. | ||||||
|
||||||
(Contributed by Bénédikt Tran in :gh:`128150`.) | ||||||
|
||||||
|
||||||
Deprecated | ||||||
========== | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -85,6 +85,14 @@ class SafeUUID: | |||||
unknown = None | ||||||
|
||||||
|
||||||
_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff | ||||||
_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 | ||||||
_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 | ||||||
_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 | ||||||
_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000 | ||||||
_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000 | ||||||
|
||||||
|
||||||
class UUID: | ||||||
"""Instances of the UUID class represent UUIDs as specified in RFC 4122. | ||||||
UUID objects are immutable, hashable, and usable as dictionary keys. | ||||||
|
@@ -174,45 +182,49 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, | |||||
if [hex, bytes, bytes_le, fields, int].count(None) != 4: | ||||||
raise TypeError('one of the hex, bytes, bytes_le, fields, ' | ||||||
'or int arguments must be given') | ||||||
if hex is not None: | ||||||
if int is not None: | ||||||
pass | ||||||
elif hex is not None: | ||||||
hex = hex.replace('urn:', '').replace('uuid:', '') | ||||||
hex = hex.strip('{}').replace('-', '') | ||||||
if len(hex) != 32: | ||||||
raise ValueError('badly formed hexadecimal UUID string') | ||||||
int = int_(hex, 16) | ||||||
if bytes_le is not None: | ||||||
elif bytes_le is not None: | ||||||
if len(bytes_le) != 16: | ||||||
raise ValueError('bytes_le is not a 16-char string') | ||||||
assert isinstance(bytes_le, bytes_), repr(bytes_le) | ||||||
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] + | ||||||
bytes_le[8-1:6-1:-1] + bytes_le[8:]) | ||||||
if bytes is not None: | ||||||
int = int_.from_bytes(bytes) | ||||||
elif bytes is not None: | ||||||
if len(bytes) != 16: | ||||||
raise ValueError('bytes is not a 16-char string') | ||||||
assert isinstance(bytes, bytes_), repr(bytes) | ||||||
int = int_.from_bytes(bytes) # big endian | ||||||
if fields is not None: | ||||||
elif fields is not None: | ||||||
if len(fields) != 6: | ||||||
raise ValueError('fields is not a 6-tuple') | ||||||
(time_low, time_mid, time_hi_version, | ||||||
clock_seq_hi_variant, clock_seq_low, node) = fields | ||||||
if not 0 <= time_low < 1<<32: | ||||||
if time_low < 0 or time_low > 0xffff_ffff: | ||||||
raise ValueError('field 1 out of range (need a 32-bit value)') | ||||||
if not 0 <= time_mid < 1<<16: | ||||||
if time_mid < 0 or time_mid > 0xffff: | ||||||
raise ValueError('field 2 out of range (need a 16-bit value)') | ||||||
if not 0 <= time_hi_version < 1<<16: | ||||||
if time_hi_version < 0 or time_hi_version > 0xffff: | ||||||
raise ValueError('field 3 out of range (need a 16-bit value)') | ||||||
if not 0 <= clock_seq_hi_variant < 1<<8: | ||||||
if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff: | ||||||
raise ValueError('field 4 out of range (need an 8-bit value)') | ||||||
if not 0 <= clock_seq_low < 1<<8: | ||||||
if clock_seq_low < 0 or clock_seq_low > 0xff: | ||||||
raise ValueError('field 5 out of range (need an 8-bit value)') | ||||||
if not 0 <= node < 1<<48: | ||||||
if node < 0 or node > 0xffff_ffff_ffff: | ||||||
raise ValueError('field 6 out of range (need a 48-bit value)') | ||||||
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low | ||||||
int = ((time_low << 96) | (time_mid << 80) | | ||||||
(time_hi_version << 64) | (clock_seq << 48) | node) | ||||||
if int is not None: | ||||||
if not 0 <= int < 1<<128: | ||||||
raise ValueError('int is out of range (need a 128-bit value)') | ||||||
# "x < a or x > b" is slightly faster than "not (a <= x <= b)"
if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: | ||||||
raise ValueError('int is out of range (need a 128-bit value)') | ||||||
if version is not None: | ||||||
if not 1 <= version <= 8: | ||||||
raise ValueError('illegal version number') | ||||||
|
@@ -225,6 +237,15 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, | |||||
object.__setattr__(self, 'int', int) | ||||||
object.__setattr__(self, 'is_safe', is_safe) | ||||||
|
||||||
@classmethod | ||||||
def _from_int(cls, int, *, is_safe=SafeUUID.unknown): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
"""Internal use only.""" | ||||||
assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff | ||||||
self = cls.__new__(cls) | ||||||
object.__setattr__(self, 'int', int) | ||||||
object.__setattr__(self, 'is_safe', is_safe) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
At this moment the argument is unused. Removing it makes the call faster. |
||||||
return self | ||||||
|
||||||
def __getstate__(self): | ||||||
d = {'int': self.int} | ||||||
if self.is_safe != SafeUUID.unknown: | ||||||
|
@@ -700,24 +721,35 @@ def uuid3(namespace, name): | |||||
"""Generate a UUID from the MD5 hash of a namespace UUID and a name.""" | ||||||
if isinstance(name, str): | ||||||
name = bytes(name, "utf-8") | ||||||
from hashlib import md5 | ||||||
digest = md5( | ||||||
namespace.bytes + name, | ||||||
usedforsecurity=False | ||||||
).digest() | ||||||
return UUID(bytes=digest[:16], version=3) | ||||||
# HACL*-based MD5 is slightly faster than its OpenSSL version, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would put these comments in the PR comments and leave them out of the code (but I do see the value of them). |
||||||
# and 'import X; X.Y' is slightly faster than 'from X import Y'. | ||||||
import _md5 | ||||||
h = _md5.md5(namespace.bytes + name, usedforsecurity=False) | ||||||
assert len(h.digest()) == 16 | ||||||
int_uuid_3 = int_.from_bytes(h.digest()) | ||||||
int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK | ||||||
int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS | ||||||
return UUID._from_int(int_uuid_3) | ||||||
|
||||||
def uuid4(): | ||||||
"""Generate a random UUID.""" | ||||||
return UUID(bytes=os.urandom(16), version=4) | ||||||
int_uuid_4 = int_.from_bytes(os.urandom(16)) | ||||||
int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK | ||||||
int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS | ||||||
return UUID._from_int(int_uuid_4) | ||||||
|
||||||
def uuid5(namespace, name): | ||||||
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" | ||||||
if isinstance(name, str): | ||||||
name = bytes(name, "utf-8") | ||||||
from hashlib import sha1 | ||||||
hash = sha1(namespace.bytes + name).digest() | ||||||
return UUID(bytes=hash[:16], version=5) | ||||||
# OpenSSL-based SHA-1 is slightly faster than its HACL* version, | ||||||
# and 'import X; X.Y' is slightly faster than 'from X import Y'. | ||||||
import hashlib | ||||||
h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) | ||||||
int_uuid_5 = int_.from_bytes(h.digest()[:16]) | ||||||
int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK | ||||||
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS | ||||||
return UUID._from_int(int_uuid_5) | ||||||
|
||||||
def uuid8(a=None, b=None, c=None): | ||||||
"""Generate a UUID from three custom blocks. | ||||||
|
@@ -740,7 +772,9 @@ def uuid8(a=None, b=None, c=None): | |||||
int_uuid_8 = (a & 0xffff_ffff_ffff) << 80 | ||||||
int_uuid_8 |= (b & 0xfff) << 64 | ||||||
int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff | ||||||
return UUID(int=int_uuid_8, version=8) | ||||||
# by construction, the variant and version bits are already cleared | ||||||
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS | ||||||
return UUID._from_int(int_uuid_8) | ||||||
|
||||||
def main(): | ||||||
"""Run the uuid command line interface.""" | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Improve the performance of generating version 3, 4, 5, and 8 | ||
:class:`~uuid.UUID` objects via their dedicated functions by roughly 20%. Patch by Bénédikt Tran. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.