-
-
Notifications
You must be signed in to change notification settings - Fork 34.5k
gh-89083: add support for UUID version 7 (RFC 9562) #121119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
42d55b4
6826fa1
edc2cab
c6d26b6
2ddb4b8
bcd1417
4630c8f
cd80afb
c3d4745
392d289
26889ea
44b66e6
7be6dc4
8ba3d8b
a14ae9b
7a169c9
b082c90
94c70e9
05b7a2b
275deb7
5e97cc3
051f34e
bdf9a77
00661fc
0474de4
a446d53
2e39072
ebc1a07
694e07f
965dbc8
7ff4368
7c3cab6
e758741
c18d0c4
2df6f41
6fcb6a1
f6048c9
be3f024
99c6761
06befca
2aacadf
f7f536e
aee2898
1a5ac19
8764b28
af0baef
939b5a8
ef85b20
2d08821
eaa9ad4
571d2fe
f9ac658
a756b9d
4406796
d4eeded
0e54a72
40ab2fa
5ee85ad
3ce8943
59e6d7e
437d8cf
2d917b0
73ab656
54d07ae
6d76389
bd4ab55
e9ddb74
8755de0
12d7ad4
560d87c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,8 +1,9 @@ | ||||||||
| r"""UUID objects (universally unique identifiers) according to RFC 4122. | ||||||||
|
|
||||||||
| This module provides immutable UUID objects (class UUID) and the functions | ||||||||
| uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 | ||||||||
| UUIDs as specified in RFC 4122. | ||||||||
| uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, | ||||||||
| 4, 5, and 7 UUIDs as specified in RFC 4122 (superseded by RFC 9562 but still | ||||||||
| referred to as RFC 4122 for compatibility purposes). | ||||||||
|
|
||||||||
| If all you want is a unique ID, you should probably call uuid1() or uuid4(). | ||||||||
| Note that uuid1() may compromise privacy since it creates a UUID containing | ||||||||
|
|
@@ -129,7 +130,7 @@ class UUID: | |||||||
| variant the UUID variant (one of the constants RESERVED_NCS, | ||||||||
| RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) | ||||||||
|
|
||||||||
| version the UUID version number (1 through 5, meaningful only | ||||||||
| version the UUID version number (1, 3, 4, 5 and 7, meaningful only | ||||||||
| when the variant is RFC_4122) | ||||||||
|
|
||||||||
| is_safe An enum indicating whether the UUID has been generated in | ||||||||
|
|
@@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, | |||||||
| if not 0 <= int < 1<<128: | ||||||||
| raise ValueError('int is out of range (need a 128-bit value)') | ||||||||
| if version is not None: | ||||||||
| if not 1 <= version <= 5: | ||||||||
| if not 1 <= version <= 7: | ||||||||
| raise ValueError('illegal version number') | ||||||||
| # Set the variant to RFC 4122. | ||||||||
| int &= ~(0xc000 << 48) | ||||||||
|
|
@@ -656,7 +657,7 @@ def getnode(): | |||||||
| assert False, '_random_getnode() returned invalid value: {}'.format(_node) | ||||||||
|
|
||||||||
|
|
||||||||
| _last_timestamp = None | ||||||||
| _last_timestamp_v1 = None | ||||||||
|
|
||||||||
| def uuid1(node=None, clock_seq=None): | ||||||||
| """Generate a UUID from a host ID, sequence number, and the current time. | ||||||||
|
|
@@ -674,15 +675,15 @@ def uuid1(node=None, clock_seq=None): | |||||||
| is_safe = SafeUUID.unknown | ||||||||
| return UUID(bytes=uuid_time, is_safe=is_safe) | ||||||||
|
|
||||||||
| global _last_timestamp | ||||||||
| global _last_timestamp_v1 | ||||||||
| import time | ||||||||
| nanoseconds = time.time_ns() | ||||||||
| # 0x01b21dd213814000 is the number of 100-ns intervals between the | ||||||||
| # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. | ||||||||
| timestamp = nanoseconds // 100 + 0x01b21dd213814000 | ||||||||
| if _last_timestamp is not None and timestamp <= _last_timestamp: | ||||||||
| timestamp = _last_timestamp + 1 | ||||||||
| _last_timestamp = timestamp | ||||||||
| if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: | ||||||||
| timestamp = _last_timestamp_v1 + 1 | ||||||||
| _last_timestamp_v1 = timestamp | ||||||||
| if clock_seq is None: | ||||||||
| import random | ||||||||
| clock_seq = random.getrandbits(14) # instead of stable storage | ||||||||
|
|
@@ -719,14 +720,72 @@ def uuid5(namespace, name): | |||||||
| hash = sha1(namespace.bytes + name).digest() | ||||||||
| return UUID(bytes=hash[:16], version=5) | ||||||||
|
|
||||||||
| _last_timestamp_v7 = None | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to apply a PEP-8 change in a separate PR because the module has inconsistencies. It seems a bit weird to only PEP-8ify this part of the code while the rest is not really PEP-8ified. See #121119 (comment).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. python-dev doesn’t have a practice of doing reformatting-only PRs. Instead, follow good conventions in code that is added or already changed.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well... if a core dev endorses the change, I think it's fine. I don't mind endorsing it. I didn't do it for uuid6() or for uuid8() when I wrote those functions, as there were more one-blank-line separations than two-blank-line separations. But if you insist on adding 2 blank lines, I'll also add them around the other functions because I prefer being consistent in this case (honestly, having 2 blank lines around only UUIDv7 makes it harder to read IMO).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would say PEP-8 tells me that we can also ignore the PEP if the surrounding code already breaks it. But I will make a commit to just add blank lines around the functions I've added (uuid6 to uuid8).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that it's worth it to reformat the whole uuid.py file to PEP 8, but respecting PEP 8 for new code (or code near changed code) is a good practice.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, adding a few blank lines is innocuous (it does not change git blame, or risk changing the meaning of code), so it’s fine to do in existing code in this PR. Generally, people saying they want to «apply PEP 8» are thinking of much bigger changes. [note: marking this convo as unresolved just to help Victor or Hugo see it, not because there’s something left to do for the PR author]
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is about, for example, methods using camelCase in unittest or logging, not spaces! |
||||||||
| _last_counter_v7 = 0 # 42-bit counter | ||||||||
|
|
||||||||
| def uuid7(): | ||||||||
|
picnixz marked this conversation as resolved.
|
||||||||
| """Generate a UUID from a Unix timestamp in milliseconds and random bits. | ||||||||
|
|
||||||||
| UUIDv7 objects feature monotonicity within a millisecond. | ||||||||
| """ | ||||||||
| # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - | ||||||||
| # unix_ts_ms | version | counter_hi | variant | counter_lo | random | ||||||||
| # | ||||||||
| # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed | ||||||||
| # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. | ||||||||
| # | ||||||||
| # 'random' is a 32-bit random value regenerated for every new UUID. | ||||||||
| # | ||||||||
| # If multiple UUIDs are generated within the same millisecond, the LSB | ||||||||
| # of 'counter' is incremented by 1. When overflowing, the timestamp is | ||||||||
| # advanced and the counter is reset to a random 42-bit integer with MSB | ||||||||
| # set to 0. | ||||||||
|
|
||||||||
| def get_counter_and_tail(): | ||||||||
|
picnixz marked this conversation as resolved.
Outdated
|
||||||||
| rand = int.from_bytes(os.urandom(10)) | ||||||||
| # 42-bit counter with MSB set to 0 | ||||||||
| counter = (rand >> 32) & 0x1ffffffffff | ||||||||
| # 32-bit random data | ||||||||
| tail = rand & 0xffffffff | ||||||||
|
picnixz marked this conversation as resolved.
Outdated
|
||||||||
| return counter, tail | ||||||||
|
|
||||||||
| global _last_timestamp_v7 | ||||||||
| global _last_counter_v7 | ||||||||
|
|
||||||||
| import time | ||||||||
|
picnixz marked this conversation as resolved.
Outdated
|
||||||||
| nanoseconds = time.time_ns() | ||||||||
| timestamp_ms, _ = divmod(nanoseconds, 1_000_000) | ||||||||
|
|
||||||||
| if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: | ||||||||
| counter, tail = get_counter_and_tail() | ||||||||
| else: | ||||||||
| if timestamp_ms < _last_timestamp_v7: | ||||||||
| timestamp_ms = _last_timestamp_v7 + 1 | ||||||||
|
hugovk marked this conversation as resolved.
|
||||||||
| # advance the counter | ||||||||
| counter = _last_counter_v7 + 1 | ||||||||
| if counter > 0x3ffffffffff: | ||||||||
| timestamp_ms += 1 # advance the timestamp | ||||||||
| counter, tail = get_counter_and_tail() | ||||||||
| else: | ||||||||
| tail = int.from_bytes(os.urandom(4)) | ||||||||
|
picnixz marked this conversation as resolved.
|
||||||||
|
|
||||||||
| _last_timestamp_v7 = timestamp_ms | ||||||||
|
picnixz marked this conversation as resolved.
|
||||||||
| _last_counter_v7 = counter | ||||||||
|
picnixz marked this conversation as resolved.
Outdated
|
||||||||
|
|
||||||||
| int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 | ||||||||
| int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 | ||||||||
| int_uuid_7 |= (counter & 0x3fffffff) << 32 | ||||||||
| int_uuid_7 |= tail & 0xffffffff | ||||||||
| return UUID(int=int_uuid_7, version=7) | ||||||||
|
|
||||||||
| def main(): | ||||||||
| """Run the uuid command line interface.""" | ||||||||
| uuid_funcs = { | ||||||||
| "uuid1": uuid1, | ||||||||
| "uuid3": uuid3, | ||||||||
| "uuid4": uuid4, | ||||||||
| "uuid5": uuid5 | ||||||||
| "uuid5": uuid5, | ||||||||
| "uuid7": uuid7, | ||||||||
| } | ||||||||
| uuid_namespace_funcs = ("uuid3", "uuid5") | ||||||||
| namespaces = { | ||||||||
|
|
||||||||
Uh oh!
There was an error while loading. Please reload this page.