From 98726fc9b75d9abaec8bb4b305531e79d16bf6b0 Mon Sep 17 00:00:00 2001 From: "Andrey M. Borodin" Date: Sun, 20 Aug 2023 23:55:31 +0300 Subject: [PATCH v11] Implement UUID v7 as per IETF draft This commit adds a function to generate UUID v7. This function optionally accepts a timestamp used to generate the next UUID. Also we add functions to extract the timestamp, version and variant from a UUID. Authors: Andrey Borodin, Sergey Prokhorenko --- doc/src/sgml/func.sgml | 36 ++++- src/backend/utils/adt/uuid.c | 195 +++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 19 +++ src/test/regress/expected/opr_sanity.out | 13 +- src/test/regress/expected/uuid.out | 88 ++++++++++ src/test/regress/sql/uuid.sql | 35 ++++ 6 files changed, 380 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 210c7c0b02..1d4d48d7cb 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14130,13 +14130,43 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple gen_random_uuid + + uuidv7 + + + + uuid_extract_time + + - PostgreSQL includes one function to generate a UUID: + PostgreSQL includes several functions to generate a UUID: + gen_random_uuid, uuidv4, and uuidv7. gen_random_uuid () uuid +uuidv4 () uuid + + Both functions return a version 4 (random) UUID. This is the most commonly + used type of UUID and is appropriate when random distribution of keys does + not affect performance of an application. + +uuidv7 () uuid + + This function returns a version 7 (time-ordered + random) UUID. This UUID + version should be used when an application prefers locality of identifiers. + +uuid_extract_time (uuid) timestamptz + + This function extracts a timestamptz from UUID versions 1, 6 and 7. For other + versions and variants this function returns NULL. + +uuid_extract_ver (uuid) int2 + + This function extracts the version bits from a UUID of the variant described by + the IETF standard (b10xx variant). For other variants this function returns NULL.
+
+uuid_extract_var (uuid) int2
-   This function returns a version 4 (random) UUID. This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function extracts the variant bits from a UUID.
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 73dfd711c7..6125061b35 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,13 +13,18 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
+#include "access/xlog.h"
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
+#include "utils/datetime.h"
 #include "utils/guc.h"
 #include "utils/sortsupport.h"
+#include "utils/timestamp.h"
 #include "utils/uuid.h"
 
 /* sortsupport for uuid */
@@ -421,3 +426,247 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 
 	PG_RETURN_UUID_P(uuid);
 }
+
+/*
+ * State kept between uuidv7() calls: the counter and timestamp used for the
+ * previous UUID, and whether that timestamp was supplied by the caller.
+ */
+static uint32_t sequence_counter;
+static uint64_t previous_timestamp = 0;
+static bool external_times_used = false;
+
+/*
+ * uuidv7
+ *
+ * Generate a version 7 UUID: a 48-bit Unix timestamp in milliseconds, then
+ * an 18-bit counter, then random bits.
+ *
+ * With no argument (or a NULL one) the timestamp is read from gettimeofday();
+ * otherwise it is derived from the timestamptz argument, which must lie
+ * between the Unix epoch and the end of the 48-bit millisecond range.
+ */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	TimestampTz ts;
+	uint64_t	tms;
+	struct timeval tp;
+	bool		increment_counter;
+
+	if (PG_NARGS() == 0 || PG_ARGISNULL(0))
+	{
+		gettimeofday(&tp, NULL);
+		tms = ((uint64_t) tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;
+		/* time from clock is protected from backward leaps */
+		increment_counter = (tms <= previous_timestamp) && !external_times_used;
+		external_times_used = false;
+	}
+	else
+	{
+		/* microseconds from the Unix epoch to the PostgreSQL epoch */
+		const int64 epoch_shift_us =
+			(int64) (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		/* last microsecond whose millisecond count still fits in 48 bits */
+		const int64 max_unix_us = INT64CONST(0xFFFFFFFFFFFF) * 1000 + 999;
+
+		ts = PG_GETARG_TIMESTAMPTZ(0);
+
+		/*
+		 * Only 48 bits of unsigned milliseconds are stored.  A timestamp
+		 * before the Unix epoch would wrap around when converted to the
+		 * unsigned tms, and one past the 48-bit range would be truncated,
+		 * so reject both instead of emitting a UUID with a bogus time.
+		 */
+		if (ts < -epoch_shift_us || ts > max_unix_us - epoch_shift_us)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+					 errmsg("timestamp out of range for UUID version 7")));
+
+		tms = (uint64_t) ((ts + epoch_shift_us) / 1000);
+
+		/*
+		 * The time can leap backwards when provided by the user, so we use
+		 * counter only when called with exactly same unix_ts_ms argument.
+		 */
+		increment_counter = (tms == previous_timestamp);
+		external_times_used = true;
+	}
+
+	if (increment_counter)
+	{
+		/* Time did not increment from the previous generation, we must increment counter */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* We only have 18-bit counter */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* protection from leap backward */
+		tms = previous_timestamp;
+
+		/* fill everything after the timestamp and counter with random bytes */
+		if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char) (sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char) (sequence_counter >> 6);
+		/* least significant 6 bits */
+		uuid->data[8] = (unsigned char) (sequence_counter);
+	}
+	else
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("could not generate random values")));
+
+		/*
+		 * Left-most counter bits are initialized as zero for the sole purpose
+		 * of guarding against counter rollovers.
+		 * See section "Fixed-Length Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis-09#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		sequence_counter = ((uint32_t) uuid->data[8] & 0x3f) +
+			(((uint32_t) uuid->data[7]) << 6) +
+			(((uint32_t) uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char) (tms >> 40);
+	uuid->data[1] = (unsigned char) (tms >> 32);
+	uuid->data[2] = (unsigned char) (tms >> 24);
+	uuid->data[3] = (unsigned char) (tms >> 16);
+	uuid->data[4] = (unsigned char) (tms >> 8);
+	uuid->data[5] = (unsigned char) tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (time-ordered) UUID, see
+	 * https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format#name-creating-a-uuidv7-value
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+/*
+ * uuid_extract_time
+ *
+ * Return the timestamp embedded in a UUID of version 1, 6 or 7.  Returns
+ * NULL for any other version and when the variant bits are not b10.
+ */
+Datum
+uuid_extract_time(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	TimestampTz ts;
+	uint64_t	tms;
+
+	if ((uuid->data[8] & 0xc0) != 0x80)
+		PG_RETURN_NULL();
+
+	/* version 7: 48-bit big-endian count of milliseconds since Unix epoch */
+	if ((uuid->data[6] & 0xf0) == 0x70)
+	{
+		tms = uuid->data[5];
+		tms += ((uint64_t) uuid->data[4]) << 8;
+		tms += ((uint64_t) uuid->data[3]) << 16;
+		tms += ((uint64_t) uuid->data[2]) << 24;
+		tms += ((uint64_t) uuid->data[1]) << 32;
+		tms += ((uint64_t) uuid->data[0]) << 40;
+
+		ts = (TimestampTz) (tms * 1000) -	/* convert ms to us, then adjust */
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	/* version 1: 60-bit count of 100-ns intervals since 1582-10-15 */
+	if ((uuid->data[6] & 0xf0) == 0x10)
+	{
+		tms = ((uint64_t) uuid->data[0]) << 24;
+		tms += ((uint64_t) uuid->data[1]) << 16;
+		tms += ((uint64_t) uuid->data[2]) << 8;
+		tms += ((uint64_t) uuid->data[3]);
+		tms += ((uint64_t) uuid->data[4]) << 40;
+		tms += ((uint64_t) uuid->data[5]) << 32;
+		tms += (((uint64_t) uuid->data[6]) & 0xf) << 56;
+		tms += ((uint64_t) uuid->data[7]) << 48;
+
+		/* widen to signed 64 bits before multiplying so the expression stays signed */
+		ts = (TimestampTz) (tms / 10) -	/* convert 100-ns intervals to us, then adjust */
+			((int64) POSTGRES_EPOCH_JDATE - date2j(1582, 10, 15)) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	/* version 6: the same 60-bit count as version 1, most significant bits first */
+	if ((uuid->data[6] & 0xf0) == 0x60)
+	{
+		tms = ((uint64_t) uuid->data[0]) << 52;
+		tms += ((uint64_t) uuid->data[1]) << 44;
+		tms += ((uint64_t) uuid->data[2]) << 36;
+		tms += ((uint64_t) uuid->data[3]) << 28;
+		tms += ((uint64_t) uuid->data[4]) << 20;
+		tms += ((uint64_t) uuid->data[5]) << 12;
+		tms += (((uint64_t) uuid->data[6]) & 0xf) << 8;
+		tms += ((uint64_t) uuid->data[7]);
+
+		/* widen to signed 64 bits before multiplying so the expression stays signed */
+		ts = (TimestampTz) (tms / 10) -	/* convert 100-ns intervals to us, then adjust */
+			((int64) POSTGRES_EPOCH_JDATE - date2j(1582, 10, 15)) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * uuid_extract_ver
+ *
+ * Return the version nibble (upper four bits of byte 6) of a UUID whose
+ * variant bits are b10, or NULL for other variants.
+ */
+Datum
+uuid_extract_ver(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	uint16_t	result;
+
+	if ((uuid->data[8] & 0xc0) != 0x80)
+		PG_RETURN_NULL();
+	result = uuid->data[6] >> 4;
+
+	PG_RETURN_UINT16(result);
+}
+
+/*
+ * uuid_extract_var
+ *
+ * Return the two most significant bits of byte 8 of a UUID (the variant).
+ */
+Datum
+uuid_extract_var(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	uint16_t	result;
+
+	result = uuid->data[8] >> 6;
+
+	PG_RETURN_UINT16(result);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 58811a6530..956fb08ce9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9174,6 +9174,25 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate random UUID',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc =>
'gen_random_uuid' }, +{ oid => '9896', descr => 'generate UUID version 7', + proname => 'uuidv7', proleakproof => 't', provolatile => 'v', + prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' }, +{ oid => '9897', descr => 'generate UUID version 7', proisstrict => 'f', + proname => 'uuidv7', proleakproof => 't', provolatile => 'v', + prorettype => 'uuid', proargtypes => 'timestamptz', prosrc => 'uuidv7', + proargnames => '{unix_ts_ms}', proargmodes => '{i}' }, +{ oid => '9898', descr => 'extract timestamp from UUID version 7', + proname => 'uuid_extract_time', proleakproof => 't', + prorettype => 'timestamptz', proargtypes => 'uuid', prosrc => 'uuid_extract_time' }, +{ oid => '9899', descr => 'extract version from RFC 4122 UUID', + proname => 'uuid_extract_ver', proleakproof => 't', + prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_extract_ver' }, +{ oid => '9900', descr => 'extract variant from UUID', + proname => 'uuid_extract_var', proleakproof => 't', + prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_extract_var' }, # pg_lsn { oid => '3229', descr => 'I/O', diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 7610b011d6..1c37533975 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -126,9 +126,10 @@ WHERE p1.oid < p2.oid AND p1.proretset != p2.proretset OR p1.provolatile != p2.provolatile OR p1.pronargs != p2.pronargs); - oid | proname | oid | proname ------+---------+-----+--------- -(0 rows) + oid | proname | oid | proname +------+---------+------+--------- + 9896 | uuidv7 | 9897 | uuidv7 +(1 row) -- Look for uses of different type OIDs in the argument/result type fields -- for different aliases of the same built-in function. 
@@ -872,6 +873,12 @@ xid8ge(xid8,xid8) xid8eq(xid8,xid8) xid8ne(xid8,xid8) xid8cmp(xid8,xid8) +uuidv4() +uuidv7() +uuidv7(timestamp with time zone) +uuid_extract_time(uuid) +uuid_extract_ver(uuid) +uuid_extract_var(uuid) -- restore normal output mode \a\t -- List of functions used by libpq's fe-lobj.c diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out index 8e7f21910d..df78fd0385 100644 --- a/src/test/regress/expected/uuid.out +++ b/src/test/regress/expected/uuid.out @@ -168,5 +168,93 @@ SELECT count(DISTINCT guid_field) FROM guid1; 2 (1 row) +-- test of uuidv4() alias +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +SELECT count(DISTINCT guid_field) FROM guid1; + count +------- + 2 +(1 row) + +-- generation test for v7 +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +SELECT count(DISTINCT guid_field) FROM guid1; + count +------- + 2 +(1 row) + +-- generation test for v7 with same unix_ts_ms +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7(now())); +INSERT INTO guid1 (guid_field) VALUES (uuidv7(now())); +SELECT count(DISTINCT guid_field) FROM guid1; + count +------- + 2 +(1 row) + +-- check that timestamp is extracted correctly +SELECT uuid_extract_time(uuidv7(TIMESTAMP '2024-01-16 13:37:00')) - TIMESTAMP '2024-01-16 13:37:00'; + ?column? +---------- + @ 0 +(1 row) + +-- support functions for UUID versions and variants +SELECT uuid_extract_ver(uuidv7()); + uuid_extract_ver +------------------ + 7 +(1 row) + +SELECT uuid_extract_ver('{11111111-1111-1111-1111-111111111111}') IS NULL; + ?column? 
+---------- + t +(1 row) + +SELECT uuid_extract_ver('{11111111-1111-5111-8111-111111111111}'); + uuid_extract_ver +------------------ + 5 +(1 row) + +SELECT uuid_extract_var(uuidv7()); + uuid_extract_var +------------------ + 2 +(1 row) + +-- uuid_extract_time() must refuse to accept non-UUIDv7 +SELECT uuid_extract_time(gen_random_uuid()); + uuid_extract_time +------------------- + +(1 row) + +-- extract UUID v1, v6 and v7 timestamp +SELECT uuid_extract_time('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; + ?column? +---------- + t +(1 row) + +SELECT uuid_extract_time('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; + ?column? +---------- + t +(1 row) + +SELECT uuid_extract_time('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; + ?column? +---------- + t +(1 row) + -- clean up DROP TABLE guid1, guid2 CASCADE; diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql index 9a8f437c7d..c7a09dd21d 100644 --- a/src/test/regress/sql/uuid.sql +++ b/src/test/regress/sql/uuid.sql @@ -85,5 +85,40 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid()); INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid()); SELECT count(DISTINCT guid_field) FROM guid1; +-- test of uuidv4() alias +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +INSERT INTO guid1 (guid_field) VALUES (uuidv4()); +SELECT count(DISTINCT guid_field) FROM guid1; + +-- generation test for v7 +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +INSERT INTO guid1 (guid_field) VALUES (uuidv7()); +SELECT count(DISTINCT guid_field) FROM guid1; + +-- generation test for v7 with same unix_ts_ms +TRUNCATE guid1; +INSERT INTO guid1 (guid_field) VALUES (uuidv7(now())); +INSERT INTO guid1 (guid_field) VALUES (uuidv7(now())); +SELECT count(DISTINCT guid_field) FROM guid1; + +-- check that timestamp is extracted correctly +SELECT 
uuid_extract_time(uuidv7(TIMESTAMP '2024-01-16 13:37:00')) - TIMESTAMP '2024-01-16 13:37:00'; + +-- support functions for UUID versions and variants +SELECT uuid_extract_ver(uuidv7()); +SELECT uuid_extract_ver('{11111111-1111-1111-1111-111111111111}') IS NULL; +SELECT uuid_extract_ver('{11111111-1111-5111-8111-111111111111}'); +SELECT uuid_extract_var(uuidv7()); + +-- uuid_extract_time() must refuse to accept non-UUIDv7 +SELECT uuid_extract_time(gen_random_uuid()); + +-- extract UUID v1, v6 and v7 timestamp +SELECT uuid_extract_time('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; +SELECT uuid_extract_time('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; +SELECT uuid_extract_time('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; + -- clean up DROP TABLE guid1, guid2 CASCADE; -- 2.37.1 (Apple Git-137.1)