From a0348e8dedb9a7ee53af8f88adbba0e5aeff577d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 9 Jan 2026 19:31:19 +0200 Subject: [PATCH 1/1] Make the deduplication in ginExtractEntries() a little faster The idea is to remove NULLs from the array first, and use qsort to deduplicate only the non-NULL items. That simplifies the comparison function a little. --- src/backend/access/gin/ginutil.c | 139 +++++++++++++------------------ src/include/access/gin_private.h | 2 +- 2 files changed, 61 insertions(+), 80 deletions(-) diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index d205093e21d..29966b9d05b 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -387,19 +387,6 @@ GinInitMetabuffer(Buffer b) ((char *) metadata + sizeof(GinMetaPageData)) - (char *) page; } -/* - * Support for sorting key datums in ginExtractEntries - * - * Note: we only have to worry about null and not-null keys here; - * ginExtractEntries never generates more than one placeholder null, - * so it doesn't have to sort those. - */ -typedef struct -{ - Datum datum; - bool isnull; -} keyEntryData; - typedef struct { FmgrInfo *cmpDatumFunc; @@ -410,24 +397,14 @@ typedef struct static int cmpEntries(const void *a, const void *b, void *arg) { - const keyEntryData *aa = (const keyEntryData *) a; - const keyEntryData *bb = (const keyEntryData *) b; + const Datum *aa = (const Datum *) a; + const Datum *bb = (const Datum *) b; cmpEntriesArg *data = (cmpEntriesArg *) arg; int res; - if (aa->isnull) - { - if (bb->isnull) - res = 0; /* NULL "=" NULL */ - else - res = 1; /* NULL ">" not-NULL */ - } - else if (bb->isnull) - res = -1; /* not-NULL "<" NULL */ - else - res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc, - data->collation, - aa->datum, bb->datum)); + res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc, + data->collation, + *aa, *bb)); /* * Detect if we have any duplicates. If there are equal keys, qsort must @@ -450,11 +427,13 @@ cmpEntries(const void *a, const void *b, void *arg) Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, - int32 *nentries, GinNullCategory **categories) + int32 *nentries_p, GinNullCategory **categories_p) { Datum *entries; bool *nullFlags; - int32 i; + GinNullCategory *categories; + bool hasNull; + int32 nentries; /* * We don't call the extractValueFn on a null item. Instead generate a @@ -462,42 +441,60 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum, */ if (isNull) { - *nentries = 1; + *nentries_p = 1; entries = palloc_object(Datum); entries[0] = (Datum) 0; - *categories = palloc_object(GinNullCategory); - (*categories)[0] = GIN_CAT_NULL_ITEM; + *categories_p = palloc_object(GinNullCategory); + (*categories_p)[0] = GIN_CAT_NULL_ITEM; return entries; } /* OK, call the opclass's extractValueFn */ nullFlags = NULL; /* in case extractValue doesn't set it */ + nentries = 0; entries = (Datum *) DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1], ginstate->supportCollation[attnum - 1], value, - PointerGetDatum(nentries), + PointerGetDatum(&nentries), PointerGetDatum(&nullFlags))); /* * Generate a placeholder if the item contained no keys. */ - if (entries == NULL || *nentries <= 0) + if (entries == NULL || nentries <= 0) { - *nentries = 1; + *nentries_p = 1; entries = palloc_object(Datum); entries[0] = (Datum) 0; - *categories = palloc_object(GinNullCategory); - (*categories)[0] = GIN_CAT_EMPTY_ITEM; + *categories_p = palloc_object(GinNullCategory); + (*categories_p)[0] = GIN_CAT_EMPTY_ITEM; return entries; } /* - * If the extractValueFn didn't create a nullFlags array, create one, - * assuming that everything's non-null. + * Scan the items for any NULLs. All NULLs are considered equal, so we + * just need to check and remember if there are any. We remove them from + * the array here, and if necessary, put back one NULL entry to represent + * them all after deduplication. */ - if (nullFlags == NULL) - nullFlags = (bool *) palloc0(*nentries * sizeof(bool)); + hasNull = false; + if (nullFlags) + { + int32 numNonNulls = 0; + + for (int32 i = 0; i < nentries; i++) + { + if (nullFlags[i]) + hasNull = true; + else + { + entries[numNonNulls] = entries[i]; + numNonNulls++; + } + } + nentries = numNonNulls; + } /* * If there's more than one key, sort and unique-ify. @@ -506,63 +503,47 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum, * pretty bad too. For small numbers of keys it'd likely be better to use * a simple insertion sort. */ - if (*nentries > 1) + if (nentries > 1) { - keyEntryData *keydata; cmpEntriesArg arg; - keydata = palloc_array(keyEntryData, *nentries); - for (i = 0; i < *nentries; i++) - { - keydata[i].datum = entries[i]; - keydata[i].isnull = nullFlags[i]; - } - arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1]; arg.collation = ginstate->supportCollation[attnum - 1]; arg.haveDups = false; - qsort_arg(keydata, *nentries, sizeof(keyEntryData), + qsort_arg(entries, nentries, sizeof(Datum), cmpEntries, &arg); if (arg.haveDups) { /* there are duplicates, must get rid of 'em */ - int32 j; + int32 j = 1; - entries[0] = keydata[0].datum; - nullFlags[0] = keydata[0].isnull; - j = 1; - for (i = 1; i < *nentries; i++) + for (int32 i = 1; i < nentries; i++) { - if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0) - { - entries[j] = keydata[i].datum; - nullFlags[j] = keydata[i].isnull; - j++; - } + if (cmpEntries(&entries[i - 1], &entries[i], &arg) != 0) + entries[j++] = entries[i]; } - *nentries = j; + nentries = j; } - else - { - /* easy, no duplicates */ - for (i = 0; i < *nentries; i++) - { - entries[i] = keydata[i].datum; - nullFlags[i] = keydata[i].isnull; - } - } - - pfree(keydata); } /* - * Create GinNullCategory representation from nullFlags. + * Create GinNullCategory representation. */ - *categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory)); - for (i = 0; i < *nentries; i++) - (*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY); + categories = (GinNullCategory *) palloc((nentries + (hasNull ? 1 : 0)) * sizeof(GinNullCategory)); + for (int32 i = 0; i < nentries; i++) + categories[i] = GIN_CAT_NORM_KEY; + + /* Put back a NULL entry, if there were any */ + if (hasNull) + { + entries[nentries] = (Datum) 0; + categories[nentries] = GIN_CAT_NULL_KEY; + nentries++; + } + *nentries_p = nentries; + *categories_p = categories; return entries; } diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index e155045ce8a..1e97e6cb3c9 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -99,7 +99,7 @@ extern void GinInitPage(Page page, uint32 f, Size pageSize); extern void GinInitMetabuffer(Buffer b); extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, - int32 *nentries, GinNullCategory **categories); + int32 *nentries_p, GinNullCategory **categories_p); extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple); extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, -- 2.47.3