From ca5aec2c40c33d0a8768d8d78b11872c56613d96 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Fri, 5 Jul 2024 20:58:37 +0200 Subject: [PATCH v20240712 09/10] Reduce the size of GinTuple by 12 bytes The size of a Gin tuple can't be larger than what we can allocate, which is just shy of 1GB. This limits the number of useful bits in the size fields to 30, so int will be enough here. Next, a key must fit in a single page (up to 32KB), so uint16 should be enough for the keylen attribute. Then, re-organize the fields to minimize alignment losses, while keeping an order that still groups related fields logically. Finally, use the first posting list to get the first stored ItemPointer; this deduplicates stored data and thus improves performance again. In passing, adjust the alignment of the first GinPostingList in GinTuple from MAXALIGN to SHORTALIGN. --- src/backend/access/gin/gininsert.c | 21 ++++++++++++--------- src/include/access/gin_tuple.h | 19 +++++++++++++++---- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index e02cb6d0e6..b9444b6db7 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -1550,7 +1550,8 @@ GinBufferMergeTuple(GinBuffer *buffer, GinTuple *tup) * when merging non-overlapping lists, e.g. in each parallel worker. */ if ((buffer->nitems > 0) && - (ItemPointerCompare(&buffer->items[buffer->nitems - 1], &tup->first) == 0)) + (ItemPointerCompare(&buffer->items[buffer->nitems - 1], + GinTupleGetFirst(tup)) == 0)) buffer->nfrozen = buffer->nitems; /* @@ -1567,7 +1568,8 @@ GinBufferMergeTuple(GinBuffer *buffer, GinTuple *tup) for (int i = buffer->nfrozen; i < buffer->nitems; i++) { /* Is the TID after the first TID of the new tuple? Can't freeze. 
*/ - if (ItemPointerCompare(&buffer->items[i], &tup->first) > 0) + if (ItemPointerCompare(&buffer->items[i], + GinTupleGetFirst(tup)) > 0) break; buffer->nfrozen++; @@ -2176,7 +2178,7 @@ _gin_build_tuple(GinBuildState *state, * alignment, to allow direct access to compressed segments (those require * SHORTALIGN, but we do MAXALING anyway). */ - tuplen = MAXALIGN(offsetof(GinTuple, data) + keylen) + compresslen; + tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen; /* * Allocate space for the whole GIN tuple. @@ -2191,7 +2193,6 @@ _gin_build_tuple(GinBuildState *state, tuple->category = category; tuple->keylen = keylen; tuple->nitems = nitems; - tuple->first = items[0]; /* key type info */ tuple->typlen = typlen; @@ -2222,7 +2223,7 @@ _gin_build_tuple(GinBuildState *state, } /* finally, copy the TIDs into the array */ - ptr = (char *) tuple + MAXALIGN(offsetof(GinTuple, data) + keylen); + ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen); /* copy in the compressed data, and free the segments */ dlist_foreach_modify(iter, &segments) @@ -2292,8 +2293,8 @@ _gin_parse_tuple_items(GinTuple *a) int ndecoded; ItemPointer items; - len = a->tuplen - MAXALIGN(offsetof(GinTuple, data) + a->keylen); - ptr = (char *) a + MAXALIGN(offsetof(GinTuple, data) + a->keylen); + len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen); + ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen); items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded); @@ -2355,8 +2356,10 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup) &ssup[a->attrnum - 1]); /* if the key is the same, consider the first TID in the array */ - return (r != 0) ? r : ItemPointerCompare(&a->first, &b->first); + return (r != 0) ? 
r : ItemPointerCompare(GinTupleGetFirst(a), + GinTupleGetFirst(b)); } - return ItemPointerCompare(&a->first, &b->first); + return ItemPointerCompare(GinTupleGetFirst(a), + GinTupleGetFirst(b)); } diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h index 4ac8cfcc2b..f4dbdfd3f7 100644 --- a/src/include/access/gin_tuple.h +++ b/src/include/access/gin_tuple.h @@ -10,10 +10,12 @@ #ifndef GIN_TUPLE_ #define GIN_TUPLE_ +#include "access/ginblock.h" #include "storage/itemptr.h" #include "utils/sortsupport.h" /* + * XXX: Update description with new architecture * Each worker sees tuples in CTID order, so if we track the first TID and * compare that when combining results in the worker, we would not need to * do an expensive sort in workers (the mergesort is already smart about @@ -24,17 +26,26 @@ */ typedef struct GinTuple { - Size tuplen; /* length of the whole tuple */ - Size keylen; /* bytes in data for key value */ + int tuplen; /* length of the whole tuple */ + OffsetNumber attrnum; /* attnum of index key */ + uint16 keylen; /* bytes in data for key value */ int16 typlen; /* typlen for key */ bool typbyval; /* typbyval for key */ - OffsetNumber attrnum; /* attnum of index key */ signed char category; /* category: normal or NULL? */ - ItemPointerData first; /* first TID in the array */ int nitems; /* number of TIDs in the data */ char data[FLEXIBLE_ARRAY_MEMBER]; } GinTuple; +static inline ItemPointer +GinTupleGetFirst(GinTuple *tup) +{ + GinPostingList *list; + + list = (GinPostingList *) SHORTALIGN(tup->data + tup->keylen); + + return &list->first; +} + typedef struct GinBuffer GinBuffer; extern int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup); -- 2.39.2