From ea88f2340887d7ae984b0a96664f7667bc8db50a Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Fri, 13 Jan 2023 15:42:41 +0100 Subject: [PATCH v9 6/6] btree specialization for variable-length multi-attribute keys The default code path is relatively slow at O(n^2), so with multiple attributes we accept the increased startup cost in favour of lower costs for later attributes. Note that this will only be used for indexes that use at least one variable-length key attribute (except as last key attribute in specific cases). --- src/backend/access/nbtree/README | 10 +- src/backend/access/nbtree/nbtree_spec.c | 3 + src/include/access/itup_attiter.h | 199 ++++++++++++++++++++++++ src/include/access/nbtree.h | 11 +- src/include/access/nbtree_spec.h | 48 +++++- 5 files changed, 260 insertions(+), 11 deletions(-) create mode 100644 src/include/access/itup_attiter.h diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 6864902637..2219c58242 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -1104,15 +1104,13 @@ in the index AM to call the specialized functions, increasing the performance of those hot paths. Optimized code paths exist for the following cases, in order of preference: - - indexes with only a single key attribute - - multi-column indexes that could benefit from the attcacheoff optimization + - indexes with only a single key attribute, + - multi-column indexes that cannot pre-calculate the offsets of all key + attributes in the tuple data section, + - multi-column indexes that do benefit from the attcacheoff optimization NB: This is also the default path, and is comparatively slow for uncachable attribute offsets. -Future work will optimize for multi-column indexes that don't benefit -from the attcacheoff optimization by improving on the O(n^2) nature of -index_getattr through storing attribute offsets. 
- Notes About Data Representation ------------------------------- diff --git a/src/backend/access/nbtree/nbtree_spec.c b/src/backend/access/nbtree/nbtree_spec.c index 21635397ed..699197dfa7 100644 --- a/src/backend/access/nbtree/nbtree_spec.c +++ b/src/backend/access/nbtree/nbtree_spec.c @@ -33,6 +33,9 @@ _bt_specialize(Relation rel) case NBTS_CTX_CACHED: _bt_specialize_cached(rel); break; + case NBTS_CTX_UNCACHED: + _bt_specialize_uncached(rel); + break; case NBTS_CTX_SINGLE_KEYATT: _bt_specialize_single_keyatt(rel); break; diff --git a/src/include/access/itup_attiter.h b/src/include/access/itup_attiter.h new file mode 100644 index 0000000000..c8fb6954bc --- /dev/null +++ b/src/include/access/itup_attiter.h @@ -0,0 +1,199 @@ +/*------------------------------------------------------------------------- + * + * itup_attiter.h + * POSTGRES index tuple attribute iterator definitions. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/itup_attiter.h + * + *------------------------------------------------------------------------- + */ +#ifndef ITUP_ATTITER_H +#define ITUP_ATTITER_H + +#include "access/itup.h" +#include "varatt.h" + +typedef struct IAttrIterStateData +{ + int offset; /* byte offset into the tuple's data area */ + bool slow; /* true once attcacheoff can't be used anymore */ + bool isNull; /* was the last visited attribute NULL? */ +} IAttrIterStateData; + +typedef IAttrIterStateData * IAttrIterState; + +/* ---------------- + * index_attiterinit + * + * This gets called many times, so we macro the first-attribute and + * cacheable lookups, and call nocache_index_attiterinit() for the rest. 
+ * + * tup - the tuple being iterated on + * attnum - the attribute number that we start the iteration with + * in the first index_attiternext call + * tupleDesc - the tuple description + * iter - the iterator state that gets initialized + * ---------------- + */ +#define index_attiterinit(tup, attnum, tupleDesc, iter) \ +do { \ + if ((attnum) == 1) \ + { \ + *(iter) = ((IAttrIterStateData) { \ + 0 /* Offset of attribute 1 is always 0 */, \ + false /* slow */, \ + false /* isNull */ \ + }); \ + } \ + else if (!IndexTupleHasNulls(tup) && \ + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff >= 0) \ + { \ + *(iter) = ((IAttrIterStateData) { \ + TupleDescAttr((tupleDesc), (attnum)-1)->attcacheoff, /* offset */ \ + false, /* slow */ \ + false /* isNull */ \ + }); \ + } \ + else \ + nocache_index_attiterinit((tup), (attnum) - 1, (tupleDesc), (iter)); \ +} while (false) + +/* + * Initialize an index attribute iterator by walking over the first attnum + * attributes of the tuple; no datum is returned. + * + * This is nearly the same as index_deform_tuple, except that this + * returns the internal state up to attnum, instead of populating the + * datum- and isnull-arrays + */ +static inline void +nocache_index_attiterinit(IndexTuple tup, AttrNumber attnum, TupleDesc tupleDesc, IAttrIterState iter) +{ + bool hasnulls = IndexTupleHasNulls(tup); + int curatt; + char *tp; /* ptr to tuple data */ + int off; /* offset in tuple data */ + bits8 *bp; /* ptr to null bitmap in tuple */ + bool slow = false; /* can we use/set attcacheoff? */ + bool null = false; + + /* Assert to protect callers */ + Assert(PointerIsValid(iter)); + Assert(tupleDesc->natts <= INDEX_MAX_KEYS); + Assert(attnum <= tupleDesc->natts); + Assert(attnum > 0); + + /* XXX "knows" t_bits are just after fixed tuple header! 
*/ + bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData)); + + tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info); + off = 0; + + for (curatt = 0; curatt < attnum; curatt++) + { + Form_pg_attribute thisatt = TupleDescAttr(tupleDesc, curatt); + + if (hasnulls && att_isnull(curatt, bp)) + { + null = true; + slow = true; /* can't use attcacheoff anymore */ + continue; + } + + null = false; + + if (!slow && thisatt->attcacheoff >= 0) + off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + else + { + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); + } + + off = att_addlength_pointer(off, thisatt->attlen, tp + off); + + if (thisatt->attlen <= 0) + slow = true; /* can't use attcacheoff anymore */ + } + + iter->isNull = null; + iter->offset = off; + iter->slow = slow; +} + +/* ---------------- + * index_attiternext() - get the next attribute of an index tuple + * + * This gets called many times, so we do the least amount of work + * possible. + * + * The code does not attempt to update attcacheoff, as it is unlikely + * to reach a situation where the cached offset matters a lot. + * If the cached offset does matter, the caller should make sure that + * PopulateTupleDescCacheOffsets() was called on the tuple descriptor + * to populate the attribute offset cache. 
+ * Returns the datum, or (Datum) 0 with iter->isNull set for a NULL attribute. + * ---------------- + */ +static inline Datum +index_attiternext(IndexTuple tup, AttrNumber attnum, TupleDesc tupleDesc, IAttrIterState iter) +{ + bool hasnulls = IndexTupleHasNulls(tup); + char *tp; /* ptr to tuple data */ + bits8 *bp; /* ptr to null bitmap in tuple */ + Datum datum; + Form_pg_attribute thisatt = TupleDescAttr(tupleDesc, attnum - 1); + + Assert(PointerIsValid(iter)); + Assert(tupleDesc->natts <= INDEX_MAX_KEYS); + Assert(attnum <= tupleDesc->natts); + Assert(attnum > 0); + + bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData)); + + tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info); + + if (hasnulls && att_isnull(attnum - 1, bp)) + { + iter->isNull = true; + iter->slow = true; /* can't use attcacheoff anymore */ + return (Datum) 0; /* iter->offset is left unchanged */ + } + + iter->isNull = false; + + if (!iter->slow && thisatt->attcacheoff >= 0) + iter->offset = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + iter->offset = att_align_pointer(iter->offset, thisatt->attalign, -1, + tp + iter->offset); + iter->slow = true; + } + else + { + /* not varlena, so safe to use att_align_nominal */ + iter->offset = att_align_nominal(iter->offset, thisatt->attalign); + } + + datum = fetchatt(thisatt, tp + iter->offset); + + iter->offset = att_addlength_pointer(iter->offset, thisatt->attlen, tp + iter->offset); + + if (thisatt->attlen <= 0) + iter->slow = true; /* can't use attcacheoff anymore */ + + return datum; +} + +#endif /* ITUP_ATTITER_H */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 4628c41e9a..d5ed38bb71 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -16,6 +16,7 @@ #include "access/amapi.h" #include "access/itup.h" +#include "access/itup_attiter.h" #include "access/sdir.h" #include "access/tableam.h" #include "access/xlogreader.h" @@ -1124,18 +1125,26 @@ typedef struct BTOptions typedef enum NBTS_CTX { NBTS_CTX_SINGLE_KEYATT, + NBTS_CTX_UNCACHED, NBTS_CTX_CACHED, NBTS_CTX_DEFAULT, /* fallback */ } NBTS_CTX; static inline NBTS_CTX 
_nbt_spec_context(Relation irel) { + AttrNumber nKeyAtts; + if (!PointerIsValid(irel)) return NBTS_CTX_DEFAULT; - if (IndexRelationGetNumberOfKeyAttributes(irel) == 1) + nKeyAtts = IndexRelationGetNumberOfKeyAttributes(irel); + + if (nKeyAtts == 1) return NBTS_CTX_SINGLE_KEYATT; + if (TupleDescAttr(irel->rd_att, nKeyAtts - 1)->attcacheoff < -1) + return NBTS_CTX_UNCACHED; /* NOTE(review): assumes values below -1 flag offsets that can never be cached - confirm */ + return NBTS_CTX_CACHED; } diff --git a/src/include/access/nbtree_spec.h b/src/include/access/nbtree_spec.h index 3ad64aad39..a57d69f588 100644 --- a/src/include/access/nbtree_spec.h +++ b/src/include/access/nbtree_spec.h @@ -44,6 +44,7 @@ * Macros used in the nbtree specialization code. */ #define NBTS_TYPE_SINGLE_KEYATT single_keyatt +#define NBTS_TYPE_UNCACHED uncached #define NBTS_TYPE_CACHED cached #define NBTS_TYPE_DEFAULT default #define NBTS_CTX_NAME __nbts_ctx @@ -52,8 +53,10 @@ #define NBTS_MAKE_CTX(rel) const NBTS_CTX NBTS_CTX_NAME = _nbt_spec_context(rel) #define NBTS_SPECIALIZE_NAME(name) ( \ (NBTS_CTX_NAME) == NBTS_CTX_SINGLE_KEYATT ? (NBTS_MAKE_NAME(name, NBTS_TYPE_SINGLE_KEYATT)) : ( \ - (NBTS_CTX_NAME) == NBTS_CTX_CACHED ? (NBTS_MAKE_NAME(name, NBTS_TYPE_CACHED)) : ( \ - NBTS_MAKE_NAME(name, NBTS_TYPE_DEFAULT) \ + (NBTS_CTX_NAME) == NBTS_CTX_UNCACHED ? (NBTS_MAKE_NAME(name, NBTS_TYPE_UNCACHED)) : ( \ + (NBTS_CTX_NAME) == NBTS_CTX_CACHED ? (NBTS_MAKE_NAME(name, NBTS_TYPE_CACHED)) : ( \ + NBTS_MAKE_NAME(name, NBTS_TYPE_DEFAULT) \ + ) \ ) \ ) \ ) @@ -68,9 +71,12 @@ do { \ Assert(PointerIsValid(rel)); \ if (unlikely((rel)->rd_indam->aminsert == btinsert_default)) \ { \ - nbts_prep_ctx(rel); \ Assert(PointerIsValid(rel)); \ - _bt_specialize(rel); \ + PopulateTupleDescCacheOffsets((rel)->rd_att); \ + { \ + nbts_prep_ctx(rel); \ + _bt_specialize(rel); \ + } \ } \ } while (false) @@ -216,6 +222,40 @@ do { \ #undef nbts_attiter_nextattdatum #undef nbts_attiter_curattisnull +/* + * Multiple key columns, but the attcacheoff optimization doesn't apply. 
+ */ +#define NBTS_SPECIALIZING_UNCACHED +#define NBTS_TYPE NBTS_TYPE_UNCACHED + +#define nbts_attiterdeclare(itup) \ + IAttrIterStateData NBTS_MAKE_NAME(itup, iter) + +#define nbts_attiterinit(itup, initAttNum, tupDesc) \ + index_attiterinit((itup), (initAttNum), (tupDesc), &(NBTS_MAKE_NAME(itup, iter))) + +#define nbts_foreachattr(initAttNum, endAttNum) \ + for (int spec_i = (initAttNum); spec_i <= (endAttNum); spec_i++) + +#define nbts_attiter_attnum spec_i /* loop variable declared by nbts_foreachattr */ + +#define nbts_attiter_nextattdatum(itup, tupDesc) \ + index_attiternext((itup), spec_i, (tupDesc), &(NBTS_MAKE_NAME(itup, iter))) + +#define nbts_attiter_curattisnull(itup) \ + NBTS_MAKE_NAME(itup, iter).isNull + +#include NBT_SPECIALIZE_FILE + +#undef NBTS_TYPE +#undef NBTS_SPECIALIZING_UNCACHED +#undef nbts_attiterdeclare +#undef nbts_attiterinit +#undef nbts_foreachattr +#undef nbts_attiter_attnum +#undef nbts_attiter_nextattdatum +#undef nbts_attiter_curattisnull + /* * All subsequent contexts are from non-templated code, so * they need to actually include the context. -- 2.39.0