*** a/contrib/pageinspect/Makefile --- b/contrib/pageinspect/Makefile *************** *** 4,10 **** MODULE_big = pageinspect OBJS = rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o brinfuncs.o $(WIN32RES) EXTENSION = pageinspect ! DATA = pageinspect--1.2.sql pageinspect--1.0--1.1.sql \ pageinspect--1.1--1.2.sql pageinspect--unpackaged--1.0.sql PGFILEDESC = "pageinspect - functions to inspect contents of database pages" --- 4,11 ---- OBJS = rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o brinfuncs.o $(WIN32RES) EXTENSION = pageinspect ! DATA = pageinspect--1.3.sql pageinspect--1.0--1.1.sql \ ! pageinspect--1.2--1.3.sql \ pageinspect--1.1--1.2.sql pageinspect--unpackaged--1.0.sql PGFILEDESC = "pageinspect - functions to inspect contents of database pages" *** a/contrib/pageinspect/brinfuncs.c --- b/contrib/pageinspect/brinfuncs.c *************** *** 45,51 **** typedef struct brin_page_state bool unusedItem; bool done; AttrNumber attno; ! DeformedBrTuple *dtup; brin_column_state *columns[FLEXIBLE_ARRAY_MEMBER]; } brin_page_state; --- 45,51 ---- bool unusedItem; bool done; AttrNumber attno; ! BrinMemTuple *dtup; brin_column_state *columns[FLEXIBLE_ARRAY_MEMBER]; } brin_page_state; *************** *** 207,214 **** brin_page_items(PG_FUNCTION_ARGS) if (!state->done) { HeapTuple result; ! Datum values[5]; ! bool nulls[5]; /* * This loop is called once for every attribute of every tuple in the --- 207,214 ---- if (!state->done) { HeapTuple result; ! Datum values[6]; ! bool nulls[6]; /* * This loop is called once for every attribute of every tuple in the *************** *** 218,224 **** brin_page_items(PG_FUNCTION_ARGS) */ if (state->dtup == NULL) { ! BrTuple *tup; MemoryContext mctx; ItemId itemId; --- 218,224 ---- */ if (state->dtup == NULL) { ! BrinTuple *tup; MemoryContext mctx; ItemId itemId; *************** *** 229,235 **** brin_page_items(PG_FUNCTION_ARGS) itemId = PageGetItemId(state->page, state->offset); if (ItemIdIsUsed(itemId)) { ! 
tup = (BrTuple *) PageGetItem(state->page, PageGetItemId(state->page, state->offset)); state->dtup = brin_deform_tuple(state->bdesc, tup); --- 229,235 ---- itemId = PageGetItemId(state->page, state->offset); if (ItemIdIsUsed(itemId)) { ! tup = (BrinTuple *) PageGetItem(state->page, PageGetItemId(state->page, state->offset)); state->dtup = brin_deform_tuple(state->bdesc, tup); *************** *** 253,258 **** brin_page_items(PG_FUNCTION_ARGS) --- 253,259 ---- nulls[2] = true; nulls[3] = true; nulls[4] = true; + nulls[5] = true; } else { *************** *** 260,270 **** brin_page_items(PG_FUNCTION_ARGS) values[0] = UInt16GetDatum(state->offset); values[1] = UInt16GetDatum(state->attno); ! values[2] = BoolGetDatum(state->dtup->dt_columns[att].allnulls); ! values[3] = BoolGetDatum(state->dtup->dt_columns[att].hasnulls); ! if (!state->dtup->dt_columns[att].allnulls) { ! BrinValues *bvalues = &state->dtup->dt_columns[att]; StringInfoData s; bool first; int i; --- 261,272 ---- values[0] = UInt16GetDatum(state->offset); values[1] = UInt16GetDatum(state->attno); ! values[2] = BoolGetDatum(state->dtup->bt_columns[att].bv_allnulls); ! values[3] = BoolGetDatum(state->dtup->bt_columns[att].bv_hasnulls); ! values[4] = BoolGetDatum(state->dtup->bt_placeholder); ! if (!state->dtup->bt_columns[att].bv_allnulls) { ! BrinValues *bvalues = &state->dtup->bt_columns[att]; StringInfoData s; bool first; int i; *************** *** 281,298 **** brin_page_items(PG_FUNCTION_ARGS) appendStringInfoString(&s, " .. "); first = false; val = OutputFunctionCall(&state->columns[att]->outputFn[i], ! bvalues->values[i]); appendStringInfoString(&s, val); pfree(val); } appendStringInfoChar(&s, '}'); ! values[4] = CStringGetTextDatum(s.data); pfree(s.data); } else { ! nulls[4] = true; } } --- 283,300 ---- appendStringInfoString(&s, " .. "); first = false; val = OutputFunctionCall(&state->columns[att]->outputFn[i], ! 
bvalues->bv_values[i]); appendStringInfoString(&s, val); pfree(val); } appendStringInfoChar(&s, '}'); ! values[5] = CStringGetTextDatum(s.data); pfree(s.data); } else { ! nulls[5] = true; } } *** /dev/null --- b/contrib/pageinspect/pageinspect--1.2--1.3.sql *************** *** 0 **** --- 1,42 ---- + /* contrib/pageinspect/pageinspect--1.2--1.3.sql */ + + -- complain if script is sourced in psql, rather than via ALTER EXTENSION + \echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.3'" to load this file. \quit + + -- + -- brin_page_type() + -- + CREATE FUNCTION brin_page_type(IN page bytea) + RETURNS text + AS 'MODULE_PATHNAME', 'brin_page_type' + LANGUAGE C STRICT; + + -- + -- brin_metapage_info() + -- + CREATE FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint) + AS 'MODULE_PATHNAME', 'brin_metapage_info' + LANGUAGE C STRICT; + + -- + -- brin_page_items() + -- + CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid oid, + OUT itemoffset int, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) + RETURNS SETOF record + AS 'MODULE_PATHNAME', 'brin_page_items' + LANGUAGE C STRICT; + + -- + -- brin_revmap_data() + CREATE FUNCTION brin_revmap_data(IN page bytea, + OUT pages tid) + RETURNS SETOF tid + AS 'MODULE_PATHNAME', 'brin_revmap_data' + LANGUAGE C STRICT; *** a/contrib/pageinspect/pageinspect--1.2.sql --- /dev/null *************** *** 1,144 **** - /* contrib/pageinspect/pageinspect--1.2.sql */ - - -- complain if script is sourced in psql, rather than via CREATE EXTENSION - \echo Use "CREATE EXTENSION pageinspect" to load this file. 
\quit - - -- - -- get_raw_page() - -- - CREATE FUNCTION get_raw_page(text, int4) - RETURNS bytea - AS 'MODULE_PATHNAME', 'get_raw_page' - LANGUAGE C STRICT; - - CREATE FUNCTION get_raw_page(text, text, int4) - RETURNS bytea - AS 'MODULE_PATHNAME', 'get_raw_page_fork' - LANGUAGE C STRICT; - - -- - -- page_header() - -- - CREATE FUNCTION page_header(IN page bytea, - OUT lsn pg_lsn, - OUT checksum smallint, - OUT flags smallint, - OUT lower smallint, - OUT upper smallint, - OUT special smallint, - OUT pagesize smallint, - OUT version smallint, - OUT prune_xid xid) - AS 'MODULE_PATHNAME', 'page_header' - LANGUAGE C STRICT; - - -- - -- heap_page_items() - -- - CREATE FUNCTION heap_page_items(IN page bytea, - OUT lp smallint, - OUT lp_off smallint, - OUT lp_flags smallint, - OUT lp_len smallint, - OUT t_xmin xid, - OUT t_xmax xid, - OUT t_field3 int4, - OUT t_ctid tid, - OUT t_infomask2 integer, - OUT t_infomask integer, - OUT t_hoff smallint, - OUT t_bits text, - OUT t_oid oid) - RETURNS SETOF record - AS 'MODULE_PATHNAME', 'heap_page_items' - LANGUAGE C STRICT; - - -- - -- bt_metap() - -- - CREATE FUNCTION bt_metap(IN relname text, - OUT magic int4, - OUT version int4, - OUT root int4, - OUT level int4, - OUT fastroot int4, - OUT fastlevel int4) - AS 'MODULE_PATHNAME', 'bt_metap' - LANGUAGE C STRICT; - - -- - -- bt_page_stats() - -- - CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int4, - OUT blkno int4, - OUT type "char", - OUT live_items int4, - OUT dead_items int4, - OUT avg_item_size int4, - OUT page_size int4, - OUT free_size int4, - OUT btpo_prev int4, - OUT btpo_next int4, - OUT btpo int4, - OUT btpo_flags int4) - AS 'MODULE_PATHNAME', 'bt_page_stats' - LANGUAGE C STRICT; - - -- - -- bt_page_items() - -- - CREATE FUNCTION bt_page_items(IN relname text, IN blkno int4, - OUT itemoffset smallint, - OUT ctid tid, - OUT itemlen smallint, - OUT nulls bool, - OUT vars bool, - OUT data text) - RETURNS SETOF record - AS 'MODULE_PATHNAME', 'bt_page_items' - 
LANGUAGE C STRICT; - - -- - -- brin_page_type() - -- - CREATE FUNCTION brin_page_type(IN page bytea) - RETURNS text - AS 'MODULE_PATHNAME', 'brin_page_type' - LANGUAGE C STRICT; - - -- - -- brin_metapage_info() - -- - CREATE FUNCTION brin_metapage_info(IN page bytea, OUT magic text, - OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint) - AS 'MODULE_PATHNAME', 'brin_metapage_info' - LANGUAGE C STRICT; - - -- - -- brin_page_items() - -- - CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid oid, - OUT itemoffset int, - OUT attnum int, - OUT allnulls bool, - OUT hasnulls bool, - OUT value text) - RETURNS SETOF record - AS 'MODULE_PATHNAME', 'brin_page_items' - LANGUAGE C STRICT; - - -- - -- brin_revmap_data() - CREATE FUNCTION brin_revmap_data(IN page bytea, - OUT pages tid) - RETURNS SETOF tid - AS 'MODULE_PATHNAME', 'brin_revmap_data' - LANGUAGE C STRICT; - - -- - -- fsm_page_contents() - -- - CREATE FUNCTION fsm_page_contents(IN page bytea) - RETURNS text - AS 'MODULE_PATHNAME', 'fsm_page_contents' - LANGUAGE C STRICT; --- 0 ---- *** /dev/null --- b/contrib/pageinspect/pageinspect--1.3.sql *************** *** 0 **** --- 1,145 ---- + /* contrib/pageinspect/pageinspect--1.3.sql */ + + -- complain if script is sourced in psql, rather than via CREATE EXTENSION + \echo Use "CREATE EXTENSION pageinspect" to load this file. 
\quit + + -- + -- get_raw_page() + -- + CREATE FUNCTION get_raw_page(text, int4) + RETURNS bytea + AS 'MODULE_PATHNAME', 'get_raw_page' + LANGUAGE C STRICT; + + CREATE FUNCTION get_raw_page(text, text, int4) + RETURNS bytea + AS 'MODULE_PATHNAME', 'get_raw_page_fork' + LANGUAGE C STRICT; + + -- + -- page_header() + -- + CREATE FUNCTION page_header(IN page bytea, + OUT lsn pg_lsn, + OUT checksum smallint, + OUT flags smallint, + OUT lower smallint, + OUT upper smallint, + OUT special smallint, + OUT pagesize smallint, + OUT version smallint, + OUT prune_xid xid) + AS 'MODULE_PATHNAME', 'page_header' + LANGUAGE C STRICT; + + -- + -- heap_page_items() + -- + CREATE FUNCTION heap_page_items(IN page bytea, + OUT lp smallint, + OUT lp_off smallint, + OUT lp_flags smallint, + OUT lp_len smallint, + OUT t_xmin xid, + OUT t_xmax xid, + OUT t_field3 int4, + OUT t_ctid tid, + OUT t_infomask2 integer, + OUT t_infomask integer, + OUT t_hoff smallint, + OUT t_bits text, + OUT t_oid oid) + RETURNS SETOF record + AS 'MODULE_PATHNAME', 'heap_page_items' + LANGUAGE C STRICT; + + -- + -- bt_metap() + -- + CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int4, + OUT level int4, + OUT fastroot int4, + OUT fastlevel int4) + AS 'MODULE_PATHNAME', 'bt_metap' + LANGUAGE C STRICT; + + -- + -- bt_page_stats() + -- + CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int4, + OUT blkno int4, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int4, + OUT btpo_next int4, + OUT btpo int4, + OUT btpo_flags int4) + AS 'MODULE_PATHNAME', 'bt_page_stats' + LANGUAGE C STRICT; + + -- + -- bt_page_items() + -- + CREATE FUNCTION bt_page_items(IN relname text, IN blkno int4, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars bool, + OUT data text) + RETURNS SETOF record + AS 'MODULE_PATHNAME', 'bt_page_items' + 
LANGUAGE C STRICT; + + -- + -- brin_page_type() + -- + CREATE FUNCTION brin_page_type(IN page bytea) + RETURNS text + AS 'MODULE_PATHNAME', 'brin_page_type' + LANGUAGE C STRICT; + + -- + -- brin_metapage_info() + -- + CREATE FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint) + AS 'MODULE_PATHNAME', 'brin_metapage_info' + LANGUAGE C STRICT; + + -- + -- brin_page_items() + -- + CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid oid, + OUT itemoffset int, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) + RETURNS SETOF record + AS 'MODULE_PATHNAME', 'brin_page_items' + LANGUAGE C STRICT; + + -- + -- brin_revmap_data() + CREATE FUNCTION brin_revmap_data(IN page bytea, + OUT pages tid) + RETURNS SETOF tid + AS 'MODULE_PATHNAME', 'brin_revmap_data' + LANGUAGE C STRICT; + + -- + -- fsm_page_contents() + -- + CREATE FUNCTION fsm_page_contents(IN page bytea) + RETURNS text + AS 'MODULE_PATHNAME', 'fsm_page_contents' + LANGUAGE C STRICT; *** a/doc/src/sgml/brin.sgml --- b/doc/src/sgml/brin.sgml *************** *** 17,24 **** in which certain columns have some natural correlation with their physical location within the table. A block range is a group of pages that are physically ! adjacent in the table. ! For each block range, some summary info is stored by the index. For example, a table storing a store's sale orders might have a date column on which each order was placed, and most of the time the entries for earlier orders will appear earlier in the table as well; --- 17,24 ---- in which certain columns have some natural correlation with their physical location within the table. A block range is a group of pages that are physically ! adjacent in the table; for each block range, some summary info is stored ! by the index. 
For example, a table storing a store's sale orders might have a date column on which each order was placed, and most of the time the entries for earlier orders will appear earlier in the table as well; *************** *** 27,57 **** ! BRIN indexes can satisfy queries via the bitmap ! scanning facility, and will return all tuples in all pages within ! each range if the summary info stored by the index is ! consistent with the query conditions. ! The query executor ! is in charge of rechecking these tuples and discarding those that do not ! match the query conditions — in other words, these indexes are lossy. ! This enables them to work as very fast sequential scan helpers to avoid ! scanning blocks that are known not to contain matching tuples. The specific data that a BRIN index will store, as well as the specific queries that the index will be able to satisfy, ! depend on the operator class selected for the data type. ! Datatypes having a linear sort order can have operator classes that store the minimum and maximum value within each block range, for instance; geometrical types might store the bounding box for all the objects in the block range. ! The size of the block range is determined at index creation time with ! the pages_per_range storage parameter. ! The smaller the number, the larger the index becomes (because of the need to store more index entries), but at the same time the summary data stored can be more precise and more data blocks can be skipped during an index scan. --- 27,60 ---- ! BRIN indexes can satisfy queries via regular bitmap ! index scans, and will return all tuples in all pages within each range if ! the summary info stored by the index is consistent with the ! query conditions. ! The query executor is in charge of rechecking these tuples and discarding ! those that do not match the query conditions — in other words, these ! indexes are lossy. ! Because a BRIN index is very small, scanning the index ! 
adds little overhead compared to a sequential scan, but may avoid scanning ! large parts of the table that are known not to contain matching tuples. The specific data that a BRIN index will store, as well as the specific queries that the index will be able to satisfy, ! depend on the operator class selected for each column of the index. ! Data types having a linear sort order can have operator classes that store the minimum and maximum value within each block range, for instance; geometrical types might store the bounding box for all the objects in the block range. ! The size of the block range is determined at index creation time by ! the pages_per_range storage parameter. The number of index ! entries will be equal to the size of the relation in pages divided by ! the selected value for pages_per_range. Therefore, the smaller ! the number, the larger the index becomes (because of the need to store more index entries), but at the same time the summary data stored can be more precise and more data blocks can be skipped during an index scan. *************** *** 63,69 **** The core PostgreSQL distribution includes includes the BRIN operator classes shown in ! . --- 66,72 ---- The core PostgreSQL distribution includes includes the BRIN operator classes shown in ! . *************** *** 477,483 **** typedef struct BrinOpcInfo To implement these methods in a generic way, the operator class defines its own internal support functions. (For instance, min/max operator classes implements ! support functions for the four inequality operators for the datatype.) Additionally, the operator class must supply appropriate operator entries, to enable the optimizer to use the index when those operators are --- 480,486 ---- To implement these methods in a generic way, the operator class defines its own internal support functions. (For instance, min/max operator classes implements ! support functions for the four inequality operators for the data type.) 
Additionally, the operator class must supply appropriate operator entries, to enable the optimizer to use the index when those operators are *** a/src/backend/access/brin/README --- b/src/backend/access/brin/README *************** *** 6,168 **** BRIN indexes intend to enable very fast scanning of extremely large tables. The essential idea of a BRIN index is to keep track of summarizing values in consecutive groups of heap pages (page ranges); for example, the minimum and maximum values for datatypes with a btree opclass, or the bounding box for ! geometric types. These values can be used to avoid scanning such pages, ! depending on query quals. - The cost of this is having to update the stored summary values of each - page range as tuples are inserted into them. Access Method Design -------------------- Since item pointers are not stored inside indexes of this type, it is not possible to support the amgettuple interface. Instead, we only provide ! amgetbitmap support; scanning a relation using this index always requires a ! recheck node on top. The amgetbitmap routine returns a TIDBitmap comprising ! all pages in those page groups that match the query qualifications. The ! recheck node prunes tuples that are not visible according to the query ! qualifications. ! ! For each supported datatype, we need an operator class with the following ! catalog entries: ! ! - support procedures (pg_amproc): ! * "opcinfo" (procno 1) initializes a structure for index creation or scanning ! * "addValue" (procno 2) takes an index tuple and a heap item, and possibly ! changes the index tuple so that it includes the heap item values ! * "consistent" (procno 3) takes an index tuple and query quals, and returns ! whether the index tuple values match the query quals. ! * "union" (procno 4) takes two index tuples and modifies the first one so that ! it represents the union of the two. ! * For minmax, proc numbers 5-8 are used for the functions implementing ! 
inequality operators for the type, in this order: less than, less or equal, ! greater or equal, greater than. Opclasses using a different design will ! require different additional procedure numbers. ! - support operators (pg_amop): for minmax, the same operators as btree (<=, <, ! =, >=, >) so that the index is chosen by the optimizer on queries. ! ! In each index tuple (corresponding to one page range), we store: ! - for each indexed column of a datatype with a btree-opclass: * minimum value across all tuples in the range * maximum value across all tuples in the range - * are there nulls present in any tuple? - * are null all the values in all tuples in the range? - - Different datatypes store other values instead of min/max, for example - geometric types might store a bounding box. The NULL bits are always present. - - These null bits are stored in a single null bitmask of length 2x number of - columns. - - With the default INDEX_MAX_KEYS of 32, and considering columns of 8-byte length - types such as timestamptz or bigint, each tuple would be 522 bytes in length, - which seems reasonable. There are 6 extra bytes for padding between the null - bitmask and the first data item, assuming 64-bit alignment; so the total size - for such an index tuple would actually be 528 bytes. ! This maximum index tuple size is calculated as: mt_info (2 bytes) + null bitmap ! (8 bytes) + data value (8 bytes) * 32 * 2 - (Of course, larger columns are possible, such as varchar, but creating BRIN - indexes on such columns seems of little practical usefulness. Also, the - usefulness of an index containing so many columns is dubious.) ! There can be gaps where some pages have no covering index entry. ! ! The Range Reverse Map ! --------------------- To find the index tuple for a particular page range, we have an internal ! structure we call the range reverse map. This stores one TID per page range, ! which is the address of the index tuple summarizing that range. Since these ! 
map entries are fixed size, it is possible to compute the address of the range ! map entry for any given heap page by simple arithmetic. When a new heap tuple is inserted in a summarized page range, we compare the ! existing index tuple with the new heap tuple. If the heap tuple is outside the ! summarization data given by the index tuple for any indexed column (or if the ! new heap tuple contains null values but the index tuple indicates there are no ! nulls), it is necessary to create a new index tuple with the new values. To do ! this, a new index tuple is inserted, and the reverse range map is updated to ! point to it; the old index tuple is removed. ! ! If the reverse range map points to an invalid TID, the corresponding page range ! is considered to be not summarized. When tuples are added to unsummarized pages, nothing needs to happen. ! To scan a table following a BRIN index, we scan the reverse range map ! sequentially. This yields index tuples in ascending page range order. Query ! quals are matched to each index tuple; if they match, each page within the page ! range is returned as part of the output TID bitmap. If there's no match, they ! are skipped. Reverse range map entries returning invalid index TIDs, that is unsummarized page ranges, are also returned in the TID bitmap. ! The revmap is stored in the first few blocks of the index main fork, immediately ! following the metapage. Whenever the revmap needs to be extended by another ! page, existing tuples in that page are moved to some other page. Heap tuples can be removed from anywhere without restriction. It might be ! useful to mark the corresponding index tuple somehow, if the heap tuple is one ! of the constraining values of the summary data (i.e. either min or max in the ! case of a btree-opclass-bearing datatype), so that in the future we are aware ! of the need to re-execute summarization on that range, leading to a possible ! tightening of the summary values. 
Summarization ------------- At index creation time, the whole table is scanned; for each page range the summarizing values of each indexed column and nulls bitmap are collected and ! stored in the index. ! Once in a while, it is necessary to summarize a bunch of unsummarized pages ! (because the table has grown since the index was created), or re-summarize a ! range that has been marked invalid. This is simple: scan the page range ! calculating the summary values for each indexed column, then insert the new ! index entry at the end of the index. We do this during vacuum. Vacuuming --------- ! Vacuuming a table that has a BRIN index does not represent a significant ! challenge. Since no heap TIDs are stored, it's not necessary to scan the index ! when heap tuples are removed. It might be that some summary values can be ! tightened if heap tuples have been deleted; but this would represent an optimization opportunity only, not a correctness issue. It's simpler to ! represent this as the need to re-run summarization on the affected page ! range rather than "subtracting" values from the existing one. Note that if there are no indexes on the table other than the BRIN index, usage of maintenance_work_mem by vacuum can be decreased significantly, because no detailed index scan needs to take place (and thus it's not necessary for vacuum to save TIDs to remove). It's unlikely that BRIN would be the only ! indexes in a table, though, because primary keys can be btrees only. Optimizer --------- ! In order to make this all work, the only thing we need to do is ensure we have a ! good enough opclass and amcostestimate. With this, the optimizer is able to pick ! up the index on its own. ! Open questions ! -------------- ! * Same-size page ranges? ! Current related literature seems to consider that each "index entry" in a ! BRIN index must cover the same number of pages. There doesn't seem to be a ! hard reason for this to be so; it might make sense to allow the index to ! 
self-tune so that some index entries cover smaller page ranges, if this allows ! the summary values to be more compact. This would incur larger BRIN ! overhead for the index itself, but might allow better pruning of page ranges ! during scan. In the limit of one index tuple per page, the index itself would ! occupy too much space, even though we would be able to skip reading the most ! heap pages, because the summary values are tight; in the opposite limit of ! a single tuple that summarizes the whole table, we wouldn't be able to prune ! anything even though the index is very small. This can probably be made to work ! by using the reverse range map as an index in itself. * More compact representation for TIDBitmap? TIDBitmap is the structure used to represent bitmap scans. The --- 6,178 ---- The essential idea of a BRIN index is to keep track of summarizing values in consecutive groups of heap pages (page ranges); for example, the minimum and maximum values for datatypes with a btree opclass, or the bounding box for ! geometric types. These values can be used to avoid scanning such pages ! during a table scan, depending on query quals. ! ! The cost of this is having to update the stored summary values of each page ! range as tuples are inserted into them. Access Method Design -------------------- Since item pointers are not stored inside indexes of this type, it is not possible to support the amgettuple interface. Instead, we only provide ! amgetbitmap support. The amgetbitmap routine returns a lossy TIDBitmap ! comprising all pages in those page ranges that match the query ! qualifications. The recheck step in the BitmapHeapScan node prunes tuples ! that are not visible according to the query qualifications. ! ! An operator class must have the following entries: ! ! - generic support procedures (pg_amproc), identical to all opclasses: ! * "opcinfo" (BRIN_PROCNUM_OPCINFO) initializes a structure for index ! creation or scanning ! 
* "addValue" (BRIN_PROCNUM_ADDVALUE) takes an index tuple and a heap item, ! and possibly changes the index tuple so that it includes the heap item ! values ! * "consistent" (BRIN_PROCNUM_CONSISTENT) takes an index tuple and query ! quals, and returns whether the index tuple values match the query quals. ! * "union" (BRIN_PROCNUM_UNION) takes two index tuples and modifies the first ! one so that it represents the union of the two. ! Procedure numbers up to 10 are reserved for future expansion. ! ! Additionally, each opclass needs additional support functions: ! - Minmax-style operator classes: ! * Proc numbers 11-14 are used for the functions implementing inequality ! operators for the type, in this order: less than, less or equal, ! greater or equal, greater than. ! ! Opclasses using a different design will require different additional procedure ! numbers. ! ! Operator classes also need to have operator (pg_amop) entries so that the ! optimizer can choose the index to execute queries. ! - Minmax-style operator classes: ! * The same operators as btree (<=, <, =, >=, >) ! ! Each index tuple stores some NULL bits and some opclass-specified values; the ! NULL bits are stored in a single null bitmask of length twice the number of ! columns. The generic NULL bits indicate, for each column: ! * bv_hasnulls: Whether there's any NULL value at all in the page range ! * bv_allnulls: Whether all values are NULLs in the page range ! ! The opclass-specified values are: ! - Minmax-style operator classes * minimum value across all tuples in the range * maximum value across all tuples in the range ! Note that the addValue and Union support procedures must be careful to ! datumCopy() the values they want to store in the in-memory BRIN tuple, and ! must pfree() the old copies when replacing older ones. Since some values ! referenced from the tuple persist and others go away, there is no ! well-defined lifetime for a memory context that would make this automatic. ! The Range Map ! 
------------- To find the index tuple for a particular page range, we have an internal ! structure we call the range map, or "revmap" for short. This stores one TID ! per page range, which is the address of the index tuple summarizing that ! range. Since the map entries are fixed size, it is possible to compute the ! address of the range map entry for any given heap page by simple arithmetic. When a new heap tuple is inserted in a summarized page range, we compare the ! existing index tuple with the new heap tuple. If the heap tuple is outside ! the summarization data given by the index tuple for any indexed column (or ! if the new heap tuple contains null values but the index tuple indicates ! there are no nulls), the index is updated with the new values. In many ! cases it is possible to update the index tuple in-place, but if the new ! index tuple is larger than the old one and there's not enough space in the ! page, it is necessary to create a new index tuple with the new values. The ! range map can be updated quickly to point to it; the old index tuple is ! removed. ! ! If the range map points to an invalid TID, the corresponding page range is ! considered to be not summarized. When tuples are added to unsummarized pages, nothing needs to happen. ! To scan a table following a BRIN index, we scan the range map sequentially. ! This yields index tuples in ascending page range order. Query quals are ! matched to each index tuple; if they match, each page within the page range ! is returned as part of the output TID bitmap. If there's no match, they are ! skipped. Range map entries returning invalid index TIDs, that is unsummarized page ranges, are also returned in the TID bitmap. ! The revmap is stored in the first few blocks of the index main fork, ! immediately following the metapage. Whenever the revmap needs to be ! extended by another page, existing tuples in that page are moved to some ! other page. 
Heap tuples can be removed from anywhere without restriction. It might be ! useful to mark the corresponding index tuple somehow, if the heap tuple is ! one of the constraining values of the summary data (i.e. either min or max ! in the case of a btree-opclass-bearing datatype), so that in the future we ! are aware of the need to re-execute summarization on that range, leading to ! a possible tightening of the summary values. Summarization ------------- At index creation time, the whole table is scanned; for each page range the summarizing values of each indexed column and nulls bitmap are collected and ! stored in the index. The partially-filled page range at the end of the ! table is also summarized. ! ! As new tuples get inserted at the end of the table, they may update the ! index tuple that summarizes the partial page range at the end. Eventually ! that page range is complete and new tuples belong in a new page range that ! hasn't yet been summarized. Those insertions do not create a new index ! entry; instead, the page range remains unsummarized until later. ! When VACUUM is run on the table, all unsummarized page ranges are ! summarized. This action can also be invoked by the user via ! brin_summarize_new_values(). Both these procedures scan all the ! unsummarized ranges, and create a summary tuple. Again, this includes the ! partially-filled page range at the end of the table. Vacuuming --------- ! Since no heap TIDs are stored in a BRIN index, it's not necessary to scan the ! index when heap tuples are removed. It might be that some summary values can ! be tightened if heap tuples have been deleted; but this would represent an optimization opportunity only, not a correctness issue. It's simpler to ! represent this as the need to re-run summarization on the affected page range ! rather than "subtracting" values from the existing one. This is not ! currently implemented. 
Note that if there are no indexes on the table other than the BRIN index, usage of maintenance_work_mem by vacuum can be decreased significantly, because no detailed index scan needs to take place (and thus it's not necessary for vacuum to save TIDs to remove). It's unlikely that BRIN would be the only ! index in a table, though, because primary keys can be btrees only, and so ! we don't implement this optimization. Optimizer --------- ! The optimizer selects the index based on the operator class' pg_amop ! entries for the column. ! Future improvements ! ------------------- ! * Different-size page ranges? ! In the current design, each "index entry" in a BRIN index covers the same ! number of pages. There's no hard reason for this; it might make sense to ! allow the index to self-tune so that some index entries cover smaller page ! ranges, if this allows the summary values to be more compact. This would incur ! larger BRIN overhead for the index itself, but might allow better pruning of ! page ranges during scan. In the limit of one index tuple per page, the index ! itself would occupy too much space, even though we would be able to skip ! reading the most heap pages, because the summary values are tight; in the ! opposite limit of a single tuple that summarizes the whole table, we wouldn't ! be able to prune anything even though the index is very small. This can ! probably be made to work by using the range map as an index in itself. * More compact representation for TIDBitmap? TIDBitmap is the structure used to represent bitmap scans. The *************** *** 175,179 **** Open questions It might be useful to enable passing more useful info to BRIN indexes during vacuuming about tuples that are deleted, i.e. do not require the callback to pass each tuple's TID. For instance we might need a callback that passes a ! block number instead. That would help determine when to re-run summarization ! on blocks that have seen lots of tuple deletions.
--- 185,189 ---- It might be useful to enable passing more useful info to BRIN indexes during vacuuming about tuples that are deleted, i.e. do not require the callback to pass each tuple's TID. For instance we might need a callback that passes a ! block number instead of a TID. That would help determine when to re-run ! summarization on blocks that have seen lots of tuple deletions. *** a/src/backend/access/brin/brin.c --- b/src/backend/access/brin/brin.c *************** *** 2,7 **** --- 2,9 ---- * brin.c * Implementation of BRIN indexes for Postgres * + * See src/backend/access/brin/README for details. + * * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *************** *** 33,39 **** /* * We use a BrinBuildState during initial construction of a BRIN index. ! * The running state is kept in a DeformedBrTuple. */ typedef struct BrinBuildState { --- 35,41 ---- /* * We use a BrinBuildState during initial construction of a BRIN index. ! * The running state is kept in a BrinMemTuple. */ typedef struct BrinBuildState { *************** *** 45,51 **** typedef struct BrinBuildState brinRmAccess *bs_rmAccess; BrinDesc *bs_bdesc; bool seentup; ! DeformedBrTuple *dtuple; } BrinBuildState; /* --- 47,53 ---- brinRmAccess *bs_rmAccess; BrinDesc *bs_bdesc; bool seentup; ! BrinMemTuple *dtuple; } BrinBuildState; /* *************** *** 58,71 **** typedef struct BrinOpaque BrinDesc *bo_bdesc; } BrinOpaque; static BrinBuildState *initialize_brin_buildstate(Relation idxRel, brinRmAccess *rmAccess, BlockNumber pagesPerRange); static void terminate_brin_buildstate(BrinBuildState *state); ! static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, ! BlockNumber heapBlk); static void form_and_insert_tuple(BrinBuildState *state); ! static void union_tuples(BrinDesc *bdesc, DeformedBrTuple *a, ! 
BrTuple *b); /* --- 60,75 ---- BrinDesc *bo_bdesc; } BrinOpaque; + PG_FUNCTION_INFO_V1(brin_summarize_new_values); + static BrinBuildState *initialize_brin_buildstate(Relation idxRel, brinRmAccess *rmAccess, BlockNumber pagesPerRange); static void terminate_brin_buildstate(BrinBuildState *state); ! static void brinsummarize(Relation index, Relation heapRel, double *numSummarized, ! double *numExisting); static void form_and_insert_tuple(BrinBuildState *state); ! static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, ! BrinTuple *b); /* *************** *** 90,95 **** brininsert(PG_FUNCTION_ARGS) --- 94,101 ---- BrinDesc *bdesc = NULL; brinRmAccess *rmAccess; Buffer buf = InvalidBuffer; + MemoryContext tupcxt; + MemoryContext oldcxt; rmAccess = brinRevmapAccessInit(idxRel, &pagesPerRange); *************** *** 97,108 **** brininsert(PG_FUNCTION_ARGS) { bool need_insert = false; OffsetNumber off; ! BrTuple *brtup; ! DeformedBrTuple *dtup; BlockNumber heapBlk; int keyno; ! BrTuple *tmptup PG_USED_FOR_ASSERTS_ONLY; ! Size tmpsiz PG_USED_FOR_ASSERTS_ONLY; CHECK_FOR_INTERRUPTS(); --- 103,115 ---- { bool need_insert = false; OffsetNumber off; ! BrinTuple *brtup; ! BrinMemTuple *dtup; BlockNumber heapBlk; int keyno; ! BrinTuple *tmptup PG_USED_FOR_ASSERTS_ONLY; ! BrinMemTuple *tmpdtup PG_USED_FOR_ASSERTS_ONLY; ! Size tmpsiz PG_USED_FOR_ASSERTS_ONLY; CHECK_FOR_INTERRUPTS(); *************** *** 116,137 **** brininsert(PG_FUNCTION_ARGS) if (!brtup) break; if (bdesc == NULL) bdesc = brin_build_desc(idxRel); dtup = brin_deform_tuple(bdesc, brtup); #ifdef USE_ASSERT_CHECKING { - DeformedBrTuple *tmpdtup; - /* * When assertions are enabled, we use this as an opportunity to * test the "union" method, which would otherwise be used very ! * rarely: first create a placeholder tuple, then addValue the * value we just got into it. Then union the existing index tuple * with the updated placeholder tuple. 
The tuple resulting from * that union should be identical to the one resulting from the ! * regular operation below. * * Here we create the tuple to compare with; the actual comparison * is below. --- 123,152 ---- if (!brtup) break; + /* First time through? */ if (bdesc == NULL) + { bdesc = brin_build_desc(idxRel); + tupcxt = AllocSetContextCreate(CurrentMemoryContext, + "brininsert cxt", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcxt = MemoryContextSwitchTo(tupcxt); + } + dtup = brin_deform_tuple(bdesc, brtup); #ifdef USE_ASSERT_CHECKING { /* * When assertions are enabled, we use this as an opportunity to * test the "union" method, which would otherwise be used very ! * rarely: first create a placeholder tuple, and addValue the * value we just got into it. Then union the existing index tuple * with the updated placeholder tuple. The tuple resulting from * that union should be identical to the one resulting from the ! * regular operation (straight addValue) below. * * Here we create the tuple to compare with; the actual comparison * is below. *************** *** 143,149 **** brininsert(PG_FUNCTION_ARGS) BrinValues *bval; FmgrInfo *addValue; ! bval = &tmpdtup->dt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); FunctionCall4Coll(addValue, --- 158,164 ---- BrinValues *bval; FmgrInfo *addValue; ! bval = &tmpdtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); FunctionCall4Coll(addValue, *************** *** 153,160 **** brininsert(PG_FUNCTION_ARGS) values[keyno], nulls[keyno]); } union_tuples(bdesc, tmpdtup, brtup); ! tmpdtup->dt_placeholder = dtup->dt_placeholder; tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz); } #endif --- 168,177 ---- values[keyno], nulls[keyno]); } + union_tuples(bdesc, tmpdtup, brtup); ! ! 
tmpdtup->bt_placeholder = dtup->bt_placeholder; tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz); } #endif *************** *** 172,178 **** brininsert(PG_FUNCTION_ARGS) BrinValues *bval; FmgrInfo *addValue; ! bval = &dtup->dt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, --- 189,195 ---- BrinValues *bval; FmgrInfo *addValue; ! bval = &dtup->bt_columns[keyno]; addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, *************** *** 188,198 **** brininsert(PG_FUNCTION_ARGS) #ifdef USE_ASSERT_CHECKING { /* ! * Now we can compare the tuple produced by the union function with ! * the one from plain addValue. */ ! BrTuple *cmptup; ! Size cmpsz; cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz); Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz)); --- 205,215 ---- #ifdef USE_ASSERT_CHECKING { /* ! * Now we can compare the tuple produced by the union function ! * with the one from plain addValue. */ ! BrinTuple *cmptup; ! Size cmpsz; cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz); Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz)); *************** *** 206,221 **** brininsert(PG_FUNCTION_ARGS) * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); - brin_free_dtuple(bdesc, dtup); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; ! BrTuple *origtup; Size newsz; ! BrTuple *newtup; bool samepage; /* --- 223,237 ---- * to do. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); } else { Page page = BufferGetPage(buf); ItemId lp = PageGetItemId(page, off); Size origsz; ! BrinTuple *origtup; Size newsz; ! BrinTuple *newtup; bool samepage; /* *************** *** 236,242 **** brininsert(PG_FUNCTION_ARGS) LockBuffer(buf, BUFFER_LOCK_UNLOCK); newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz); - brin_free_dtuple(bdesc, dtup); /* * Try to update the tuple. 
If this doesn't work for whatever --- 252,257 ---- *** a/src/backend/access/brin/brin_minmax.c --- b/src/backend/access/brin/brin_minmax.c *************** *** 25,40 **** * brin_internal.h. Note we only need inequality functions. */ #define MINMAX_NUM_PROCNUMS 4 /* # support procs we need */ ! #define PROCNUM_LESS 5 ! #define PROCNUM_LESSEQUAL 6 ! #define PROCNUM_GREATEREQUAL 7 ! #define PROCNUM_GREATER 8 /* * Subtract this from procnum to obtain index in MinmaxOpaque arrays * (Must be equal to minimum of private procnums) */ ! #define PROCNUM_BASE 5 static FmgrInfo *minmax_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum); --- 25,40 ---- * brin_internal.h. Note we only need inequality functions. */ #define MINMAX_NUM_PROCNUMS 4 /* # support procs we need */ ! #define PROCNUM_LESS 11 ! #define PROCNUM_LESSEQUAL 12 ! #define PROCNUM_GREATEREQUAL 13 ! #define PROCNUM_GREATER 14 /* * Subtract this from procnum to obtain index in MinmaxOpaque arrays * (Must be equal to minimum of private procnums) */ ! #define PROCNUM_BASE 11 static FmgrInfo *minmax_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum); *************** *** 54,65 **** typedef struct MinmaxOpaque Datum minmaxOpcInfo(PG_FUNCTION_ARGS) { ! Oid typoid = PG_GETARG_OID(0); BrinOpcInfo *result; /* ! * opaque->operators is initialized lazily, as indicated by 'inited' ! * which is initialized to all false by palloc0. */ result = palloc0(MAXALIGN(SizeofBrinOpcInfo(2)) + --- 54,65 ---- Datum minmaxOpcInfo(PG_FUNCTION_ARGS) { ! Oid typoid = PG_GETARG_OID(0); BrinOpcInfo *result; /* ! * opaque->operators is initialized lazily, as indicated by 'inited' which ! * is initialized to all false by palloc0. */ result = palloc0(MAXALIGN(SizeofBrinOpcInfo(2)) + *************** *** 100,124 **** minmaxAddValue(PG_FUNCTION_ARGS) */ if (isnull) { ! if (column->hasnulls) PG_RETURN_BOOL(false); ! column->hasnulls = true; PG_RETURN_BOOL(true); } ! 
attno = column->attno; attr = bdesc->bd_tupdesc->attrs[attno - 1]; /* * If the recorded value is null, store the new value (which we know to be * not null) as both minimum and maximum, and we're done. */ ! if (column->allnulls) { ! column->values[0] = datumCopy(newval, attr->attbyval, attr->attlen); ! column->values[1] = datumCopy(newval, attr->attbyval, attr->attlen); ! column->allnulls = false; PG_RETURN_BOOL(true); } --- 100,124 ---- */ if (isnull) { ! if (column->bv_hasnulls) PG_RETURN_BOOL(false); ! column->bv_hasnulls = true; PG_RETURN_BOOL(true); } ! attno = column->bv_attno; attr = bdesc->bd_tupdesc->attrs[attno - 1]; /* * If the recorded value is null, store the new value (which we know to be * not null) as both minimum and maximum, and we're done. */ ! if (column->bv_allnulls) { ! column->bv_values[0] = datumCopy(newval, attr->attbyval, attr->attlen); ! column->bv_values[1] = datumCopy(newval, attr->attbyval, attr->attlen); ! column->bv_allnulls = false; PG_RETURN_BOOL(true); } *************** *** 128,139 **** minmaxAddValue(PG_FUNCTION_ARGS) * existing minimum. */ cmpFn = minmax_get_procinfo(bdesc, attno, PROCNUM_LESS); ! compar = FunctionCall2Coll(cmpFn, colloid, newval, column->values[0]); if (DatumGetBool(compar)) { if (!attr->attbyval) ! pfree(DatumGetPointer(column->values[0])); ! column->values[0] = datumCopy(newval, attr->attbyval, attr->attlen); updated = true; } --- 128,139 ---- * existing minimum. */ cmpFn = minmax_get_procinfo(bdesc, attno, PROCNUM_LESS); ! compar = FunctionCall2Coll(cmpFn, colloid, newval, column->bv_values[0]); if (DatumGetBool(compar)) { if (!attr->attbyval) ! pfree(DatumGetPointer(column->bv_values[0])); ! column->bv_values[0] = datumCopy(newval, attr->attbyval, attr->attlen); updated = true; } *************** *** 141,152 **** minmaxAddValue(PG_FUNCTION_ARGS) * And now compare it to the existing maximum. */ cmpFn = minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER); ! 
compar = FunctionCall2Coll(cmpFn, colloid, newval, column->values[1]); if (DatumGetBool(compar)) { if (!attr->attbyval) ! pfree(DatumGetPointer(column->values[1])); ! column->values[1] = datumCopy(newval, attr->attbyval, attr->attlen); updated = true; } --- 141,152 ---- * And now compare it to the existing maximum. */ cmpFn = minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER); ! compar = FunctionCall2Coll(cmpFn, colloid, newval, column->bv_values[1]); if (DatumGetBool(compar)) { if (!attr->attbyval) ! pfree(DatumGetPointer(column->bv_values[1])); ! column->bv_values[1] = datumCopy(newval, attr->attbyval, attr->attlen); updated = true; } *************** *** 169,182 **** minmaxConsistent(PG_FUNCTION_ARGS) Datum value; Datum matches; ! Assert(key->sk_attno == column->attno); /* handle IS NULL/IS NOT NULL tests */ if (key->sk_flags & SK_ISNULL) { if (key->sk_flags & SK_SEARCHNULL) { ! if (column->allnulls || column->hasnulls) PG_RETURN_BOOL(true); PG_RETURN_BOOL(false); } --- 169,182 ---- Datum value; Datum matches; ! Assert(key->sk_attno == column->bv_attno); /* handle IS NULL/IS NOT NULL tests */ if (key->sk_flags & SK_ISNULL) { if (key->sk_flags & SK_SEARCHNULL) { ! if (column->bv_allnulls || column->bv_hasnulls) PG_RETURN_BOOL(true); PG_RETURN_BOOL(false); } *************** *** 186,192 **** minmaxConsistent(PG_FUNCTION_ARGS) * only nulls. */ Assert(key->sk_flags & SK_SEARCHNOTNULL); ! PG_RETURN_BOOL(!column->allnulls); } attno = key->sk_attno; --- 186,192 ---- * only nulls. */ Assert(key->sk_flags & SK_SEARCHNOTNULL); ! PG_RETURN_BOOL(!column->bv_allnulls); } attno = key->sk_attno; *************** *** 196,207 **** minmaxConsistent(PG_FUNCTION_ARGS) case BTLessStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESS), ! colloid, column->values[0], value); break; case BTLessEqualStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESSEQUAL), ! 
colloid, column->values[0], value); break; case BTEqualStrategyNumber: --- 196,207 ---- case BTLessStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESS), ! colloid, column->bv_values[0], value); break; case BTLessEqualStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESSEQUAL), ! colloid, column->bv_values[0], value); break; case BTEqualStrategyNumber: *************** *** 212,234 **** minmaxConsistent(PG_FUNCTION_ARGS) */ matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESSEQUAL), ! colloid, column->values[0], value); if (!DatumGetBool(matches)) break; /* max() >= scankey */ matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATEREQUAL), ! colloid, column->values[1], value); break; case BTGreaterEqualStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATEREQUAL), ! colloid, column->values[1], value); break; case BTGreaterStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER), ! colloid, column->values[1], value); break; default: /* shouldn't happen */ --- 212,234 ---- */ matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESSEQUAL), ! colloid, column->bv_values[0], value); if (!DatumGetBool(matches)) break; /* max() >= scankey */ matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATEREQUAL), ! colloid, column->bv_values[1], value); break; case BTGreaterEqualStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATEREQUAL), ! colloid, column->bv_values[1], value); break; case BTGreaterStrategyNumber: matches = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER), ! colloid, column->bv_values[1], value); break; default: /* shouldn't happen */ *************** *** 255,293 **** minmaxUnion(PG_FUNCTION_ARGS) Form_pg_attribute attr; bool needsadj; ! Assert(col_a->attno == col_b->attno); ! 
attno = col_a->attno; attr = bdesc->bd_tupdesc->attrs[attno - 1]; ! /* Adjust null flags */ ! if (!col_b->allnulls && col_a->allnulls) ! col_a->allnulls = false; ! if (col_b->hasnulls && !col_a->hasnulls) ! col_a->hasnulls = true; ! /* Adjust minimum, if b's min is less than a's min */ needsadj = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESS), ! colloid, col_b->values[0], col_a->values[0]); if (needsadj) { if (!attr->attbyval) ! pfree(DatumGetPointer(col_a->values[0])); ! col_a->values[0] = datumCopy(col_b->values[0], ! attr->attbyval, attr->attlen); } ! /* Adjust maximum, if b's max is greater than a's max */ needsadj = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER), ! colloid, col_b->values[1], col_a->values[1]); if (needsadj) { if (!attr->attbyval) ! pfree(DatumGetPointer(col_a->values[1])); ! col_a->values[1] = datumCopy(col_b->values[1], ! attr->attbyval, attr->attlen); } PG_RETURN_VOID(); --- 255,310 ---- Form_pg_attribute attr; bool needsadj; ! Assert(col_a->bv_attno == col_b->bv_attno); ! /* If there are no values in B, there's nothing to do */ ! if (col_b->bv_allnulls) ! PG_RETURN_VOID(); ! ! attno = col_a->bv_attno; attr = bdesc->bd_tupdesc->attrs[attno - 1]; ! /* Adjust "hasnulls" */ ! if (col_b->bv_hasnulls && !col_a->bv_hasnulls) ! col_a->bv_hasnulls = true; ! ! /* ! * Adjust "allnulls". If B has values but A doesn't, just copy the values ! * from B into A, and we're done. (We cannot run the operators in this ! * case, because values in A might contain garbage.) ! */ ! if (!col_b->bv_allnulls && col_a->bv_allnulls) ! { ! col_a->bv_allnulls = false; ! col_a->bv_values[0] = datumCopy(col_b->bv_values[0], ! attr->attbyval, attr->attlen); ! col_a->bv_values[1] = datumCopy(col_b->bv_values[1], ! attr->attbyval, attr->attlen); ! PG_RETURN_VOID(); ! } ! /* Adjust minimum, if B's min is less than A's min */ needsadj = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_LESS), ! 
colloid, col_b->bv_values[0], col_a->bv_values[0]); if (needsadj) { if (!attr->attbyval) ! pfree(DatumGetPointer(col_a->bv_values[0])); ! col_a->bv_values[0] = datumCopy(col_b->bv_values[0], ! attr->attbyval, attr->attlen); } ! /* Adjust maximum, if B's max is greater than A's max */ needsadj = FunctionCall2Coll(minmax_get_procinfo(bdesc, attno, PROCNUM_GREATER), ! colloid, col_b->bv_values[1], col_a->bv_values[1]); if (needsadj) { if (!attr->attbyval) ! pfree(DatumGetPointer(col_a->bv_values[1])); ! col_a->bv_values[1] = datumCopy(col_b->bv_values[1], ! attr->attbyval, attr->attlen); } PG_RETURN_VOID(); *** a/src/backend/access/brin/brin_pageops.c --- b/src/backend/access/brin/brin_pageops.c *************** *** 43,55 **** bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, ! const BrTuple *origtup, Size origsz, ! const BrTuple *newtup, Size newsz, bool samepage) { Page oldpage; ItemId oldlp; ! BrTuple *oldtup; Size oldsz; Buffer newbuf; BrinSpecialSpace *special; --- 43,55 ---- brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, ! const BrinTuple *origtup, Size origsz, ! const BrinTuple *newtup, Size newsz, bool samepage) { Page oldpage; ItemId oldlp; ! BrinTuple *oldtup; Size oldsz; Buffer newbuf; BrinSpecialSpace *special; *************** *** 98,104 **** brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, } oldsz = ItemIdGetLength(oldlp); ! oldtup = (BrTuple *) PageGetItem(oldpage, oldlp); /* * ... or it might have been updated in place to different contents. --- 98,104 ---- } oldsz = ItemIdGetLength(oldlp); ! oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp); /* * ... or it might have been updated in place to different contents. 
*************** *** 282,288 **** brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz) OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk, ! BrTuple *tup, Size itemsz) { Page page; BlockNumber blk; --- 282,288 ---- OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk, ! BrinTuple *tup, Size itemsz) { Page page; BlockNumber blk; *************** *** 482,499 **** brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, { OffsetNumber off; OffsetNumber maxoff; - BrinSpecialSpace *special; Page page; page = BufferGetPage(buf); - special = (BrinSpecialSpace *) PageGetSpecialPointer(page); ! Assert(special->flags & BRIN_EVACUATE_PAGE); maxoff = PageGetMaxOffsetNumber(page); for (off = FirstOffsetNumber; off <= maxoff; off++) { ! BrTuple *tup; Size sz; ItemId lp; --- 482,498 ---- { OffsetNumber off; OffsetNumber maxoff; Page page; page = BufferGetPage(buf); ! Assert(((BrinSpecialSpace *) ! PageGetSpecialPointer(page))->flags & BRIN_EVACUATE_PAGE); maxoff = PageGetMaxOffsetNumber(page); for (off = FirstOffsetNumber; off <= maxoff; off++) { ! BrinTuple *tup; Size sz; ItemId lp; *************** *** 503,509 **** brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, if (ItemIdIsUsed(lp)) { sz = ItemIdGetLength(lp); ! tup = (BrTuple *) PageGetItem(page, lp); tup = brin_copy_tuple(tup, sz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); --- 502,508 ---- if (ItemIdIsUsed(lp)) { sz = ItemIdGetLength(lp); ! tup = (BrinTuple *) PageGetItem(page, lp); tup = brin_copy_tuple(tup, sz); LockBuffer(buf, BUFFER_LOCK_UNLOCK); *************** *** 601,608 **** brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, /* * We lock the old buffer first, if it's earlier than the new one; but * before we do, we need to check that it hasn't been turned into a ! 
* revmap page concurrently; if we detect that it happened, give up and ! * tell caller to start over. */ if (BufferIsValid(oldbuf) && oldblk < newblk) { --- 600,607 ---- /* * We lock the old buffer first, if it's earlier than the new one; but * before we do, we need to check that it hasn't been turned into a ! * revmap page concurrently; if we detect that it happened, give up ! * and tell caller to start over. */ if (BufferIsValid(oldbuf) && oldblk < newblk) { *** a/src/backend/access/brin/brin_revmap.c --- b/src/backend/access/brin/brin_revmap.c *************** *** 1,12 **** /* * brin_revmap.c ! * Reverse range map for BRIN indexes * ! * The reverse range map (revmap) is a translation structure for BRIN indexes: ! * for each page range there is one summary tuple, and its location is tracked ! * by the revmap. Whenever a new tuple is inserted into a table that violates ! * the previously recorded summary values, a new tuple is inserted into the ! * index and the revmap is updated to point to it. * * The revmap is stored in the first pages of the index, immediately following * the metapage. When the revmap needs to be expanded, all tuples on the --- 1,12 ---- /* * brin_revmap.c ! * Range map for BRIN indexes * ! * The range map (revmap) is a translation structure for BRIN indexes: for each ! * page range there is one summary tuple, and its location is tracked by the ! * revmap. Whenever a new tuple is inserted into a table that violates the ! * previously recorded summary values, a new tuple is inserted into the index ! * and the revmap is updated to point to it. * * The revmap is stored in the first pages of the index, immediately following * the metapage. When the revmap needs to be expanded, all tuples on the *************** *** 61,69 **** static BlockNumber rm_get_phys_blkno(brinRmAccess *rmAccess, static void revmap_physical_extend(brinRmAccess *rmAccess); /* ! * Initialize an access object for a reverse range map, which can be used to ! * read stuff from it. 
This must be freed by brinRevmapAccessTerminate when caller ! * is done with it. */ brinRmAccess * brinRevmapAccessInit(Relation idxrel, BlockNumber *pagesPerRange) --- 61,69 ---- static void revmap_physical_extend(brinRmAccess *rmAccess); /* ! * Initialize an access object for a range map, which can be used to read stuff ! * from it. This must be freed by brinRevmapAccessTerminate when caller is ! * done with it. */ brinRmAccess * brinRevmapAccessInit(Relation idxrel, BlockNumber *pagesPerRange) *************** *** 191,197 **** brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, } /* ! * Fetch the BrTuple for a given heap block. * * The buffer containing the tuple is locked, and returned in *buf. As an * optimization, the caller can pass a pinned buffer *buf on entry, which will --- 191,197 ---- } /* ! * Fetch the BrinTuple for a given heap block. * * The buffer containing the tuple is locked, and returned in *buf. As an * optimization, the caller can pass a pinned buffer *buf on entry, which will *************** *** 204,210 **** brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, * The output tuple offset within the buffer is returned in *off, and its size * is returned in *size. */ ! BrTuple * brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode) { --- 204,210 ---- * The output tuple offset within the buffer is returned in *off, and its size * is returned in *size. */ ! BrinTuple * brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode) { *************** *** 215,221 **** brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, BlockNumber blk; Page page; ItemId lp; ! BrTuple *tup; ItemPointerData previptr; /* normalize the heap block number to be the first page in the range */ --- 215,221 ---- BlockNumber blk; Page page; ItemId lp; ! 
BrinTuple *tup; ItemPointerData previptr; /* normalize the heap block number to be the first page in the range */ *************** *** 274,280 **** brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK); ! /* Ok, got a pointer to where the BrTuple should be. Fetch it. */ if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk) { if (BufferIsValid(*buf)) --- 274,280 ---- LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK); ! /* Ok, got a pointer to where the BrinTuple should be. Fetch it. */ if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk) { if (BufferIsValid(*buf)) *************** *** 290,296 **** brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, lp = PageGetItemId(page, *off); if (ItemIdIsUsed(lp)) { ! tup = (BrTuple *) PageGetItem(page, lp); if (tup->bt_blkno == heapBlk) { --- 290,296 ---- lp = PageGetItemId(page, *off); if (ItemIdIsUsed(lp)) { ! tup = (BrinTuple *) PageGetItem(page, lp); if (tup->bt_blkno == heapBlk) { *** a/src/backend/access/brin/brin_tuple.c --- b/src/backend/access/brin/brin_tuple.c *************** *** 3,9 **** * Method implementations for tuples in BRIN indexes. * * Intended usage is that code outside this file only deals with ! * DeformedBrTuples, and convert to and from the on-disk representation through * functions in this file. * * NOTES --- 3,9 ---- * Method implementations for tuples in BRIN indexes. * * Intended usage is that code outside this file only deals with ! * BrinMemTuples, and convert to and from the on-disk representation through * functions in this file. * * NOTES *************** *** 36,46 **** #include "access/tupdesc.h" #include "access/tupmacs.h" #include "utils/datum.h" ! static inline void br_deconstruct_tuple(BrinDesc *brdesc, ! char *tp, bits8 *nullbits, bool nulls, ! 
Datum *values, bool *allnulls, bool *hasnulls); /* --- 36,47 ---- #include "access/tupdesc.h" #include "access/tupmacs.h" #include "utils/datum.h" + #include "utils/memutils.h" ! static inline void brin_deconstruct_tuple(BrinDesc *brdesc, ! char *tp, bits8 *nullbits, bool nulls, ! Datum *values, bool *allnulls, bool *hasnulls); /* *************** *** 56,61 **** brtuple_disk_tupdesc(BrinDesc *brdesc) --- 57,66 ---- int j; AttrNumber attno = 1; TupleDesc tupdesc; + MemoryContext oldcxt; + + /* make sure it's in the bdesc's context */ + oldcxt = MemoryContextSwitchTo(brdesc->bd_context); tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored, false); *************** *** 67,72 **** brtuple_disk_tupdesc(BrinDesc *brdesc) --- 72,79 ---- -1, 0); } + MemoryContextSwitchTo(oldcxt); + brdesc->bd_disktdesc = tupdesc; } *************** *** 78,91 **** brtuple_disk_tupdesc(BrinDesc *brdesc) * * See brin_form_placeholder_tuple if you touch this. */ ! BrTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, ! DeformedBrTuple *tuple, Size *size) { Datum *values; bool *nulls; bool anynulls = false; ! BrTuple *rettuple; int keyno; int idxattno; uint16 phony_infomask; --- 85,98 ---- * * See brin_form_placeholder_tuple if you touch this. */ ! BrinTuple * brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, ! BrinMemTuple *tuple, Size *size) { Datum *values; bool *nulls; bool anynulls = false; ! BrinTuple *rettuple; int keyno; int idxattno; uint16 phony_infomask; *************** *** 113,119 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, * column; when this happens, there is no data to store. Thus set the * nullable bits for all data elements of this column and we're done. */ ! if (tuple->dt_columns[keyno].allnulls) { for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; --- 120,126 ---- * column; when this happens, there is no data to store. Thus set the * nullable bits for all data elements of this column and we're done. */ ! 
if (tuple->bt_columns[keyno].bv_allnulls) { for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; *************** *** 128,140 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, * data. We still need to store a real value, but the presence of * this means we need a null bitmap. */ ! if (tuple->dt_columns[keyno].hasnulls) anynulls = true; for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; datumno++) ! values[idxattno++] = tuple->dt_columns[keyno].values[datumno]; } /* compute total space needed */ --- 135,147 ---- * data. We still need to store a real value, but the presence of * this means we need a null bitmap. */ ! if (tuple->bt_columns[keyno].bv_hasnulls) anynulls = true; for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; datumno++) ! values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno]; } /* compute total space needed */ *************** *** 208,214 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, bitmask = 1; } ! if (!tuple->dt_columns[keyno].allnulls) continue; *bitP |= bitmask; --- 215,221 ---- bitmask = 1; } ! if (!tuple->bt_columns[keyno].bv_allnulls) continue; *bitP |= bitmask; *************** *** 225,231 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, bitmask = 1; } ! if (!tuple->dt_columns[keyno].hasnulls) continue; *bitP |= bitmask; --- 232,238 ---- bitmask = 1; } ! if (!tuple->bt_columns[keyno].bv_hasnulls) continue; *bitP |= bitmask; *************** *** 233,239 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1; } ! if (tuple->dt_placeholder) rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; *size = len; --- 240,246 ---- bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1; } ! if (tuple->bt_placeholder) rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; *size = len; *************** *** 245,256 **** brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, * * This is a cut-down version of brin_form_tuple. */ ! 
BrTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) { Size len; Size hoff; ! BrTuple *rettuple; int keyno; bits8 *bitP; int bitmask; --- 252,263 ---- * * This is a cut-down version of brin_form_tuple. */ ! BrinTuple * brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) { Size len; Size hoff; ! BrinTuple *rettuple; int keyno; bits8 *bitP; int bitmask; *************** *** 291,305 **** brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) * Free a tuple created by brin_form_tuple */ void ! brin_free_tuple(BrTuple *tuple) { pfree(tuple); } ! BrTuple * ! brin_copy_tuple(BrTuple *tuple, Size len) { ! BrTuple *newtup; newtup = palloc(len); memcpy(newtup, tuple, len); --- 298,312 ---- * Free a tuple created by brin_form_tuple */ void ! brin_free_tuple(BrinTuple *tuple) { pfree(tuple); } ! BrinTuple * ! brin_copy_tuple(BrinTuple *tuple, Size len) { ! BrinTuple *newtup; newtup = palloc(len); memcpy(newtup, tuple, len); *************** *** 308,314 **** brin_copy_tuple(BrTuple *tuple, Size len) } bool ! brin_tuples_equal(const BrTuple *a, Size alen, const BrTuple *b, Size blen) { if (alen != blen) return false; --- 315,321 ---- } bool ! brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen) { if (alen != blen) return false; *************** *** 318,383 **** brin_tuples_equal(const BrTuple *a, Size alen, const BrTuple *b, Size blen) } /* ! * Create a new DeformedBrTuple from scratch, and initialize it to an empty * state. */ ! DeformedBrTuple * brin_new_dtuple(BrinDesc *brdesc) { ! DeformedBrTuple *dtup; char *currdatum; long basesize; int i; ! basesize = MAXALIGN(sizeof(DeformedBrTuple) + sizeof(BrinValues) * brdesc->bd_tupdesc->natts); dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored); currdatum = (char *) dtup + basesize; for (i = 0; i < brdesc->bd_tupdesc->natts; i++) { ! dtup->dt_columns[i].attno = i + 1; ! dtup->dt_columns[i].allnulls = true; ! 
dtup->dt_columns[i].hasnulls = false; ! dtup->dt_columns[i].values = (Datum *) currdatum; currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; } return dtup; } /* ! * Reset a DeformedBrTuple to initial state */ void ! brin_dtuple_initialize(DeformedBrTuple *dtuple, BrinDesc *brdesc) { int i; - int j; for (i = 0; i < brdesc->bd_tupdesc->natts; i++) { ! if (!brdesc->bd_tupdesc->attrs[i]->attbyval && ! !dtuple->dt_columns[i].allnulls) ! for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++) ! pfree(DatumGetPointer(dtuple->dt_columns[i].values[j])); ! dtuple->dt_columns[i].allnulls = true; ! dtuple->dt_columns[i].hasnulls = false; ! memset(dtuple->dt_columns[i].values, 0, ! sizeof(Datum) * brdesc->bd_info[i]->oi_nstored); } } /* ! * Convert a BrTuple back to a DeformedBrTuple. This is the reverse of * brin_form_tuple. * * Note we don't need the "on disk tupdesc" here; we rely on our own routine to * deconstruct the tuple from the on-disk format. */ ! DeformedBrTuple * ! brin_deform_tuple(BrinDesc *brdesc, BrTuple *tuple) { ! DeformedBrTuple *dtup; Datum *values; bool *allnulls; bool *hasnulls; --- 325,392 ---- } /* ! * Create a new BrinMemTuple from scratch, and initialize it to an empty * state. + * + * Note: we don't provide any means to free a deformed tuple, so make sure to + * use a temporary memory context. */ ! BrinMemTuple * brin_new_dtuple(BrinDesc *brdesc) { ! BrinMemTuple *dtup; char *currdatum; long basesize; int i; ! basesize = MAXALIGN(sizeof(BrinMemTuple) + sizeof(BrinValues) * brdesc->bd_tupdesc->natts); dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored); currdatum = (char *) dtup + basesize; for (i = 0; i < brdesc->bd_tupdesc->natts; i++) { ! dtup->bt_columns[i].bv_attno = i + 1; ! dtup->bt_columns[i].bv_allnulls = true; ! dtup->bt_columns[i].bv_hasnulls = false; ! 
dtup->bt_columns[i].bv_values = (Datum *) currdatum; currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; } + dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, + "brin dtuple", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); return dtup; } /* ! * Reset a BrinMemTuple to initial state */ void ! brin_dtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) { int i; + MemoryContextReset(dtuple->bt_context); for (i = 0; i < brdesc->bd_tupdesc->natts; i++) { ! dtuple->bt_columns[i].bv_allnulls = true; ! dtuple->bt_columns[i].bv_hasnulls = false; } } /* ! * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of * brin_form_tuple. * * Note we don't need the "on disk tupdesc" here; we rely on our own routine to * deconstruct the tuple from the on-disk format. */ ! BrinMemTuple * ! brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple) { ! BrinMemTuple *dtup; Datum *values; bool *allnulls; bool *hasnulls; *************** *** 385,395 **** brin_deform_tuple(BrinDesc *brdesc, BrTuple *tuple) bits8 *nullbits; int keyno; int valueno; dtup = brin_new_dtuple(brdesc); if (BrinTupleIsPlaceholder(tuple)) ! dtup->dt_placeholder = true; values = palloc(sizeof(Datum) * brdesc->bd_totalstored); allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); --- 394,405 ---- bits8 *nullbits; int keyno; int valueno; + MemoryContext oldcxt; dtup = brin_new_dtuple(brdesc); if (BrinTupleIsPlaceholder(tuple)) ! dtup->bt_placeholder = true; values = palloc(sizeof(Datum) * brdesc->bd_totalstored); allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); *************** *** 401,414 **** brin_deform_tuple(BrinDesc *brdesc, BrTuple *tuple) nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); else nullbits = NULL; ! br_deconstruct_tuple(brdesc, ! tp, nullbits, BrinTupleHasNulls(tuple), ! values, allnulls, hasnulls); /* * Iterate to assign each of the values to the corresponding item in the ! * values array of each column. 
*/ for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) { int i; --- 411,425 ---- nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); else nullbits = NULL; ! brin_deconstruct_tuple(brdesc, ! tp, nullbits, BrinTupleHasNulls(tuple), ! values, allnulls, hasnulls); /* * Iterate to assign each of the values to the corresponding item in the ! * values array of each column. The copies occur in the tuple's context. */ + oldcxt = MemoryContextSwitchTo(dtup->bt_context); for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) { int i; *************** *** 421,439 **** brin_deform_tuple(BrinDesc *brdesc, BrTuple *tuple) /* * We would like to skip datumCopy'ing the values datum in some cases, ! * caller permitting, but this would make life harder for ! * brin_free_dtuple and brin_dtuple_initialize, so refrain. */ for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) ! dtup->dt_columns[keyno].values[i] = datumCopy(values[valueno++], brdesc->bd_tupdesc->attrs[keyno]->attbyval, brdesc->bd_tupdesc->attrs[keyno]->attlen); ! dtup->dt_columns[keyno].hasnulls = hasnulls[keyno]; ! dtup->dt_columns[keyno].allnulls = false; } pfree(values); pfree(allnulls); pfree(hasnulls); --- 432,451 ---- /* * We would like to skip datumCopy'ing the values datum in some cases, ! * caller permitting ... */ for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) ! dtup->bt_columns[keyno].bv_values[i] = datumCopy(values[valueno++], brdesc->bd_tupdesc->attrs[keyno]->attbyval, brdesc->bd_tupdesc->attrs[keyno]->attlen); ! dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; ! 
dtup->bt_columns[keyno].bv_allnulls = false; } + MemoryContextSwitchTo(oldcxt); + pfree(values); pfree(allnulls); pfree(hasnulls); *************** *** 441,466 **** brin_deform_tuple(BrinDesc *brdesc, BrTuple *tuple) return dtup; } - /* free resources allocated in a deformed tuple */ - void - brin_free_dtuple(BrinDesc *bdesc, DeformedBrTuple *dtup) - { - int i; - int j; - - /* if we had a mcxt to reset here .. */ - for (i = 0; i < bdesc->bd_tupdesc->natts; i++) - { - if (!bdesc->bd_tupdesc->attrs[i]->attbyval && - !dtup->dt_columns[i].allnulls) - for (j = 0; j < bdesc->bd_info[i]->oi_nstored; j++) - pfree(DatumGetPointer(dtup->dt_columns[i].values[j])); - } - pfree(dtup); - } - /* ! * br_deconstruct_tuple * Guts of attribute extraction from an on-disk BRIN tuple. * * Its arguments are: --- 453,460 ---- return dtup; } /* ! * brin_deconstruct_tuple * Guts of attribute extraction from an on-disk BRIN tuple. * * Its arguments are: *************** *** 475,483 **** brin_free_dtuple(BrinDesc *bdesc, DeformedBrTuple *dtup) * Output arrays must have been allocated by caller. */ static inline void ! br_deconstruct_tuple(BrinDesc *brdesc, ! char *tp, bits8 *nullbits, bool nulls, ! Datum *values, bool *allnulls, bool *hasnulls) { int attnum; int stored; --- 469,477 ---- * Output arrays must have been allocated by caller. */ static inline void ! brin_deconstruct_tuple(BrinDesc *brdesc, ! char *tp, bits8 *nullbits, bool nulls, ! Datum *values, bool *allnulls, bool *hasnulls) { int attnum; int stored; *** a/src/backend/access/brin/brin_xlog.c --- b/src/backend/access/brin/brin_xlog.c *************** *** 54,130 **** brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record) */ static void brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record, xl_brin_insert *xlrec, ! BrTuple *tuple, int tuplen) { BlockNumber blkno; Buffer buffer; Page page; OffsetNumber offnum; ! /* If we have a full-page image, restore it */ ! if (record->xl_info & XLR_BKP_BLOCK(0)) { ! 
(void) RestoreBackupBlock(lsn, record, 0, false, false); } else { Assert(tuple->bt_blkno == xlrec->heapBlk); ! blkno = ItemPointerGetBlockNumber(&(xlrec->tid)); ! if (record->xl_info & XLOG_BRIN_INIT_PAGE) ! { ! buffer = XLogReadBuffer(xlrec->node, blkno, true); ! Assert(BufferIsValid(buffer)); ! page = (Page) BufferGetPage(buffer); ! ! brin_page_init(page, BRIN_PAGETYPE_REGULAR); ! } ! else ! { ! buffer = XLogReadBuffer(xlrec->node, blkno, false); ! } ! if (BufferIsValid(buffer)) ! { ! page = (Page) BufferGetPage(buffer); ! ! if (lsn > PageGetLSN(page)) ! { ! offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); ! ! offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false); ! if (offnum == InvalidOffsetNumber) ! elog(PANIC, "brin_xlog_insert_update: failed to add tuple"); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); ! } ! UnlockReleaseBuffer(buffer); ! } } /* update the revmap */ ! if (record->xl_info & XLR_BKP_BLOCK(1)) { ! (void) RestoreBackupBlock(lsn, record, 1, false, false); ! } ! else ! { ! buffer = XLogReadBuffer(xlrec->node, xlrec->revmapBlk, false); ! if (BufferIsValid(buffer)) ! { ! page = (Page) BufferGetPage(buffer); ! ! if (lsn > PageGetLSN(page)) ! { ! brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, xlrec->tid); ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); ! } ! UnlockReleaseBuffer(buffer); ! } } /* XXX no FSM updates here ... */ } --- 54,121 ---- */ static void brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record, xl_brin_insert *xlrec, ! BrinTuple *tuple, int tuplen) { BlockNumber blkno; Buffer buffer; Page page; OffsetNumber offnum; + XLogRedoAction action; ! blkno = ItemPointerGetBlockNumber(&xlrec->tid); ! ! /* ! * If we inserted the first and only tuple on the page, re-initialize the ! * page from scratch. ! */ ! if (record->xl_info & XLOG_BRIN_INIT_PAGE) { ! 
XLogReadBufferForRedoExtended(lsn, record, 0, ! xlrec->node, MAIN_FORKNUM, blkno, ! RBM_ZERO, false, &buffer); ! page = BufferGetPage(buffer); ! brin_page_init(page, BRIN_PAGETYPE_REGULAR); ! action = BLK_NEEDS_REDO; } else { + action = XLogReadBufferForRedo(lsn, record, 0, + xlrec->node, blkno, &buffer); + } + + if (action == BLK_NEEDS_REDO) + { Assert(tuple->bt_blkno == xlrec->heapBlk); ! page = (Page) BufferGetPage(buffer); ! ! offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); ! ! offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false); ! if (offnum == InvalidOffsetNumber) ! elog(PANIC, "brin_xlog_insert_update: failed to add tuple"); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); /* update the revmap */ ! action = XLogReadBufferForRedo(lsn, record, 1, xlrec->node, ! xlrec->revmapBlk, &buffer); ! if (action == BLK_NEEDS_REDO) { ! page = (Page) BufferGetPage(buffer); ! ! brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, xlrec->tid); ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); /* XXX no FSM updates here ... */ } *************** *** 133,143 **** static void brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record) { xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record); ! BrTuple *newtup; int tuplen; tuplen = record->xl_len - SizeOfBrinInsert; ! newtup = (BrTuple *) ((char *) xlrec + SizeOfBrinInsert); brin_xlog_insert_update(lsn, record, xlrec, newtup, tuplen); } --- 124,134 ---- brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record) { xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record); ! BrinTuple *newtup; int tuplen; tuplen = record->xl_len - SizeOfBrinInsert; ! 
newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert); brin_xlog_insert_update(lsn, record, xlrec, newtup, tuplen); } *************** *** 147,191 **** brin_xlog_update(XLogRecPtr lsn, XLogRecord *record) { xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record); BlockNumber blkno; - OffsetNumber offnum; Buffer buffer; ! Page page; ! BrTuple *newtup; int tuplen; tuplen = record->xl_len - SizeOfBrinUpdate; ! newtup = (BrTuple *) ((char *) xlrec + SizeOfBrinUpdate); /* First insert the new tuple and update revmap, like in an insertion. */ brin_xlog_insert_update(lsn, record, &xlrec->new, newtup, tuplen); /* Then remove the old tuple */ ! if (record->xl_info & XLR_BKP_BLOCK(2)) { ! (void) RestoreBackupBlock(lsn, record, 2, false, false); ! } ! else ! { ! blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid)); ! buffer = XLogReadBuffer(xlrec->new.node, blkno, false); ! if (BufferIsValid(buffer)) ! { ! page = (Page) BufferGetPage(buffer); ! ! if (lsn > PageGetLSN(page)) ! { ! offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_update: invalid max offset number"); ! ! PageIndexDeleteNoCompact(page, &offnum, 1); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); ! } ! UnlockReleaseBuffer(buffer); ! } } } /* --- 138,176 ---- { xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record); BlockNumber blkno; Buffer buffer; ! BrinTuple *newtup; int tuplen; + XLogRedoAction action; tuplen = record->xl_len - SizeOfBrinUpdate; ! newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate); /* First insert the new tuple and update revmap, like in an insertion. */ brin_xlog_insert_update(lsn, record, &xlrec->new, newtup, tuplen); /* Then remove the old tuple */ ! blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid)); ! action = XLogReadBufferForRedo(lsn, record, 2, xlrec->new.node, ! blkno, &buffer); ! if (action == BLK_NEEDS_REDO) { ! Page page; ! OffsetNumber offnum; ! ! 
page = (Page) BufferGetPage(buffer); ! ! offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_update: invalid max offset number"); ! ! PageIndexDeleteNoCompact(page, &offnum, 1); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); } /* *************** *** 197,241 **** brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record) xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) XLogRecGetData(record); BlockNumber blkno; Buffer buffer; ! Page page; ! OffsetNumber offnum; ! /* If we have a full-page image, restore it */ ! if (record->xl_info & XLR_BKP_BLOCK(0)) { - (void) RestoreBackupBlock(lsn, record, 0, false, false); - } - else - { - BrTuple *mmtuple; int tuplen; tuplen = record->xl_len - SizeOfBrinSamepageUpdate; ! mmtuple = (BrTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate); ! ! blkno = ItemPointerGetBlockNumber(&(xlrec->tid)); ! buffer = XLogReadBuffer(xlrec->node, blkno, false); ! if (BufferIsValid(buffer)) ! { ! page = (Page) BufferGetPage(buffer); ! ! if (lsn > PageGetLSN(page)) ! { ! offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_samepage_update: invalid max offset number"); ! ! PageIndexDeleteNoCompact(page, &offnum, 1); ! offnum = PageAddItem(page, (Item) mmtuple, tuplen, offnum, true, false); ! if (offnum == InvalidOffsetNumber) ! elog(PANIC, "brin_xlog_samepage_update: failed to add tuple"); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); ! } ! UnlockReleaseBuffer(buffer); ! } } /* XXX no FSM updates here ... */ } --- 182,218 ---- xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) XLogRecGetData(record); BlockNumber blkno; Buffer buffer; ! XLogRedoAction action; ! blkno = ItemPointerGetBlockNumber(&(xlrec->tid)); ! action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno, ! &buffer); ! 
if (action == BLK_NEEDS_REDO) { int tuplen; + BrinTuple *mmtuple; + Page page; + OffsetNumber offnum; tuplen = record->xl_len - SizeOfBrinSamepageUpdate; ! mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate); ! ! page = (Page) BufferGetPage(buffer); ! ! offnum = ItemPointerGetOffsetNumber(&(xlrec->tid)); ! if (PageGetMaxOffsetNumber(page) + 1 < offnum) ! elog(PANIC, "brin_xlog_samepage_update: invalid max offset number"); ! ! PageIndexDeleteNoCompact(page, &offnum, 1); ! offnum = PageAddItem(page, (Item) mmtuple, tuplen, offnum, true, false); ! if (offnum == InvalidOffsetNumber) ! elog(PANIC, "brin_xlog_samepage_update: failed to add tuple"); ! ! PageSetLSN(page, lsn); ! MarkBufferDirty(buffer); } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); /* XXX no FSM updates here ... */ } *************** *** 246,278 **** brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record) { xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); Buffer metabuf; - Page metapg; - BrinMetaPageData *metadata; Buffer buf; Page page; /* Update the metapage */ ! if (record->xl_info & XLR_BKP_BLOCK(0)) { ! metabuf = RestoreBackupBlock(lsn, record, 0, false, true); ! } ! else ! { ! metabuf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, false); ! if (BufferIsValid(metabuf)) ! { ! metapg = BufferGetPage(metabuf); ! if (lsn > PageGetLSN(metapg)) ! { ! metadata = (BrinMetaPageData *) PageGetContents(metapg); ! ! Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); ! metadata->lastRevmapPage = xlrec->targetBlk; ! ! PageSetLSN(metapg, lsn); ! MarkBufferDirty(metabuf); ! } ! } } /* --- 223,248 ---- { xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); Buffer metabuf; Buffer buf; Page page; + XLogRedoAction action; /* Update the metapage */ ! action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, ! BRIN_METAPAGE_BLKNO, &metabuf); ! if (action == BLK_NEEDS_REDO) { ! Page metapg; ! 
BrinMetaPageData *metadata; ! ! metapg = BufferGetPage(metabuf); ! metadata = (BrinMetaPageData *) PageGetContents(metapg); ! ! Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); ! metadata->lastRevmapPage = xlrec->targetBlk; ! ! PageSetLSN(metapg, lsn); ! MarkBufferDirty(metabuf); } /* *** a/src/include/access/brin_internal.h --- b/src/include/access/brin_internal.h *************** *** 20,26 **** /* * A BrinDesc is a struct designed to enable decoding a BRIN tuple from the ! * on-disk format to a DeformedBrTuple and vice-versa. */ /* struct returned by "OpcInfo" amproc */ --- 20,26 ---- /* * A BrinDesc is a struct designed to enable decoding a BRIN tuple from the ! * on-disk format to an in-memory tuple and vice-versa. */ /* struct returned by "OpcInfo" amproc */ *************** *** 70,75 **** typedef struct BrinDesc --- 70,76 ---- #define BRIN_PROCNUM_ADDVALUE 2 #define BRIN_PROCNUM_CONSISTENT 3 #define BRIN_PROCNUM_UNION 4 + /* procedure numbers up to 10 are reserved for BRIN future expansion */ #define BRIN_DEBUG *** a/src/include/access/brin_pageops.h --- b/src/include/access/brin_pageops.h *************** *** 16,29 **** extern bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, ! const BrTuple *origtup, Size origsz, ! const BrTuple *newtup, Size newsz, bool samepage); extern bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz); extern OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk, ! BrTuple *tup, Size itemsz); extern void brin_page_init(Page page, uint16 type); extern void brin_metapage_init(Page page, BlockNumber pagesPerRange, --- 16,29 ---- extern bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, ! const BrinTuple *origtup, Size origsz, ! 
const BrinTuple *newtup, Size newsz, bool samepage); extern bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz); extern OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, brinRmAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk, ! BrinTuple *tup, Size itemsz); extern void brin_page_init(Page page, uint16 type); extern void brin_metapage_init(Page page, BlockNumber pagesPerRange, *** a/src/include/access/brin_revmap.h --- b/src/include/access/brin_revmap.h *************** *** 32,38 **** extern Buffer brinLockRevmapPageForUpdate(brinRmAccess *rmAccess, BlockNumber heapBlk); extern void brinSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid); ! extern BrTuple *brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode); --- 32,38 ---- BlockNumber heapBlk); extern void brinSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid); ! extern BrinTuple *brinGetTupleForHeapBlock(brinRmAccess *rmAccess, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode); *** a/src/include/access/brin_tuple.h --- b/src/include/access/brin_tuple.h *************** *** 23,51 **** */ typedef struct BrinValues { ! AttrNumber attno; /* index attribute number */ ! bool hasnulls; /* is there any nulls in the page range? */ ! bool allnulls; /* are all values nulls in the page range? */ ! Datum *values; /* current accumulated values */ } BrinValues; /* * This struct is used to represent an in-memory index tuple. The values can * only be meaningfully decoded with an appropriate BrinDesc. */ ! typedef struct DeformedBrTuple { ! bool dt_placeholder; /* this is a placeholder tuple */ ! BlockNumber dt_blkno; /* heap blkno that the tuple is for */ ! BrinValues dt_columns[FLEXIBLE_ARRAY_MEMBER]; ! } DeformedBrTuple; /* * An on-disk BRIN tuple. 
This is possibly followed by a nulls bitmask, with * room for 2 null bits (two bits for each indexed column); an opclass-defined * number of Datum values for each column follow. */ ! typedef struct BrTuple { /* heap block number that the tuple is for */ BlockNumber bt_blkno; --- 23,52 ---- */ typedef struct BrinValues { ! AttrNumber bv_attno; /* index attribute number */ ! bool bv_hasnulls; /* are there any nulls in the page range? */ ! bool bv_allnulls; /* are all values nulls in the page range? */ ! Datum *bv_values; /* current accumulated values */ } BrinValues; /* * This struct is used to represent an in-memory index tuple. The values can * only be meaningfully decoded with an appropriate BrinDesc. */ ! typedef struct BrinMemTuple { ! bool bt_placeholder; /* this is a placeholder tuple */ ! BlockNumber bt_blkno; /* heap blkno that the tuple is for */ ! MemoryContext bt_context; /* memcxt holding the bt_columns values */ ! BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER]; ! } BrinMemTuple; /* * An on-disk BRIN tuple. This is possibly followed by a nulls bitmask, with * room for 2 null bits (two bits for each indexed column); an opclass-defined * number of Datum values for each column follow. */ ! typedef struct BrinTuple { /* heap block number that the tuple is for */ BlockNumber bt_blkno; *************** *** 60,68 **** typedef struct BrTuple * --------------- */ uint8 bt_info; ! } BrTuple; ! #define SizeOfBrinTuple (offsetof(BrTuple, bt_info) + sizeof(uint8)) /* * t_info manipulation macros --- 61,69 ---- * --------------- */ uint8 bt_info; ! } BrinTuple; ! #define SizeOfBrinTuple (offsetof(BrinTuple, bt_info) + sizeof(uint8)) /* * t_info manipulation macros *************** *** 72,97 **** typedef struct BrTuple #define BRIN_PLACEHOLDER_MASK 0x40 #define BRIN_NULLS_MASK 0x80 ! #define BrinTupleDataOffset(tup) ((Size) (((BrTuple *) (tup))->bt_info & BRIN_OFFSET_MASK)) ! #define BrinTupleHasNulls(tup) (((((BrTuple *) (tup))->bt_info & BRIN_NULLS_MASK)) != 0) ! 
#define BrinTupleIsPlaceholder(tup) (((((BrTuple *) (tup))->bt_info & BRIN_PLACEHOLDER_MASK)) != 0) ! extern BrTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, ! DeformedBrTuple *tuple, Size *size); ! extern BrTuple *brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size); ! extern void brin_free_tuple(BrTuple *tuple); ! extern BrTuple *brin_copy_tuple(BrTuple *tuple, Size len); ! extern bool brin_tuples_equal(const BrTuple *a, Size alen, ! const BrTuple *b, Size blen); ! extern DeformedBrTuple *brin_new_dtuple(BrinDesc *brdesc); ! extern void brin_dtuple_initialize(DeformedBrTuple *dtuple, BrinDesc *brdesc); ! extern DeformedBrTuple *brin_deform_tuple(BrinDesc *brdesc, ! BrTuple *tuple); ! extern void brin_free_dtuple(BrinDesc *brdesc, ! DeformedBrTuple *dtup); #endif /* BRIN_TUPLE_H */ --- 73,96 ---- #define BRIN_PLACEHOLDER_MASK 0x40 #define BRIN_NULLS_MASK 0x80 ! #define BrinTupleDataOffset(tup) ((Size) (((BrinTuple *) (tup))->bt_info & BRIN_OFFSET_MASK)) ! #define BrinTupleHasNulls(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_NULLS_MASK)) != 0) ! #define BrinTupleIsPlaceholder(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_PLACEHOLDER_MASK)) != 0) ! extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, ! BrinMemTuple *tuple, Size *size); ! extern BrinTuple *brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size); ! extern void brin_free_tuple(BrinTuple *tuple); ! extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len); ! extern bool brin_tuples_equal(const BrinTuple *a, Size alen, ! const BrinTuple *b, Size blen); ! extern BrinMemTuple *brin_new_dtuple(BrinDesc *brdesc); ! extern void brin_dtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc); ! extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc, ! 
BrinTuple *tuple); #endif /* BRIN_TUPLE_H */ *** a/src/include/catalog/pg_am.h --- b/src/include/catalog/pg_am.h *************** *** 132,138 **** DESCR("GIN index access method"); DATA(insert OID = 4000 ( spgist 0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions )); DESCR("SP-GiST index access method"); #define SPGIST_AM_OID 4000 ! DATA(insert OID = 3580 ( brin 5 8 f f f f t t f t t f f 0 brininsert brinbeginscan - bringetbitmap brinrescan brinendscan brinmarkpos brinrestrpos brinbuild brinbuildempty brinbulkdelete brinvacuumcleanup - brincostestimate brinoptions )); #define BRIN_AM_OID 3580 #endif /* PG_AM_H */ --- 132,138 ---- DATA(insert OID = 4000 ( spgist 0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions )); DESCR("SP-GiST index access method"); #define SPGIST_AM_OID 4000 ! DATA(insert OID = 3580 ( brin 5 14 f f f f t t f t t f f 0 brininsert brinbeginscan - bringetbitmap brinrescan brinendscan brinmarkpos brinrestrpos brinbuild brinbuildempty brinbulkdelete brinvacuumcleanup - brincostestimate brinoptions )); #define BRIN_AM_OID 3580 #endif /* PG_AM_H */ *** a/src/include/catalog/pg_amproc.h --- b/src/include/catalog/pg_amproc.h *************** *** 438,680 **** DATA(insert ( 4064 17 17 1 3383 )); DATA(insert ( 4064 17 17 2 3384 )); DATA(insert ( 4064 17 17 3 3385 )); DATA(insert ( 4064 17 17 4 3386 )); ! DATA(insert ( 4064 17 17 5 1949 )); ! DATA(insert ( 4064 17 17 6 1950 )); ! DATA(insert ( 4064 17 17 7 1952 )); ! DATA(insert ( 4064 17 17 8 1951 )); /* minmax "char" */ DATA(insert ( 4062 18 18 1 3383 )); DATA(insert ( 4062 18 18 2 3384 )); DATA(insert ( 4062 18 18 3 3385 )); DATA(insert ( 4062 18 18 4 3386 )); ! 
DATA(insert ( 4062 18 18 5 1246 )); ! DATA(insert ( 4062 18 18 6 72 )); ! DATA(insert ( 4062 18 18 7 74 )); ! DATA(insert ( 4062 18 18 8 73 )); /* minmax name */ DATA(insert ( 4065 19 19 1 3383 )); DATA(insert ( 4065 19 19 2 3384 )); DATA(insert ( 4065 19 19 3 3385 )); DATA(insert ( 4065 19 19 4 3386 )); ! DATA(insert ( 4065 19 19 5 655 )); ! DATA(insert ( 4065 19 19 6 656 )); ! DATA(insert ( 4065 19 19 7 658 )); ! DATA(insert ( 4065 19 19 8 657 )); /* minmax bigint */ DATA(insert ( 4063 20 20 1 3383 )); DATA(insert ( 4063 20 20 2 3384 )); DATA(insert ( 4063 20 20 3 3385 )); DATA(insert ( 4063 20 20 4 3386 )); ! DATA(insert ( 4063 20 20 5 469 )); ! DATA(insert ( 4063 20 20 6 471 )); ! DATA(insert ( 4063 20 20 7 472 )); ! DATA(insert ( 4063 20 20 8 470 )); /* minmax smallint */ DATA(insert ( 4067 21 21 1 3383 )); DATA(insert ( 4067 21 21 2 3384 )); DATA(insert ( 4067 21 21 3 3385 )); DATA(insert ( 4067 21 21 4 3386 )); ! DATA(insert ( 4067 21 21 5 64 )); ! DATA(insert ( 4067 21 21 6 148 )); ! DATA(insert ( 4067 21 21 7 151 )); ! DATA(insert ( 4067 21 21 8 146 )); /* minmax integer */ DATA(insert ( 4054 23 23 1 3383 )); DATA(insert ( 4054 23 23 2 3384 )); DATA(insert ( 4054 23 23 3 3385 )); DATA(insert ( 4054 23 23 4 3386 )); ! DATA(insert ( 4054 23 23 5 66 )); ! DATA(insert ( 4054 23 23 6 149 )); ! DATA(insert ( 4054 23 23 7 150 )); ! DATA(insert ( 4054 23 23 8 147 )); /* minmax text */ DATA(insert ( 4056 25 25 1 3383 )); DATA(insert ( 4056 25 25 2 3384 )); DATA(insert ( 4056 25 25 3 3385 )); DATA(insert ( 4056 25 25 4 3386 )); ! DATA(insert ( 4056 25 25 5 740 )); ! DATA(insert ( 4056 25 25 6 741 )); ! DATA(insert ( 4056 25 25 7 743 )); ! DATA(insert ( 4056 25 25 8 742 )); /* minmax oid */ DATA(insert ( 4068 26 26 1 3383 )); DATA(insert ( 4068 26 26 2 3384 )); DATA(insert ( 4068 26 26 3 3385 )); DATA(insert ( 4068 26 26 4 3386 )); ! DATA(insert ( 4068 26 26 5 716 )); ! DATA(insert ( 4068 26 26 6 717 )); ! DATA(insert ( 4068 26 26 7 1639 )); ! 
DATA(insert ( 4068 26 26 8 1638 )); /* minmax tid */ DATA(insert ( 4069 27 27 1 3383 )); DATA(insert ( 4069 27 27 2 3384 )); DATA(insert ( 4069 27 27 3 3385 )); DATA(insert ( 4069 27 27 4 3386 )); ! DATA(insert ( 4069 27 27 5 2791 )); ! DATA(insert ( 4069 27 27 6 2793 )); ! DATA(insert ( 4069 27 27 7 2792 )); ! DATA(insert ( 4069 27 27 8 2790 )); /* minmax real */ DATA(insert ( 4070 700 700 1 3383 )); DATA(insert ( 4070 700 700 2 3384 )); DATA(insert ( 4070 700 700 3 3385 )); DATA(insert ( 4070 700 700 4 3386 )); ! DATA(insert ( 4070 700 700 5 289 )); ! DATA(insert ( 4070 700 700 6 290 )); ! DATA(insert ( 4070 700 700 7 292 )); ! DATA(insert ( 4070 700 700 8 291 )); /* minmax double precision */ DATA(insert ( 4071 701 701 1 3383 )); DATA(insert ( 4071 701 701 2 3384 )); DATA(insert ( 4071 701 701 3 3385 )); DATA(insert ( 4071 701 701 4 3386 )); ! DATA(insert ( 4071 701 701 5 295 )); ! DATA(insert ( 4071 701 701 6 296 )); ! DATA(insert ( 4071 701 701 7 298 )); ! DATA(insert ( 4071 701 701 8 297 )); /* minmax abstime */ DATA(insert ( 4072 702 702 1 3383 )); DATA(insert ( 4072 702 702 2 3384 )); DATA(insert ( 4072 702 702 3 3385 )); DATA(insert ( 4072 702 702 4 3386 )); ! DATA(insert ( 4072 702 702 5 253 )); ! DATA(insert ( 4072 702 702 6 255 )); ! DATA(insert ( 4072 702 702 7 256 )); ! DATA(insert ( 4072 702 702 8 254 )); /* minmax reltime */ DATA(insert ( 4073 703 703 1 3383 )); DATA(insert ( 4073 703 703 2 3384 )); DATA(insert ( 4073 703 703 3 3385 )); DATA(insert ( 4073 703 703 4 3386 )); ! DATA(insert ( 4073 703 703 5 259 )); ! DATA(insert ( 4073 703 703 6 261 )); ! DATA(insert ( 4073 703 703 7 262 )); ! DATA(insert ( 4073 703 703 8 260 )); /* minmax macaddr */ DATA(insert ( 4074 829 829 1 3383 )); DATA(insert ( 4074 829 829 2 3384 )); DATA(insert ( 4074 829 829 3 3385 )); DATA(insert ( 4074 829 829 4 3386 )); ! DATA(insert ( 4074 829 829 5 831 )); ! DATA(insert ( 4074 829 829 6 832 )); ! DATA(insert ( 4074 829 829 7 834 )); ! 
DATA(insert ( 4074 829 829 8 833 )); /* minmax inet */ DATA(insert ( 4075 869 869 1 3383 )); DATA(insert ( 4075 869 869 2 3384 )); DATA(insert ( 4075 869 869 3 3385 )); DATA(insert ( 4075 869 869 4 3386 )); ! DATA(insert ( 4075 869 869 5 921 )); ! DATA(insert ( 4075 869 869 6 922 )); ! DATA(insert ( 4075 869 869 7 924 )); ! DATA(insert ( 4075 869 869 8 923 )); /* minmax character */ DATA(insert ( 4076 1042 1042 1 3383 )); DATA(insert ( 4076 1042 1042 2 3384 )); DATA(insert ( 4076 1042 1042 3 3385 )); DATA(insert ( 4076 1042 1042 4 3386 )); ! DATA(insert ( 4076 1042 1042 5 1049 )); ! DATA(insert ( 4076 1042 1042 6 1050 )); ! DATA(insert ( 4076 1042 1042 7 1052 )); ! DATA(insert ( 4076 1042 1042 8 1051 )); /* minmax date */ DATA(insert ( 4061 1082 1082 1 3383 )); DATA(insert ( 4061 1082 1082 2 3384 )); DATA(insert ( 4061 1082 1082 3 3385 )); DATA(insert ( 4061 1082 1082 4 3386 )); ! DATA(insert ( 4061 1082 1082 5 1087 )); ! DATA(insert ( 4061 1082 1082 6 1088 )); ! DATA(insert ( 4061 1082 1082 7 1090 )); ! DATA(insert ( 4061 1082 1082 8 1089 )); /* minmax time without time zone */ DATA(insert ( 4077 1083 1083 1 3383 )); DATA(insert ( 4077 1083 1083 2 3384 )); DATA(insert ( 4077 1083 1083 3 3385 )); DATA(insert ( 4077 1083 1083 4 3386 )); ! DATA(insert ( 4077 1083 1083 5 1102 )); ! DATA(insert ( 4077 1083 1083 6 1103 )); ! DATA(insert ( 4077 1083 1083 7 1105 )); ! DATA(insert ( 4077 1083 1083 8 1104 )); /* minmax timestamp without time zone */ DATA(insert ( 4059 1114 1114 1 3383 )); DATA(insert ( 4059 1114 1114 2 3384 )); DATA(insert ( 4059 1114 1114 3 3385 )); DATA(insert ( 4059 1114 1114 4 3386 )); ! DATA(insert ( 4059 1114 1114 5 2054 )); ! DATA(insert ( 4059 1114 1114 6 2055 )); ! DATA(insert ( 4059 1114 1114 7 2056 )); ! DATA(insert ( 4059 1114 1114 8 2057 )); /* minmax timestamp with time zone */ DATA(insert ( 4060 1184 1184 1 3383 )); DATA(insert ( 4060 1184 1184 2 3384 )); DATA(insert ( 4060 1184 1184 3 3385 )); DATA(insert ( 4060 1184 1184 4 3386 )); ! 
DATA(insert ( 4060 1184 1184 5 1154 )); ! DATA(insert ( 4060 1184 1184 6 1155 )); ! DATA(insert ( 4060 1184 1184 7 1156 )); ! DATA(insert ( 4060 1184 1184 8 1157 )); /* minmax interval */ DATA(insert ( 4078 1186 1186 1 3383 )); DATA(insert ( 4078 1186 1186 2 3384 )); DATA(insert ( 4078 1186 1186 3 3385 )); DATA(insert ( 4078 1186 1186 4 3386 )); ! DATA(insert ( 4078 1186 1186 5 1164 )); ! DATA(insert ( 4078 1186 1186 6 1165 )); ! DATA(insert ( 4078 1186 1186 7 1166 )); ! DATA(insert ( 4078 1186 1186 8 1167 )); /* minmax time with time zone */ DATA(insert ( 4058 1266 1266 1 3383 )); DATA(insert ( 4058 1266 1266 2 3384 )); DATA(insert ( 4058 1266 1266 3 3385 )); DATA(insert ( 4058 1266 1266 4 3386 )); ! DATA(insert ( 4058 1266 1266 5 1354 )); ! DATA(insert ( 4058 1266 1266 6 1355 )); ! DATA(insert ( 4058 1266 1266 7 1356 )); ! DATA(insert ( 4058 1266 1266 8 1357 )); /* minmax bit */ DATA(insert ( 4079 1560 1560 1 3383 )); DATA(insert ( 4079 1560 1560 2 3384 )); DATA(insert ( 4079 1560 1560 3 3385 )); DATA(insert ( 4079 1560 1560 4 3386 )); ! DATA(insert ( 4079 1560 1560 5 1595 )); ! DATA(insert ( 4079 1560 1560 6 1594 )); ! DATA(insert ( 4079 1560 1560 7 1592 )); ! DATA(insert ( 4079 1560 1560 8 1593 )); /* minmax bit varying */ DATA(insert ( 4080 1562 1562 1 3383 )); DATA(insert ( 4080 1562 1562 2 3384 )); DATA(insert ( 4080 1562 1562 3 3385 )); DATA(insert ( 4080 1562 1562 4 3386 )); ! DATA(insert ( 4080 1562 1562 5 1671 )); ! DATA(insert ( 4080 1562 1562 6 1670 )); ! DATA(insert ( 4080 1562 1562 7 1668 )); ! DATA(insert ( 4080 1562 1562 8 1669 )); /* minmax numeric */ DATA(insert ( 4055 1700 1700 1 3383 )); DATA(insert ( 4055 1700 1700 2 3384 )); DATA(insert ( 4055 1700 1700 3 3385 )); DATA(insert ( 4055 1700 1700 4 3386 )); ! DATA(insert ( 4055 1700 1700 5 1722 )); ! DATA(insert ( 4055 1700 1700 6 1723 )); ! DATA(insert ( 4055 1700 1700 7 1721 )); ! 
DATA(insert ( 4055 1700 1700 8 1720 )); /* minmax uuid */ DATA(insert ( 4081 2950 2950 1 3383 )); DATA(insert ( 4081 2950 2950 2 3384 )); DATA(insert ( 4081 2950 2950 3 3385 )); DATA(insert ( 4081 2950 2950 4 3386 )); ! DATA(insert ( 4081 2950 2950 5 2954 )); ! DATA(insert ( 4081 2950 2950 6 2955 )); ! DATA(insert ( 4081 2950 2950 7 2957 )); ! DATA(insert ( 4081 2950 2950 8 2958 )); /* minmax pg_lsn */ DATA(insert ( 4082 3220 3220 1 3383 )); DATA(insert ( 4082 3220 3220 2 3384 )); DATA(insert ( 4082 3220 3220 3 3385 )); DATA(insert ( 4082 3220 3220 4 3386 )); ! DATA(insert ( 4082 3220 3220 5 3231 )); ! DATA(insert ( 4082 3220 3220 6 3232 )); ! DATA(insert ( 4082 3220 3220 7 3234 )); ! DATA(insert ( 4082 3220 3220 8 3235 )); #endif /* PG_AMPROC_H */ --- 438,680 ---- DATA(insert ( 4064 17 17 2 3384 )); DATA(insert ( 4064 17 17 3 3385 )); DATA(insert ( 4064 17 17 4 3386 )); ! DATA(insert ( 4064 17 17 11 1949 )); ! DATA(insert ( 4064 17 17 12 1950 )); ! DATA(insert ( 4064 17 17 13 1952 )); ! DATA(insert ( 4064 17 17 14 1951 )); /* minmax "char" */ DATA(insert ( 4062 18 18 1 3383 )); DATA(insert ( 4062 18 18 2 3384 )); DATA(insert ( 4062 18 18 3 3385 )); DATA(insert ( 4062 18 18 4 3386 )); ! DATA(insert ( 4062 18 18 11 1246 )); ! DATA(insert ( 4062 18 18 12 72 )); ! DATA(insert ( 4062 18 18 13 74 )); ! DATA(insert ( 4062 18 18 14 73 )); /* minmax name */ DATA(insert ( 4065 19 19 1 3383 )); DATA(insert ( 4065 19 19 2 3384 )); DATA(insert ( 4065 19 19 3 3385 )); DATA(insert ( 4065 19 19 4 3386 )); ! DATA(insert ( 4065 19 19 11 655 )); ! DATA(insert ( 4065 19 19 12 656 )); ! DATA(insert ( 4065 19 19 13 658 )); ! DATA(insert ( 4065 19 19 14 657 )); /* minmax bigint */ DATA(insert ( 4063 20 20 1 3383 )); DATA(insert ( 4063 20 20 2 3384 )); DATA(insert ( 4063 20 20 3 3385 )); DATA(insert ( 4063 20 20 4 3386 )); ! DATA(insert ( 4063 20 20 11 469 )); ! DATA(insert ( 4063 20 20 12 471 )); ! DATA(insert ( 4063 20 20 13 472 )); ! 
DATA(insert ( 4063 20 20 14 470 )); /* minmax smallint */ DATA(insert ( 4067 21 21 1 3383 )); DATA(insert ( 4067 21 21 2 3384 )); DATA(insert ( 4067 21 21 3 3385 )); DATA(insert ( 4067 21 21 4 3386 )); ! DATA(insert ( 4067 21 21 11 64 )); ! DATA(insert ( 4067 21 21 12 148 )); ! DATA(insert ( 4067 21 21 13 151 )); ! DATA(insert ( 4067 21 21 14 146 )); /* minmax integer */ DATA(insert ( 4054 23 23 1 3383 )); DATA(insert ( 4054 23 23 2 3384 )); DATA(insert ( 4054 23 23 3 3385 )); DATA(insert ( 4054 23 23 4 3386 )); ! DATA(insert ( 4054 23 23 11 66 )); ! DATA(insert ( 4054 23 23 12 149 )); ! DATA(insert ( 4054 23 23 13 150 )); ! DATA(insert ( 4054 23 23 14 147 )); /* minmax text */ DATA(insert ( 4056 25 25 1 3383 )); DATA(insert ( 4056 25 25 2 3384 )); DATA(insert ( 4056 25 25 3 3385 )); DATA(insert ( 4056 25 25 4 3386 )); ! DATA(insert ( 4056 25 25 11 740 )); ! DATA(insert ( 4056 25 25 12 741 )); ! DATA(insert ( 4056 25 25 13 743 )); ! DATA(insert ( 4056 25 25 14 742 )); /* minmax oid */ DATA(insert ( 4068 26 26 1 3383 )); DATA(insert ( 4068 26 26 2 3384 )); DATA(insert ( 4068 26 26 3 3385 )); DATA(insert ( 4068 26 26 4 3386 )); ! DATA(insert ( 4068 26 26 11 716 )); ! DATA(insert ( 4068 26 26 12 717 )); ! DATA(insert ( 4068 26 26 13 1639 )); ! DATA(insert ( 4068 26 26 14 1638 )); /* minmax tid */ DATA(insert ( 4069 27 27 1 3383 )); DATA(insert ( 4069 27 27 2 3384 )); DATA(insert ( 4069 27 27 3 3385 )); DATA(insert ( 4069 27 27 4 3386 )); ! DATA(insert ( 4069 27 27 11 2791 )); ! DATA(insert ( 4069 27 27 12 2793 )); ! DATA(insert ( 4069 27 27 13 2792 )); ! DATA(insert ( 4069 27 27 14 2790 )); /* minmax real */ DATA(insert ( 4070 700 700 1 3383 )); DATA(insert ( 4070 700 700 2 3384 )); DATA(insert ( 4070 700 700 3 3385 )); DATA(insert ( 4070 700 700 4 3386 )); ! DATA(insert ( 4070 700 700 11 289 )); ! DATA(insert ( 4070 700 700 12 290 )); ! DATA(insert ( 4070 700 700 13 292 )); ! 
DATA(insert ( 4070 700 700 14 291 )); /* minmax double precision */ DATA(insert ( 4071 701 701 1 3383 )); DATA(insert ( 4071 701 701 2 3384 )); DATA(insert ( 4071 701 701 3 3385 )); DATA(insert ( 4071 701 701 4 3386 )); ! DATA(insert ( 4071 701 701 11 295 )); ! DATA(insert ( 4071 701 701 12 296 )); ! DATA(insert ( 4071 701 701 13 298 )); ! DATA(insert ( 4071 701 701 14 297 )); /* minmax abstime */ DATA(insert ( 4072 702 702 1 3383 )); DATA(insert ( 4072 702 702 2 3384 )); DATA(insert ( 4072 702 702 3 3385 )); DATA(insert ( 4072 702 702 4 3386 )); ! DATA(insert ( 4072 702 702 11 253 )); ! DATA(insert ( 4072 702 702 12 255 )); ! DATA(insert ( 4072 702 702 13 256 )); ! DATA(insert ( 4072 702 702 14 254 )); /* minmax reltime */ DATA(insert ( 4073 703 703 1 3383 )); DATA(insert ( 4073 703 703 2 3384 )); DATA(insert ( 4073 703 703 3 3385 )); DATA(insert ( 4073 703 703 4 3386 )); ! DATA(insert ( 4073 703 703 11 259 )); ! DATA(insert ( 4073 703 703 12 261 )); ! DATA(insert ( 4073 703 703 13 262 )); ! DATA(insert ( 4073 703 703 14 260 )); /* minmax macaddr */ DATA(insert ( 4074 829 829 1 3383 )); DATA(insert ( 4074 829 829 2 3384 )); DATA(insert ( 4074 829 829 3 3385 )); DATA(insert ( 4074 829 829 4 3386 )); ! DATA(insert ( 4074 829 829 11 831 )); ! DATA(insert ( 4074 829 829 12 832 )); ! DATA(insert ( 4074 829 829 13 834 )); ! DATA(insert ( 4074 829 829 14 833 )); /* minmax inet */ DATA(insert ( 4075 869 869 1 3383 )); DATA(insert ( 4075 869 869 2 3384 )); DATA(insert ( 4075 869 869 3 3385 )); DATA(insert ( 4075 869 869 4 3386 )); ! DATA(insert ( 4075 869 869 11 921 )); ! DATA(insert ( 4075 869 869 12 922 )); ! DATA(insert ( 4075 869 869 13 924 )); ! DATA(insert ( 4075 869 869 14 923 )); /* minmax character */ DATA(insert ( 4076 1042 1042 1 3383 )); DATA(insert ( 4076 1042 1042 2 3384 )); DATA(insert ( 4076 1042 1042 3 3385 )); DATA(insert ( 4076 1042 1042 4 3386 )); ! DATA(insert ( 4076 1042 1042 11 1049 )); ! DATA(insert ( 4076 1042 1042 12 1050 )); ! 
DATA(insert ( 4076 1042 1042 13 1052 )); ! DATA(insert ( 4076 1042 1042 14 1051 )); /* minmax date */ DATA(insert ( 4061 1082 1082 1 3383 )); DATA(insert ( 4061 1082 1082 2 3384 )); DATA(insert ( 4061 1082 1082 3 3385 )); DATA(insert ( 4061 1082 1082 4 3386 )); ! DATA(insert ( 4061 1082 1082 11 1087 )); ! DATA(insert ( 4061 1082 1082 12 1088 )); ! DATA(insert ( 4061 1082 1082 13 1090 )); ! DATA(insert ( 4061 1082 1082 14 1089 )); /* minmax time without time zone */ DATA(insert ( 4077 1083 1083 1 3383 )); DATA(insert ( 4077 1083 1083 2 3384 )); DATA(insert ( 4077 1083 1083 3 3385 )); DATA(insert ( 4077 1083 1083 4 3386 )); ! DATA(insert ( 4077 1083 1083 11 1102 )); ! DATA(insert ( 4077 1083 1083 12 1103 )); ! DATA(insert ( 4077 1083 1083 13 1105 )); ! DATA(insert ( 4077 1083 1083 14 1104 )); /* minmax timestamp without time zone */ DATA(insert ( 4059 1114 1114 1 3383 )); DATA(insert ( 4059 1114 1114 2 3384 )); DATA(insert ( 4059 1114 1114 3 3385 )); DATA(insert ( 4059 1114 1114 4 3386 )); ! DATA(insert ( 4059 1114 1114 11 2054 )); ! DATA(insert ( 4059 1114 1114 12 2055 )); ! DATA(insert ( 4059 1114 1114 13 2056 )); ! DATA(insert ( 4059 1114 1114 14 2057 )); /* minmax timestamp with time zone */ DATA(insert ( 4060 1184 1184 1 3383 )); DATA(insert ( 4060 1184 1184 2 3384 )); DATA(insert ( 4060 1184 1184 3 3385 )); DATA(insert ( 4060 1184 1184 4 3386 )); ! DATA(insert ( 4060 1184 1184 11 1154 )); ! DATA(insert ( 4060 1184 1184 12 1155 )); ! DATA(insert ( 4060 1184 1184 13 1156 )); ! DATA(insert ( 4060 1184 1184 14 1157 )); /* minmax interval */ DATA(insert ( 4078 1186 1186 1 3383 )); DATA(insert ( 4078 1186 1186 2 3384 )); DATA(insert ( 4078 1186 1186 3 3385 )); DATA(insert ( 4078 1186 1186 4 3386 )); ! DATA(insert ( 4078 1186 1186 11 1164 )); ! DATA(insert ( 4078 1186 1186 12 1165 )); ! DATA(insert ( 4078 1186 1186 13 1166 )); ! 
DATA(insert ( 4078 1186 1186 14 1167 )); /* minmax time with time zone */ DATA(insert ( 4058 1266 1266 1 3383 )); DATA(insert ( 4058 1266 1266 2 3384 )); DATA(insert ( 4058 1266 1266 3 3385 )); DATA(insert ( 4058 1266 1266 4 3386 )); ! DATA(insert ( 4058 1266 1266 11 1354 )); ! DATA(insert ( 4058 1266 1266 12 1355 )); ! DATA(insert ( 4058 1266 1266 13 1356 )); ! DATA(insert ( 4058 1266 1266 14 1357 )); /* minmax bit */ DATA(insert ( 4079 1560 1560 1 3383 )); DATA(insert ( 4079 1560 1560 2 3384 )); DATA(insert ( 4079 1560 1560 3 3385 )); DATA(insert ( 4079 1560 1560 4 3386 )); ! DATA(insert ( 4079 1560 1560 11 1595 )); ! DATA(insert ( 4079 1560 1560 12 1594 )); ! DATA(insert ( 4079 1560 1560 13 1592 )); ! DATA(insert ( 4079 1560 1560 14 1593 )); /* minmax bit varying */ DATA(insert ( 4080 1562 1562 1 3383 )); DATA(insert ( 4080 1562 1562 2 3384 )); DATA(insert ( 4080 1562 1562 3 3385 )); DATA(insert ( 4080 1562 1562 4 3386 )); ! DATA(insert ( 4080 1562 1562 11 1671 )); ! DATA(insert ( 4080 1562 1562 12 1670 )); ! DATA(insert ( 4080 1562 1562 13 1668 )); ! DATA(insert ( 4080 1562 1562 14 1669 )); /* minmax numeric */ DATA(insert ( 4055 1700 1700 1 3383 )); DATA(insert ( 4055 1700 1700 2 3384 )); DATA(insert ( 4055 1700 1700 3 3385 )); DATA(insert ( 4055 1700 1700 4 3386 )); ! DATA(insert ( 4055 1700 1700 11 1722 )); ! DATA(insert ( 4055 1700 1700 12 1723 )); ! DATA(insert ( 4055 1700 1700 13 1721 )); ! DATA(insert ( 4055 1700 1700 14 1720 )); /* minmax uuid */ DATA(insert ( 4081 2950 2950 1 3383 )); DATA(insert ( 4081 2950 2950 2 3384 )); DATA(insert ( 4081 2950 2950 3 3385 )); DATA(insert ( 4081 2950 2950 4 3386 )); ! DATA(insert ( 4081 2950 2950 11 2954 )); ! DATA(insert ( 4081 2950 2950 12 2955 )); ! DATA(insert ( 4081 2950 2950 13 2957 )); ! DATA(insert ( 4081 2950 2950 14 2958 )); /* minmax pg_lsn */ DATA(insert ( 4082 3220 3220 1 3383 )); DATA(insert ( 4082 3220 3220 2 3384 )); DATA(insert ( 4082 3220 3220 3 3385 )); DATA(insert ( 4082 3220 3220 4 3386 )); ! 
DATA(insert ( 4082 3220 3220 11 3231 )); ! DATA(insert ( 4082 3220 3220 12 3232 )); ! DATA(insert ( 4082 3220 3220 13 3234 )); ! DATA(insert ( 4082 3220 3220 14 3235 )); #endif /* PG_AMPROC_H */ *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** *** 591,597 **** DATA(insert OID = 3800 ( brincostestimate PGNSP PGUID 12 1 0 0 0 f f f f t f DESCR("brin(internal)"); DATA(insert OID = 3801 ( brinoptions PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ brinoptions _null_ _null_ _null_ )); DESCR("brin(internal)"); ! DATA(insert OID = 339 ( poly_same PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_same _null_ _null_ _null_ )); DATA(insert OID = 340 ( poly_contain PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_contain _null_ _null_ _null_ )); --- 591,598 ---- DESCR("brin(internal)"); DATA(insert OID = 3801 ( brinoptions PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ brinoptions _null_ _null_ _null_ )); DESCR("brin(internal)"); ! DATA(insert OID = 3952 ( brin_summarize_new_values PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 23 "26" _null_ _null_ _null_ _null_ brin_summarize_new_values _null_ _null_ _null_ )); ! DESCR("brin: standalone scan new table pages"); DATA(insert OID = 339 ( poly_same PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_same _null_ _null_ _null_ )); DATA(insert OID = 340 ( poly_contain PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_contain _null_ _null_ _null_ )); *** a/src/test/regress/expected/opr_sanity.out --- b/src/test/regress/expected/opr_sanity.out *************** *** 1847,1859 **** WHERE NOT ( -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and -- at least one of 4 and 6 must be given. -- SP-GiST has five support functions, all mandatory ! 
-- BRIN has eight support functions, all mandatory amname = 'btree' AND procnums @> '{1}' OR amname = 'hash' AND procnums = '{1}' OR amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums = '{1, 2, 3, 4, 5, 6, 7, 8}' ); amname | opfname | amproclefttype | amprocrighttype | procnums --------+---------+----------------+-----------------+---------- --- 1847,1859 ---- -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and -- at least one of 4 and 6 must be given. -- SP-GiST has five support functions, all mandatory ! -- BRIN has four mandatory support functions, and a bunch of optionals amname = 'btree' AND procnums @> '{1}' OR amname = 'hash' AND procnums = '{1}' OR amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums @> '{1, 2, 3, 4}' ); amname | opfname | amproclefttype | amprocrighttype | procnums --------+---------+----------------+-----------------+---------- *************** *** 1875,1881 **** WHERE NOT ( amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums = '{1, 2, 3, 4, 5, 6, 7, 8}' ); amname | opcname | procnums --------+---------+---------- --- 1875,1881 ---- amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! 
amname = 'brin' AND procnums @> '{1, 2, 3, 4}' ); amname | opcname | procnums --------+---------+---------- *** a/src/test/regress/sql/opr_sanity.sql --- b/src/test/regress/sql/opr_sanity.sql *************** *** 1195,1207 **** WHERE NOT ( -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and -- at least one of 4 and 6 must be given. -- SP-GiST has five support functions, all mandatory ! -- BRIN has eight support functions, all mandatory amname = 'btree' AND procnums @> '{1}' OR amname = 'hash' AND procnums = '{1}' OR amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums = '{1, 2, 3, 4, 5, 6, 7, 8}' ); -- Also, check if there are any pg_opclass entries that don't seem to have --- 1195,1207 ---- -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and -- at least one of 4 and 6 must be given. -- SP-GiST has five support functions, all mandatory ! -- BRIN has four mandatory support functions, and a bunch of optionals amname = 'btree' AND procnums @> '{1}' OR amname = 'hash' AND procnums = '{1}' OR amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums @> '{1, 2, 3, 4}' ); -- Also, check if there are any pg_opclass entries that don't seem to have *************** *** 1221,1227 **** WHERE NOT ( amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! 
amname = 'brin' AND procnums = '{1, 2, 3, 4, 5, 6, 7, 8}' ); -- Unfortunately, we can't check the amproc link very well because the --- 1221,1227 ---- amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR ! amname = 'brin' AND procnums @> '{1, 2, 3, 4}' ); -- Unfortunately, we can't check the amproc link very well because the