From 3023a9c77892af520c392c8b6fc944ad0ff75096 Mon Sep 17 00:00:00 2001 From: kommih Date: Fri, 25 Jan 2019 15:04:50 +1100 Subject: [PATCH] Doc and comments update --- doc/src/sgml/{indexam.sgml => am.sgml} | 506 +++++++++++++++++- doc/src/sgml/catalogs.sgml | 5 +- doc/src/sgml/config.sgml | 24 + doc/src/sgml/filelist.sgml | 2 +- doc/src/sgml/postgres.sgml | 2 +- doc/src/sgml/ref/create_access_method.sgml | 12 +- .../sgml/ref/create_materialized_view.sgml | 14 + doc/src/sgml/ref/create_table.sgml | 18 +- doc/src/sgml/ref/create_table_as.sgml | 14 + doc/src/sgml/release-9.6.sgml | 2 +- doc/src/sgml/xindex.sgml | 2 +- src/backend/access/heap/heapam.c | 8 +- src/backend/access/heap/heapam_handler.c | 29 +- src/backend/access/table/tableam.c | 10 + src/include/access/tableam.h | 165 ++++++ 15 files changed, 779 insertions(+), 34 deletions(-) rename doc/src/sgml/{indexam.sgml => am.sgml} (79%) diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/am.sgml similarity index 79% rename from doc/src/sgml/indexam.sgml rename to doc/src/sgml/am.sgml index 05102724ea..118df57a1c 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/am.sgml @@ -1,16 +1,480 @@ - + - - Index Access Method Interface Definition + + Access Method Interface Definition This chapter defines the interface between the core - PostgreSQL system and index access - methods, which manage individual index types. The core system - knows nothing about indexes beyond what is specified here, so it is - possible to develop entirely new index types by writing add-on code. + PostgreSQL system and access + methods, which manage individual INDEX + and TABLE types. The core system knows nothing + about these access methods beyond what is specified here, so it is + possible to develop entirely new access method types by writing add-on code. + + + + Overview of Table access methods + + + All Tables in PostgreSQL are the primary + data store. 
Each table is stored as its own physical relation + and so is described by an entry in the pg_class + catalog. The contents of a table are entirely under the control of its + access method. (All the access methods furthermore use the standard page + layout described in .) + + + + Table access method API + + + Each table access method is described by a row in the + pg_am system + catalog. The pg_am entry specifies a type + of the access method and a handler function for the + access method. These entries can be created and deleted using the + and SQL commands. + + + + A table access method handler function must be declared to accept a + single argument of type internal and to return the + pseudo-type table_am_handler. The argument is a dummy value that + simply serves to prevent handler functions from being called directly from + SQL commands. The result of the function must be a pointer to a server-lifetime struct of + type TableAmRoutine, which contains everything + that the core code needs to know to make use of the table access method. + The TableAmRoutine struct, also called the access + method's API struct, includes fields specifying assorted + fixed properties of the access method, such as whether it can support + bitmap scans. More importantly, it contains pointers to support + functions for the access method, which do all of the real work to access + tables. These support functions are plain C functions and are not + visible or callable at the SQL level. The support functions are described + below. + + + + The structure TableAmRoutine is defined thus: + +/* + * API struct for a table AM. Note this must be allocated in a + * server-lifetime manner, typically as a static const struct, which then gets + * returned by FormData_pg_am.amhandler. + */ +typedef struct TableAmRoutine +{ + NodeTag type; + + /* + * Return slot implementation suitable for storing a tuple of this AM.
+ */ + const TupleTableSlotOps *(*slot_callbacks) (Relation rel); + + + /* ------------------------------------------------------------------------ + * Table scan callbacks. + * ------------------------------------------------------------------------ + */ + + /* + * Returns scan descriptor that is opened on the provided relation based + * on the provided keys. + */ + TableScanDesc (*scan_begin) (Relation rel, + Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); + + /* API to end scan that is started */ + void (*scan_end) (TableScanDesc scan); + + /* Restart the scan that is already started on the corresponding relation */ + void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); + + /* Returns the next satisfied tuple from the scan */ + TupleTableSlot *(*scan_getnextslot) (TableScanDesc scan, + ScanDirection direction, TupleTableSlot *slot); + + + /* ------------------------------------------------------------------------ + * Parallel table scan related functions. + * ------------------------------------------------------------------------ + */ + + /* + * Returns the total size that is required for the relation to perform + * parallel sequential scan on the relation + */ + Size (*parallelscan_estimate) (Relation rel); + + /* + * Initialize the parallel scan of the relation that is necessary and also + * return the total size that is required for storing the parallel scan + */ + Size (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc parallel_scan); + + /* + * Reinitialize the parallel scan structure parameters that are necessary to + * restart the parallel scan again. 
+ */ + void (*parallelscan_reinitialize) (Relation rel, ParallelTableScanDesc parallel_scan); + + + /* ------------------------------------------------------------------------ + * Index Scan Callbacks + * ------------------------------------------------------------------------ + */ + + /* + * Returns an allocated and prepared IndexFetchTableData structure + * for the relation. + */ + struct IndexFetchTableData *(*begin_index_fetch) (Relation rel); + + /* Resets the internal members of the IndexFetchTableData structure */ + void (*reset_index_fetch) (struct IndexFetchTableData *data); + + /* Frees the IndexFetchTableData that is allocated */ + void (*end_index_fetch) (struct IndexFetchTableData *data); + + /* + * Compute the newest xid among the tuples pointed to by items. This is + * used to compute what snapshots to conflict with when replaying WAL + * records for page-level index vacuums. + */ + TransactionId (*compute_xid_horizon_for_tuples) (Relation rel, + ItemPointerData *items, + int nitems); + + + /* ------------------------------------------------------------------------ + * Manipulations of physical tuples. + * ------------------------------------------------------------------------ + */ + + /* + * Insert the tuple into the relation specified and provide the location + * of the tuple in the form of ItemPointerData and also use the + * BulkInsertStateData if available. + */ + void (*tuple_insert) (Relation rel, TupleTableSlot *slot, CommandId cid, + int options, struct BulkInsertStateData *bistate); + + /* + * Similar to the tuple_insert API, but inserts the tuple with a + * speculative token, to confirm the success of the operation.
+ */ + void (*tuple_insert_speculative) (Relation rel, + TupleTableSlot *slot, + CommandId cid, + int options, + struct BulkInsertStateData *bistate, + uint32 specToken); + + /* + * Completes the speculative insert that was done by + * tuple_insert_speculative, according to the succeeded flag. + * It does not return a result. + */ + void (*tuple_complete_speculative) (Relation rel, + TupleTableSlot *slot, + uint32 specToken, + bool succeeded); + + /* + * Deletes the tuple of a relation pointed to by the ItemPointer + * and returns the result of the operation. In case of any failure, + * updates the hufd. + */ + HTSU_Result (*tuple_delete) (Relation rel, + ItemPointer tid, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + bool changingPart); + + /* + * Updates the tuple pointed to by the ItemPointer with the new tuple, + * returns the result of the operation, and also sets the flag that + * indicates whether the indexes need an update. In case of any + * failure, it updates the hufd. + */ + HTSU_Result (*tuple_update) (Relation rel, + ItemPointer otid, + TupleTableSlot *slot, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + LockTupleMode *lockmode, + bool *update_indexes); + + /* + * Insert multiple tuples into the relation for faster data insertion. + * It can use the BulkInsertStateData if available. + */ + void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots, + CommandId cid, int options, struct BulkInsertStateData *bistate); + + /* + * Locks the tuple record specified by the ItemPointer after getting the + * latest record and returns the result of the operation. In case of + * failure, updates the hufd.
+ */ + HTSU_Result (*tuple_lock) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + CommandId cid, + LockTupleMode mode, + LockWaitPolicy wait_policy, + uint8 flags, + HeapUpdateFailureData *hufd); + + /* + * Perform operations necessary to complete insertions made via + * tuple_insert and multi_insert with a BulkInsertState specified. This + * may e.g. be used to flush the relation when inserting while skipping + * WAL. + * + * May be NULL. + */ + void (*finish_bulk_insert) (Relation rel, int options); + + + /* ------------------------------------------------------------------------ + * Non-modifying operations on individual tuples. + * ------------------------------------------------------------------------ + */ + + /* + * Fetches the latest tuple specified by the ItemPointer and stores it + * in the slot. + */ + bool (*tuple_fetch_row_version) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + Relation stats_relation); + + /* + * Gets the latest ItemPointer of the tuple based on the specified + * ItemPointer. + * + * For example, in the case of Heap AM, the update chains are created + * whenever any tuple is updated. This API is useful to find out + * the latest ItemPointer. + */ + void (*tuple_get_latest_tid) (Relation rel, + Snapshot snapshot, + ItemPointer tid); + + /* + * Fetches the tuple pointed to by the ItemPointer based on the + * IndexFetchTableData and stores it in the specified slot and + * also updates the flags.
+ */ + bool (*tuple_fetch_follow) (struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead); + + /* + * Performs the tuple visibility check according to the snapshot and returns + * "true" if it is visible, otherwise "false" + */ + bool (*tuple_satisfies_snapshot) (Relation rel, + TupleTableSlot *slot, + Snapshot snapshot); + + + /* ------------------------------------------------------------------------ + * DDL related functionality. + * ------------------------------------------------------------------------ + */ + + /* + * Creates the storage that is necessary to store the tuples of the + * relation and also updates the minimum XID that is possible to insert + * the tuples. + */ + void (*relation_set_new_filenode) (Relation rel, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti); + + /* + * Truncate the specified relation; this operation is not reversible. + */ + void (*relation_nontransactional_truncate) (Relation rel); + + /* + * Performs a copy of the relation's existing data to the new filenodes + * specified by newrnode and removes the existing filenodes. + */ + void (*relation_copy_data) (Relation rel, RelFileNode newrnode); + + /* + * Performs vacuuming of the relation based on the specified params. + * It gathers all the dead tuples of the relation and cleans them, including + * the indexes. + */ + void (*relation_vacuum) (Relation onerel, int options, + struct VacuumParams *params, BufferAccessStrategy bstrategy); + + /* + * Prepares the block of the relation specified for the analysis of tuples that + * are present in the block. + */ + void (*scan_analyze_next_block) (TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy); + + /* + * Scans all the tuples in the page based on the snapshot and returns the + * visible tuple, and also updates the stats related to the page.
+ */ + bool (*scan_analyze_next_tuple) (TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, TupleTableSlot *slot); + + /* + * Reorganizes the relation data in the new filenode according to the + * specified index. All the tuples in the new filenode are ordered like the + * index, and some of the dead tuples are also removed. + */ + void (*relation_copy_for_cluster) (Relation NewHeap, Relation OldHeap, Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, + double *num_tuples, double *tups_vacuumed, double *tups_recently_dead); + + /* + * Performs the range scan of the relation instead of performing the full scan. + * Range scan can be specified by start and end block numbers. If there + * is no range, specify the InvalidBlockNumber for end block. + */ + double (*index_build_range_scan) (Relation heap_rel, + Relation index_rel, + IndexInfo *index_nfo, + bool allow_sync, + bool anyvisible, + BlockNumber start_blockno, + BlockNumber end_blockno, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan); + + /* + * Performs the table scan and inserts the satisfied records into the index. + * This API is similar to index_build_range_scan, but is used for + * the scenario where the index is being built concurrently. + */ + void (*index_validate_scan) (Relation heap_rel, + Relation index_rel, + IndexInfo *index_info, + Snapshot snapshot, + struct ValidateIndexState *state); + + + /* ------------------------------------------------------------------------ + * Planner related functions. + * ------------------------------------------------------------------------ + */ + + /* + * Estimates the total size of the relation and also returns the number of + * pages, tuples, etc. related to the corresponding relation.
+ */ + void (*relation_estimate_size) (Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, double *allvisfrac); + + + /* ------------------------------------------------------------------------ + * Executor related functions. + * ------------------------------------------------------------------------ + */ + + /* + * Scans the page for all the tuples and store all of their ItemPointers + * based on the visibility. + */ + bool (*scan_bitmap_pagescan) (TableScanDesc scan, + TBMIterateResult *tbmres); + + /* Returns the next tuple of the scan */ + bool (*scan_bitmap_pagescan_next) (TableScanDesc scan, + TupleTableSlot *slot); + + /* + * Get the next block for sampling based on the sampling method that is + * available or sequentially to get the next from the scan. + */ + bool (*scan_sample_next_block) (TableScanDesc scan, + struct SampleScanState *scanstate); + + /* + * Get the next tuple to sample from the current sampling block based on + * the sampling method, otherwise get the next visible tuple of the block. + */ + bool (*scan_sample_next_tuple) (TableScanDesc scan, + struct SampleScanState *scanstate, + TupleTableSlot *slot); +} TableAmRoutine; + + + + + + + Table scanning + + + + + + Table insert/update/delete + + + + + + Table locking + + + + + + + Table vacuum + + + + + + + Table fetch + + + + + + + + + Overview of Index access methods + All indexes in PostgreSQL are what are known technically as secondary indexes; that is, the index is @@ -42,8 +506,8 @@ dead tuples are reclaimed (by vacuuming) when the dead tuples themselves are reclaimed. - - + + Basic API Structure for Indexes @@ -217,9 +681,9 @@ typedef struct IndexAmRoutine conditions. - + - + Index Access Method Functions @@ -710,9 +1174,11 @@ amparallelrescan (IndexScanDesc scan); the beginning. - + + + - + Index Scanning @@ -865,9 +1331,9 @@ amparallelrescan (IndexScanDesc scan); if its internal implementation is unsuited to one API or the other. 
- + - + Index Locking Considerations @@ -979,9 +1445,9 @@ amparallelrescan (IndexScanDesc scan); reduce the frequency of such transaction cancellations. - + - + Index Uniqueness Checks @@ -1128,9 +1594,9 @@ amparallelrescan (IndexScanDesc scan); - + - + Index Cost Estimation Functions @@ -1377,5 +1843,7 @@ cost_qual_eval(&index_qual_cost, path->indexquals, root); Examples of cost estimator functions can be found in src/backend/utils/adt/selfuncs.c. + + diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index af4d0625ea..35122035e5 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -587,8 +587,9 @@ The catalog pg_am stores information about relation access methods. There is one row for each access method supported by the system. - Currently, only indexes have access methods. The requirements for index - access methods are discussed in detail in . + Currently, only INDEX and TABLE have + access methods. The requirements for access methods are discussed in detail + in . diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b6f5822b84..0f62270cee 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7168,6 +7168,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + default_table_access_method (string) + + default_table_access_method configuration parameter + + + + + This variable specifies the default table access method to use when creating + objects (tables and materialized views) when a CREATE command does + not explicitly specify an access method. + + + + The value is either the name of a table access method, or an empty string + to specify using the default table access method of the current database. + If the value does not match the name of any existing table access method, + PostgreSQL will automatically use the default + table access method of the current database.
+ + + + + default_tablespace (string) diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 5dfdf54815..fed460b7c3 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -89,7 +89,7 @@ - + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 96d196d229..9dce0c5f81 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -250,7 +250,7 @@ &tablesample-method; &custom-scan; &geqo; - &indexam; + &am; &generic-wal; &btree; &gist; diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml index 851c5e63be..256914022a 100644 --- a/doc/src/sgml/ref/create_access_method.sgml +++ b/doc/src/sgml/ref/create_access_method.sgml @@ -61,7 +61,8 @@ CREATE ACCESS METHOD name This clause specifies the type of access method to define. - Only INDEX is supported at present. + Only INDEX and TABLE + are supported at present. @@ -76,9 +77,12 @@ CREATE ACCESS METHOD name declared to take a single argument of type internal, and its return type depends on the type of access method; for INDEX access methods, it must - be index_am_handler. The C-level API that the handler - function must implement varies depending on the type of access method. - The index access method API is described in . + be index_am_handler and for TABLE + access methods, it must be table_am_handler. + The C-level API that the handler function must implement varies + depending on the type of access method. The index access method API + is described in and the table access method + API is described in . diff --git a/doc/src/sgml/ref/create_materialized_view.sgml b/doc/src/sgml/ref/create_materialized_view.sgml index 7f31ab4d26..3a052ee6a4 100644 --- a/doc/src/sgml/ref/create_materialized_view.sgml +++ b/doc/src/sgml/ref/create_materialized_view.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] table_name [ (column_name [, ...] 
) ] + [ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) ] [ TABLESPACE tablespace_name ] AS query @@ -85,6 +86,19 @@ CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] table_name + + USING method + + + This clause specifies the optional access method for the new materialized view; + see for more information. + If this option is not specified, then the default table access method + is chosen for the new materialized view. See + for more information. + + + + WITH ( storage_parameter [= value] [, ... ] ) diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 857515ec8f..72a1a785e7 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -29,6 +29,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI ] ) [ INHERITS ( parent_table [, ... ] ) ] [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -40,6 +41,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ) ] [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -51,6 +53,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ) ] { FOR VALUES partition_bound_spec | DEFAULT } [ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ USING method ] [ WITH ( storage_parameter [= value] [, ...
] ) | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -953,7 +956,7 @@ WITH ( MODULUS numeric_literal, REM The access method must support amgettuple (see ); at present this means GIN + linkend="index-access-methods"/>); at present this means GIN cannot be used. Although it's allowed, there is little point in using B-tree or hash indexes with an exclusion constraint, because this does nothing that an ordinary unique constraint doesn't do better. @@ -1136,6 +1139,19 @@ WITH ( MODULUS numeric_literal, REM + + USING method + + + This clause specifies the optional access method for the new table; + see for more information. + If this option is not specified, then the default table access method + is chosen for the new table. See + for more information. + + + + WITH ( storage_parameter [= value] [, ... ] ) diff --git a/doc/src/sgml/ref/create_table_as.sgml b/doc/src/sgml/ref/create_table_as.sgml index 679e8f521e..90c9dbdaa5 100644 --- a/doc/src/sgml/ref/create_table_as.sgml +++ b/doc/src/sgml/ref/create_table_as.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name [ (column_name [, ...] ) ] + [ USING method ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -120,6 +121,19 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + USING method + + + This clause specifies the optional access method for the new table; + see for more information. + If this option is not specified, then the default table access method + is chosen for the new table. See + for more information. + + + + WITH ( storage_parameter [= value] [, ...
] ) diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml index acebcc6249..c0a96c2cce 100644 --- a/doc/src/sgml/release-9.6.sgml +++ b/doc/src/sgml/release-9.6.sgml @@ -10763,7 +10763,7 @@ This commit is also listed under libpq and PL/pgSQL 2016-08-13 [ed0097e4f] Add SQL-accessible functions for inspecting index AM pro --> - Restructure index access + Restructure index access method API to hide most of it at the C level (Alexander Korotkov, Andrew Gierth) diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 9446f8b836..4fa821160c 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -36,7 +36,7 @@ described in pg_am. It is possible to add a new index access method by writing the necessary code and then creating an entry in pg_am — but that is - beyond the scope of this chapter (see ). + beyond the scope of this chapter (see ). diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 6655a95433..328626d633 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1098,7 +1098,10 @@ fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, * ---------------------------------------------------------------- */ - +/* + * Returns the prepared Heap scan descriptor that is opened on the provided + * relation using the other members. 
+ */ TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, @@ -1332,6 +1335,9 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction) #define HEAPAMSLOTDEBUG_3 #endif +/* + * Retrieve slot with next tuple in scan + */ TupleTableSlot * heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 3dc1444739..5f7b39360c 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -63,13 +63,18 @@ reform_and_rewrite_tuple(HeapTuple tuple, * ---------------------------------------------------------------- */ +/* + * Return slot implementation suitable for storing a tuple. + */ static const TupleTableSlotOps * heapam_slot_callbacks(Relation relation) { return &TTSOpsBufferHeapTuple; } - +/* + * + */ static IndexFetchTableData * heapam_begin_index_fetch(Relation rel) { @@ -107,8 +112,7 @@ heapam_end_index_fetch(IndexFetchTableData *scan) /* - * Insert a heap tuple from a slot, which may contain an OID and speculative - * insertion token. + * Insert a heap tuple from a slot, which may contain an OID */ static void heapam_heap_insert(Relation relation, TupleTableSlot *slot, CommandId cid, @@ -128,6 +132,10 @@ heapam_heap_insert(Relation relation, TupleTableSlot *slot, CommandId cid, pfree(tuple); } +/* + * Insert a heap tuple from a slot, which may contain an OID and speculative + * insertion token. + */ static void heapam_heap_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate, uint32 specToken) @@ -148,6 +156,9 @@ heapam_heap_insert_speculative(Relation relation, TupleTableSlot *slot, CommandI pfree(tuple); } +/* + * Complete the speculative insert based on the succeeded flag. 
+ */ static void heapam_heap_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 spekToken, bool succeeded) @@ -179,6 +190,11 @@ heapam_heap_delete(Relation relation, ItemPointer tid, CommandId cid, } +/* + * Updates the heap tuple pointed to by otid with the new tuple from slot + * and returns the flag to indicate whether the update affects indexes + * also. + */ static HTSU_Result heapam_heap_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -448,6 +464,10 @@ retry: return result; } +/* + * Finish the bulk insert operation by syncing the relation + * to the disk filenodes if the bulk insert skipped WAL. + */ static void heapam_finish_bulk_insert(Relation relation, int options) { @@ -460,6 +480,9 @@ heapam_finish_bulk_insert(Relation relation, int options) } +/* + * Fetches the tuple version specified by tid and stores it in the slot. + */ static bool heapam_fetch_row_version(Relation relation, ItemPointer tid, diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index a2da7b7809..8fed8a0e00 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -266,12 +266,19 @@ simple_table_delete(Relation rel, ItemPointer tid, Snapshot snapshot) } +/* + * Returns the size required to perform Parallel Block table scan. + */ Size table_block_parallelscan_estimate(Relation rel) { return sizeof(ParallelBlockTableScanDescData); } +/* + * Initializes the table block parallel scan with the relation specific + * information.
+ */ Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) { @@ -290,6 +297,9 @@ table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) return sizeof(ParallelBlockTableScanDescData); } +/* + * Reinitialize the table block parallel scan information + */ void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) { diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 4aa4369366..b997552fdb 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -76,6 +76,10 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Returns scan descriptor that is opened on the provided relation based + * on the provided keys. + */ TableScanDesc (*scan_begin) (Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, @@ -86,9 +90,15 @@ typedef struct TableAmRoutine bool is_bitmapscan, bool is_samplescan, bool temp_snap); + + /* API to end scan that is started */ void (*scan_end) (TableScanDesc scan); + + /* Restart the scan that is already started on the corresponding relation */ void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode); + + /* Returns the next satisfied tuple from the scan */ TupleTableSlot *(*scan_getnextslot) (TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot); @@ -97,8 +107,23 @@ typedef struct TableAmRoutine * Parallel table scan related functions. 
* ------------------------------------------------------------------------ */ + + /* + * Returns the total size that is required for the relation to perform + * parallel sequential scan on the relation + */ Size (*parallelscan_estimate) (Relation rel); + + /* + * Initialize the parallel scan of the relation that is necessary and also + * return the total size that is required for storing the parallel scan + */ Size (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc parallel_scan); + + /* + * Reinitialize the parallel scan structure parameters that are necessary to + * restart the parallel scan again. + */ void (*parallelscan_reinitialize) (Relation rel, ParallelTableScanDesc parallel_scan); @@ -107,8 +132,16 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Returns an allocated and prepared IndexFetchTableData structure + * for the relation. + */ struct IndexFetchTableData *(*begin_index_fetch) (Relation rel); + + /* Resets the internal members of the IndexFetchTableData structure */ void (*reset_index_fetch) (struct IndexFetchTableData *data); + + /* Frees the IndexFetchTableData that is allocated */ void (*end_index_fetch) (struct IndexFetchTableData *data); /* @@ -126,18 +159,40 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Insert the tuple into the relation specified and provide the location + * of the tuple in the form of ItemPointerData and also use the + * BulkInsertStateData if available. + */ void (*tuple_insert) (Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate); + + /* + * Similar to the tuple_insert API, but inserts the tuple with a + * speculative token, to confirm the success of the operation.
+ */ void (*tuple_insert_speculative) (Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate, uint32 specToken); + + /* + * Completes the speculative insert that was done by + * tuple_insert_speculative; the caller reports the outcome + * of the operation through the succeeded flag. + */ void (*tuple_complete_speculative) (Relation rel, TupleTableSlot *slot, uint32 specToken, bool succeeded); + + /* + * Deletes the tuple of the relation pointed to by the ItemPointer + * and returns the result of the operation. In case of any failure, + * updates the hufd. + */ HTSU_Result (*tuple_delete) (Relation rel, ItemPointer tid, CommandId cid, @@ -146,6 +201,13 @@ typedef struct TableAmRoutine bool wait, HeapUpdateFailureData *hufd, bool changingPart); + + /* + * Updates the tuple pointed to by the ItemPointer with the new tuple, + * returns the result of the operation, and also sets the flag that + * indicates whether the indexes need to be updated. In case of any + * failure, it updates the hufd. + */ HTSU_Result (*tuple_update) (Relation rel, ItemPointer otid, TupleTableSlot *slot, @@ -156,8 +218,19 @@ typedef struct TableAmRoutine HeapUpdateFailureData *hufd, LockTupleMode *lockmode, bool *update_indexes); + + /* + * Inserts multiple tuples into the relation for faster data insertion. + * It can use the BulkInsertStateData if available. + */ void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots, CommandId cid, int options, struct BulkInsertStateData *bistate); + + /* + * Locks the tuple record specified by the ItemPointer after getting the + * latest record and returns the result of the operation. In case of + * failure, updates the hufd.
+ */ HTSU_Result (*tuple_lock) (Relation rel, ItemPointer tid, Snapshot snapshot, @@ -184,19 +257,43 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Fetches the latest tuple specified by the ItemPointer and stores it + * in the slot. + */ bool (*tuple_fetch_row_version) (Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, Relation stats_relation); + + /* + * Gets the latest ItemPointer of the tuple based on the specified + * ItemPointer. + * + * For example, in the case of Heap AM, update chains are created + * whenever any tuple is updated. This API is useful to find out + * the latest ItemPointer. + */ void (*tuple_get_latest_tid) (Relation rel, Snapshot snapshot, ItemPointer tid); + + /* + * Fetches the tuple pointed to by the ItemPointer based on the + * IndexFetchTableData, stores it in the specified slot, and + * also updates the flags. + */ bool (*tuple_fetch_follow) (struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead); + + /* + * Checks the tuple visibility according to the snapshot and returns + * "true" if the tuple is visible, otherwise "false". + */ bool (*tuple_satisfies_snapshot) (Relation rel, TupleTableSlot *slot, Snapshot snapshot); @@ -207,22 +304,64 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Creates the storage that is necessary to store the tuples of the + * relation and also updates the minimum XID that is possible to insert + * the tuples. + */ void (*relation_set_new_filenode) (Relation rel, char persistence, TransactionId *freezeXid, MultiXactId *minmulti); + + /* + * Truncates the specified relation; this operation is not reversible.
+ */ void (*relation_nontransactional_truncate) (Relation rel); + + /* + * Copies the existing data of the relation to the new filenodes + * specified by newrnode and removes the existing filenodes. + */ void (*relation_copy_data) (Relation rel, RelFileNode newrnode); + + /* + * Performs vacuuming of the relation based on the specified params. + * It gathers all the dead tuples of the relation and cleans them up, + * including in the indexes. + */ void (*relation_vacuum) (Relation onerel, int options, struct VacuumParams *params, BufferAccessStrategy bstrategy); + + /* + * Prepares the specified block of the relation for the analysis of the + * tuples that are present in the block. + */ void (*scan_analyze_next_block) (TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy); + + /* + * Scans all the tuples in the page based on the snapshot and returns the + * visible tuple, and also updates the stats related to the page. + */ bool (*scan_analyze_next_tuple) (TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot); + + /* + * Reorganizes the relation data in the new filenode according to the + * specified index. All the tuples in the new filenode are ordered like + * the index, and some of the dead tuples are removed. + */ void (*relation_copy_for_cluster) (Relation NewHeap, Relation OldHeap, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead); + + /* + * Performs a range scan of the relation instead of a full scan. + * The range is specified by start and end block numbers. If there + * is no range, specify InvalidBlockNumber for the end block.
+ */ double (*index_build_range_scan) (Relation heap_rel, Relation index_rel, IndexInfo *index_nfo, @@ -233,6 +372,12 @@ typedef struct TableAmRoutine IndexBuildCallback callback, void *callback_state, TableScanDesc scan); + + /* + * Scans the table and inserts the qualifying records into the index. + * This API is similar to index_build_range_scan, but it is used for + * the scenario where the index is being built concurrently. + */ void (*index_validate_scan) (Relation heap_rel, Relation index_rel, IndexInfo *index_info, @@ -245,6 +390,10 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Estimates the total size of the relation and also returns the number of + * pages, tuples, etc. related to the corresponding relation. + */ void (*relation_estimate_size) (Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac); @@ -254,12 +403,28 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Scans the page for all the tuples and stores all of their ItemPointers + * based on the visibility. + */ bool (*scan_bitmap_pagescan) (TableScanDesc scan, TBMIterateResult *tbmres); + + /* Returns the next tuple of the scan */ bool (*scan_bitmap_pagescan_next) (TableScanDesc scan, TupleTableSlot *slot); + + /* + * Gets the next block for sampling, based on the sampling method if one + * is available, or otherwise sequentially from the scan. + */ bool (*scan_sample_next_block) (TableScanDesc scan, struct SampleScanState *scanstate); + + /* + * Gets the next tuple to sample from the current sampling block based on + * the sampling method, otherwise gets the next visible tuple of the block. + */ bool (*scan_sample_next_tuple) (TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot); -- 2.20.1.windows.1