diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f7f726b5aec..bbcab9ce31a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3638,7 +3638,7 @@ static struct config_string ConfigureNamesString[] = {"default_table_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default table access method for new tables."), NULL, - GUC_IS_NAME + GUC_NOT_IN_SAMPLE | GUC_IS_NAME }, &default_table_access_method, DEFAULT_TABLE_ACCESS_METHOD, diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 82de4cdcf2c..8aeeba38ca2 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -57,6 +57,8 @@ typedef struct TableScanDescData *TableScanDesc; * pointer to this structure. The information here must be sufficient to * properly initialize each new TableScanDesc as workers join the scan, and it * must act as a information what to scan for those workers. + * + * This is stored in dynamic shared memory, so you can't use pointers here. */ typedef struct ParallelTableScanDescData { @@ -64,6 +66,11 @@ typedef struct ParallelTableScanDescData bool phs_syncscan; /* report location to syncscan logic? */ bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */ Size phs_snapshot_off; /* data for snapshot */ + + /* + * Table AM specific data follows. After the table AM specific data + * comes the snapshot data, at 'phs_snapshot_off'. + */ } ParallelTableScanDescData; typedef struct ParallelTableScanDescData *ParallelTableScanDesc; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 6fbfcb96c98..d4709563e7e 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -91,8 +91,9 @@ typedef enum TM_Result * xmax is the outdating transaction's XID. If the caller wants to visit the * replacement tuple, it must check that this matches before believing the * replacement is really a match. + * HEIKKI: matches what? 
xmin, but that's specific to the heapam. * - * cmax is the outdating command's CID, but only when the failure code is + * cmax is the outdating command's CID. Only set when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the * tuple); otherwise cmax is zero. (We make this restriction because * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other @@ -133,7 +134,11 @@ typedef void (*IndexBuildCallback) (Relation index, * returned by FormData_pg_am.amhandler. * * In most cases it's not appropriate to call the callbacks directly, use the - * table_* wrapper functions instead. + * table_* wrapper functions instead. The descriptions of the callbacks here + * are written from the AM implementor's point of view. For more information + * on how to call them, see the wrapper functions. (If you update the comments + * on either a callback or a wrapper function, remember to also update the other + * one!) * * GetTableAmRoutine() asserts that required callbacks are filled in, remember * to update when adding a callback. @@ -179,6 +184,12 @@ typedef struct TableAmRoutine * * if temp_snap is true, the snapshot will need to be deallocated at * scan_end. + * + * HEIKKI: table_scan_update_snapshot() changes the snapshot. That's + * a bit surprising for the AM, no? Can it be called when a scan is + * already in progress? + * + * HEIKKI: A flags bitmask argument would be more readable than 6 booleans */ TableScanDesc (*scan_begin) (Relation rel, Snapshot snapshot, @@ -194,6 +205,9 @@ typedef struct TableAmRoutine /* * Release resources and deallocate scan. If TableScanDesc.temp_snap, * TableScanDesc.rs_snapshot needs to be unregistered. + * + * HEIKKI: I find this 'temp_snap' thing pretty weird. Can't the caller handle + * deregistering it? */ void (*scan_end) (TableScanDesc scan); @@ -221,6 +235,11 @@ typedef struct TableAmRoutine /* * Estimate the size of shared memory needed for a parallel scan of this * relation. 
The snapshot does not need to be accounted for. + * + * HEIKKI: If this returns X, then the parallelscan_initialize() call + * mustn't use more than X. So this is not just for optimization purposes, + * for example. Not sure how to phrase that, but could use some + * clarification. */ Size (*parallelscan_estimate) (Relation rel); @@ -228,6 +247,8 @@ typedef struct TableAmRoutine * Initialize ParallelTableScanDesc for a parallel scan of this relation. * `pscan` will be sized according to parallelscan_estimate() for the same * relation. + * + * HEIKKI: What does this return? */ Size (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc pscan); @@ -246,18 +267,22 @@ typedef struct TableAmRoutine */ /* - * Prepare to fetch tuples from the relation, as needed when fetching - * tuples for an index scan. The callback has to return an - * IndexFetchTableData, which the AM will typically embed in a larger - * structure with additional information. + * Prepare to fetch tuples from the relation, for an index scan. The + * callback has to return an IndexFetchTableData, which the AM will + * typically embed in a larger structure with additional information. * - * Tuples for an index scan can then be fetched via index_fetch_tuple. + * After this, the caller will call index_fetch_tuple(), as many times as + * needed, to fetch the tuples. */ struct IndexFetchTableData *(*index_fetch_begin) (Relation rel); /* * Reset index fetch. Typically this will release cross index fetch * resources held in IndexFetchTableData. + * + * HEIKKI: Is this called between every call to index_fetch_tuple()? + * Between every call to index_fetch_tuple(), except when call_again is + * set? Can it be a no-op? */ void (*index_fetch_reset) (struct IndexFetchTableData *data); @@ -272,19 +297,22 @@ typedef struct TableAmRoutine * test, return true, false otherwise. 
* * Note that AMs that do not necessarily update indexes when indexed - * columns do not change, need to return the current/correct version of + * columns don't change, need to return the current/correct version of * the tuple that is visible to the snapshot, even if the tid points to an * older version of the tuple. * * *call_again is false on the first call to index_fetch_tuple for a tid. - * If there potentially is another tuple matching the tid, *call_again - * needs be set to true by index_fetch_tuple, signalling to the caller + * If there potentially is another tuple matching the tid, the callback + * needs to set *call_again to true, signalling to the caller * that index_fetch_tuple should be called again for the same tid. * * *all_dead, if all_dead is not NULL, should be set to true by * index_fetch_tuple iff it is guaranteed that no backend needs to see - * that tuple. Index AMs can use that do avoid returning that tid in + * that tuple. Index AMs can use that to avoid returning that tid in * future searches. + * + * HEIKKI: Should the snapshot be given in index_fetch_begin()? Can it + * differ between calls? */ bool (*index_fetch_tuple) (struct IndexFetchTableData *scan, ItemPointer tid, @@ -302,6 +330,8 @@ typedef struct TableAmRoutine * Fetch tuple at `tid` into `slot`, after doing a visibility test * according to `snapshot`. If a tuple was found and passed the visibility * test, returns true, false otherwise. + * + * HEIKKI: explain how this differs from index_fetch_tuple. */ bool (*tuple_fetch_row_version) (Relation rel, ItemPointer tid, @@ -311,14 +341,17 @@ typedef struct TableAmRoutine /* * Return the latest version of the tuple at `tid`, by updating `tid` to * point at the newest version. + * + * HEIKKI: the latest version visible to the snapshot? */ void (*tuple_get_latest_tid) (Relation rel, Snapshot snapshot, ItemPointer tid); /* - * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of - * the appropriate type for the AM. 
+ * Does the tuple in `slot` satisfy `snapshot`? + * + * The AM may modify the data underlying the tuple as a side-effect. */ bool (*tuple_satisfies_snapshot) (Relation rel, TupleTableSlot *slot, @@ -413,8 +446,8 @@ typedef struct TableAmRoutine */ /* - * This callback needs to create a new relation filenode for `rel`, with - * appropriate durability behaviour for `persistence`. + * Create a new relation filenode for `rel`, with appropriate durability + * behaviour for `persistence`. * * On output *freezeXid, *minmulti must be set to the values appropriate * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those @@ -429,24 +462,40 @@ typedef struct TableAmRoutine MultiXactId *minmulti); /* - * This callback needs to remove all contents from `rel`'s current - * relfilenode. No provisions for transactional behaviour need to be made. - * Often this can be implemented by truncating the underlying storage to - * its minimal size. - * - * See also table_relation_nontransactional_truncate(). + * Remove all rows from `rel`'s current relfilenode. No provisions for + * transactional behaviour need to be made. Often this can be implemented + * by truncating the underlying storage to its minimal size. */ void (*relation_nontransactional_truncate) (Relation rel); /* - * See table_relation_copy_data(). + * Copy data from `rel` into the new relfilenode `newrnode`. The new + * relfilenode might not have storage associated with it before this + * callback is called. This is used for low level operations like + * changing a relation's tablespace. * * This can typically be implemented by directly copying the underlying * storage, unless it contains references to the tablespace internally. */ void (*relation_copy_data) (Relation rel, RelFileNode newrnode); - /* See table_relation_copy_for_cluster() */ + /* + * Copy all data from `OldHeap` into `NewHeap`, as part of a CLUSTER or + * VACUUM FULL. 
+ * + * If `OldIndex` is valid, the data should be ordered according to the + * given index. If `use_sort` is false, the data should be fetched from the + * index, otherwise it should be fetched from the old table and sorted. + * + * OldestXmin, FreezeXid, MultiXactCutoff are currently valid values for + * the table. + * HEIKKI: What does "currently valid" mean? Valid for the old table? + * + * The callback should set *num_tuples, *tups_vacuumed, *tups_recently_dead + * to statistics computed while copying for the relation. Not all might make + * sense for every AM. + * HEIKKI: What to do for the ones that don't make sense? Set to 0? + */ void (*relation_copy_for_cluster) (Relation NewHeap, Relation OldHeap, Relation OldIndex, @@ -466,9 +515,8 @@ typedef struct TableAmRoutine * On entry a transaction is already established, and the relation is * locked with a ShareUpdateExclusive lock. * - * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through - * this routine, even if (for ANALYZE) it is part of the same VACUUM - * command. + * Note that VACUUM FULL (or CLUSTER) does not use this callback. + * Neither does ANALYZE, even if it is part of the same VACUUM command. * * There probably, in the future, needs to be a separate callback to * integrate with autovacuum's scheduling. @@ -479,13 +527,13 @@ typedef struct TableAmRoutine /* * Prepare to analyze block `blockno` of `scan`. The scan has been started - * with table_beginscan_analyze(). See also - * table_scan_analyze_next_block(). + * with table_beginscan_analyze(). * * The callback may acquire resources like locks that are held until - * table_scan_analyze_next_tuple() returns false. It e.g. can make sense + * table_scan_analyze_next_tuple() returns false. For example, it can make sense * to hold a lock until all tuples on a block have been analyzed by * scan_analyze_next_tuple. + * HEIKKI: Hold a lock on what? A lwlock on the page? 
* * The callback can return false if the block is not suitable for * sampling, e.g. because it's a metapage that could never contain tuples. @@ -589,8 +637,8 @@ typedef struct TableAmRoutine struct TBMIterateResult *tbmres); /* - * Fetch the next tuple of a bitmap table scan into `slot` and return true - * if a visible tuple was found, false otherwise. + * Fetch the next visible tuple of a bitmap table scan into `slot`. If a + * tuple was found, returns true, false otherwise. * * For some AMs it will make more sense to do all the work referencing * `tbmres` contents in scan_bitmap_next_block, for others it might be @@ -618,6 +666,8 @@ typedef struct TableAmRoutine * internally needs to perform mapping between the internal and a block * based representation. * + * HEIKKI: What TsmRoutine? Where is that? + * * Note that it's not acceptable to hold deadlock prone resources such as * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the * block - the tuple is likely to be returned to an upper query node, and @@ -632,9 +682,11 @@ typedef struct TableAmRoutine struct SampleScanState *scanstate); /* - * This callback, only called after scan_sample_next_block has returned - * true, should determine the next tuple to be returned from the selected - * block using the TsmRoutine's NextSampleTuple() callback. + * Return the next tuple in a sample scan. + * + * This callback will only be called after scan_sample_next_block has + * returned true. It should determine the next tuple to be returned from + * the selected block using the TsmRoutine's NextSampleTuple() callback. * * The callback needs to perform visibility checks, and only return * visible tuples. That obviously can mean calling NextSampletuple() @@ -657,15 +709,15 @@ typedef struct TableAmRoutine */ /* - * Returns slot callbacks suitable for holding tuples of the appropriate type + * Return slot callbacks suitable for holding tuples of the appropriate type * for the relation. 
Works for tables, views, foreign tables and partitioned * tables. */ extern const TupleTableSlotOps *table_slot_callbacks(Relation rel); /* - * Returns slot using the callbacks returned by table_slot_callbacks(), and - * registers it on *reglist. + * Return a slot using the callbacks returned by table_slot_callbacks(), and + * register it on *reglist. */ extern TupleTableSlot *table_slot_create(Relation rel, List **reglist); @@ -676,8 +728,8 @@ extern TupleTableSlot *table_slot_create(Relation rel, List **reglist); /* - * Start a scan of `rel`. Returned tuples pass a visibility test of - * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys. + * Start a scan of `rel`. Returned tuples are visible according to `snapshot`, + * and if nkeys != 0, the results are filtered by those scan keys. */ static inline TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, @@ -688,18 +740,24 @@ table_beginscan(Relation rel, Snapshot snapshot, } /* - * Like table_beginscan(), but for scanning catalog. It'll automatically use a - * snapshot appropriate for scanning catalog relations. + * Like table_beginscan(), but for scanning a system catalog. It will + * automatically use a snapshot appropriate for scanning catalog relations. */ extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys, struct ScanKeyData *key); /* * Like table_beginscan(), but table_beginscan_strat() offers an extended API - * that lets the caller control whether a nondefault buffer access strategy - * can be used, and whether syncscan can be chosen (possibly resulting in the - * scan not starting from block zero). Both of these default to true with - * plain table_beginscan. + * that lets the caller use a non-default buffer access strategy, or + * specify that a synchronized scan can be used (possibly resulting in the + * scan not starting from block zero). Both of these default to true, as + * with plain table_beginscan. 
+ * + * HEIKKI: I'm a bit confused by 'allow_strat'. What is the non-default + * strategy that will get used if you pass allow_strat=true? Perhaps the flag + * should be called "use_bulkread_strategy"? Or it should be of type + * BufferAccessStrategyType, or the caller should create a strategy with + * GetAccessStrategy() and pass that. */ static inline TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, @@ -712,10 +770,10 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, } /* - * table_beginscan_bm is an alternative entry point for setting up a - * TableScanDesc for a bitmap heap scan. Although that scan technology is - * really quite unlike a standard seqscan, there is just enough commonality to - * make it worth using the same data structure. + * table_beginscan_bm() is an alternative entry point for setting up a + * TableScanDesc for a bitmap heap scan. Although a bitmap scan is + * really quite unlike a standard seqscan, there is just enough commonality + * that it makes sense to use a TableScanDesc for both. */ static inline TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, @@ -726,11 +784,13 @@ table_beginscan_bm(Relation rel, Snapshot snapshot, } /* - * table_beginscan_sampling is an alternative entry point for setting up a + * table_beginscan_sampling() is an alternative entry point for setting up a * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth * using the same data structure although the behavior is rather different. * In addition to the options offered by table_beginscan_strat, this call * also allows control of whether page-mode visibility checking is used. + * + * HEIKKI: What is 'pagemode'? */ static inline TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, @@ -973,9 +1033,6 @@ table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid) * * This assumes the slot's tuple is valid, and of the appropriate type for the * AM. 
- * - * Some AMs might modify the data underlying the tuple as a side-effect. If so - * they ought to mark the relevant buffer dirty. */ static inline bool table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, @@ -1004,31 +1061,33 @@ table_compute_xid_horizon_for_tuples(Relation rel, */ /* - * Insert a tuple from a slot into table AM routine. + * Insert a tuple from a slot into the table. * - * The options bitmask allows to specify options that allow to change the - * behaviour of the AM. Several options might be ignored by AMs not supporting - * them. + * The options bitmask allows changing the behaviour of the AM. The AM may + * ignore options that it does not support. * * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple doesn't * need to be logged to WAL, even for a non-temp relation. It is the AMs * choice whether this optimization is supported. * - * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse - * free space in the relation. This can save some cycles when we know the + * The TABLE_INSERT_SKIP_FSM option is a hint that the AM should not bother + * to try reusing free space. This can save some cycles when we know the * relation is new and doesn't contain useful amounts of free space. It's * commonly passed directly to RelationGetBufferForTuple, see for more info. * - * TABLE_INSERT_FROZEN should only be specified for inserts into - * relfilenodes created during the current subtransaction and when - * there are no prior snapshots or pre-existing portals open. - * This causes rows to be frozen, which is an MVCC violation and - * requires explicit options chosen by user. + * TABLE_INSERT_FROZEN means that the AM may skip normal MVCC rules for the + * tuples, so that they become immediately visible to everyone. That is a + * violation of the normal MVCC rules, and requires specific action from the + * user, currently only used for the COPY FREEZE option. 
Even then, it should + * only be specified for inserts into relfilenodes created during the current + * subtransaction and when there are no prior snapshots or pre-existing portals + * open. * * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding * information for the tuple. This should solely be used during table rewrites * where RelationIsLogicallyLogged(relation) is not yet accurate for the new * relation. + * HEIKKI: Is this optional, too? Can the AM ignore it? * * Note that most of these options will be applied when inserting into the * heap's TOAST table, too, if the tuple requires any out-of-line data. @@ -1041,6 +1100,8 @@ table_compute_xid_horizon_for_tuples(Relation rel, * On return the slot's tts_tid and tts_tableOid are updated to reflect the * insertion. But note that any toasting of fields within the slot is NOT * reflected in the slots contents. + * + * HEIKKI: I think GetBulkInsertState() should be an AM-specific callback. */ static inline void table_insert(Relation rel, TupleTableSlot *slot, CommandId cid, @@ -1089,9 +1150,8 @@ table_complete_speculative(Relation rel, TupleTableSlot *slot, * operation. That's often faster than calling table_insert() in a loop, * because e.g. the AM can reduce WAL logging and page locking overhead. * - * Except for taking `nslots` tuples as input, as an array of TupleTableSlots - * in `slots`, the parameters for table_multi_insert() are the same as for - * table_insert(). + * The parameters are the same as for table_insert(), except for taking + * an array of slots. * * Note: this leaks memory into the current memory context. You can create a * temporary context before calling this, if that's a problem. 
@@ -1115,20 +1175,25 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * tid - TID of tuple to be deleted * cid - delete command ID (used for visibility test, and stored into * cmax if successful) + * HEIKKI: description for 'snapshot' parameter is missing * crosscheck - if not InvalidSnapshot, also check tuple against this * wait - true if should wait for any conflicting update to commit/abort - * Output parameters: - * tmfd - filled in failure cases (see below) * changingPart - true iff the tuple is being moved to another partition * table due to an update of the partition key. Otherwise, false. + * Output parameters: + * tmfd - filled in failure cases (see below) + * + * HEIKKI: What's the AM supposed to do differently if 'changingPart' is set? + * (I know, it's supposed to set the t_ctid to the magic "moved partition" + * value. Explain that) * * Normal, successful return value is TM_Ok, which means we did actually - * delete it. Failure return codes are TM_SelfModified, TM_Updated, and - * TM_BeingModified (the last only possible if wait == false). + * delete it. Failure return codes are TM_SelfModified, TM_Updated, + * TM_Deleted, and TM_BeingModified (the last only possible if wait == false). * * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, - * t_xmax, and, if possible, and, if possible, t_cmax. See comments for - * struct TM_FailureData for additional info. + * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData + * for additional info. */ static inline TM_Result table_delete(Relation rel, ItemPointer tid, CommandId cid, @@ -1161,8 +1226,8 @@ table_delete(Relation rel, ItemPointer tid, CommandId cid, * are required for this tuple * * Normal, successful return value is TM_Ok, which means we did actually - * update it. Failure return codes are TM_SelfModified, TM_Updated, and - * TM_BeingModified (the last only possible if wait == false). + * update it. 
Failure return codes are TM_SelfModified, TM_Updated, + * TM_Deleted, and TM_BeingModified (the last only possible if wait == false). * * On success, the slot's tts_tid and tts_tableOid are updated to match the new * stored tuple; in particular, slot->tts_tid is set to the TID where the @@ -1170,6 +1235,9 @@ table_delete(Relation rel, ItemPointer tid, CommandId cid, * update was done. However, any TOAST changes in the new tuple's * data are not reflected into *newtup. * + * HEIKKI: There is no 'newtup'. + * HEIKKI: HEAP_ONLY_TUPLE is AM-specific; do the callers peek into that, currently? + * * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData * for additional info. @@ -1233,8 +1301,8 @@ table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot, /* * Perform operations necessary to complete insertions made via * tuple_insert and multi_insert with a BulkInsertState specified. This - * e.g. may e.g. used to flush the relation when inserting with - * TABLE_INSERT_SKIP_WAL specified. + * may e.g. be used to flush the relation when the TABLE_INSERT_SKIP_WAL + * option was used. */ static inline void table_finish_bulk_insert(Relation rel, int options) @@ -1257,8 +1325,8 @@ table_finish_bulk_insert(Relation rel, int options) * This is used both during relation creation and various DDL operations to * create a new relfilenode that can be filled from scratch. * - * *freezeXid, *minmulti are set to the xid / multixact horizon for the table - * that pg_class.{relfrozenxid, relminmxid} have to be set to. + * The function sets *freezeXid, *minmulti to the xid / multixact horizon + * values that the table's pg_class.{relfrozenxid, relminmxid} have to be set to. 
*/ static inline void table_relation_set_new_filenode(Relation rel, char persistence, @@ -1271,7 +1339,7 @@ table_relation_set_new_filenode(Relation rel, char persistence, /* * Remove all table contents from `rel`, in a non-transactional manner. - * Non-transactional meaning that there's no need to support rollbacks. This + * Non-transactional, meaning that this cannot be rolled back. This * commonly only is used to perform truncations for relfilenodes created in the * current transaction. */ @@ -1294,20 +1362,19 @@ table_relation_copy_data(Relation rel, RelFileNode newrnode) } /* - * Copy data from `OldHeap` into `NewHeap`, as part of a CLUSTER or VACUUM - * FULL. + * Copy all data from `OldHeap` into `NewHeap`, as part of a CLUSTER or + * VACUUM FULL. * - * If `use_sort` is true, the table contents are sorted appropriate for - * `OldIndex`; if use_sort is false and OldIndex is not InvalidOid, the data - * is copied in that index's order; if use_sort is false and OidIndex is - * InvalidOid, no sorting is performed. + * If `OldIndex` is valid, the data should be ordered according to the + * given index. If `use_sort` is false, the data should be fetched from the + * index, otherwise it should be fetched from the old table and sorted. * * OldestXmin, FreezeXid, MultiXactCutoff must be currently valid values for * the table. * - * *num_tuples, *tups_vacuumed, *tups_recently_dead will contain statistics - * computed while copying for the relation. Not all might make sense for every - * AM. + * The function sets *num_tuples, *tups_vacuumed, *tups_recently_dead with + * statistics computed while copying for the relation. Not all might make sense + * for every AM. */ static inline void table_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, @@ -1369,7 +1436,7 @@ table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, * is stored in `slot`. * * *liverows and *deadrows are incremented according to the encountered - * tuples. 
+ * tuples, if the AM has the concept of live and dead tuples. */ static inline bool table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, @@ -1384,34 +1451,36 @@ table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, /* * table_index_build_scan - scan the table to find tuples to be indexed * - * This is called back from an access-method-specific index build procedure - * after the AM has done whatever setup it needs. The parent heap relation - * is scanned to find tuples that should be entered into the index. Each - * such tuple is passed to the AM's callback routine, which does the right - * things to add it to the new index. After we return, the AM's index - * build procedure does whatever cleanup it needs. + * This is called by the index-AM-specific index build procedure, after the + * index AM has done whatever setup it needs. This function scans the parent + * table, to find tuples that should be entered into the index. Each such tuple + * is passed to the index AM's callback routine, which does the right things to + * add it to the new index. After this returns, the index AM's index build + * procedure does whatever cleanup it needs. * * The total count of live tuples is returned. This is for updating pg_class * statistics. (It's annoying not to be able to do that here, but we want to * merge that update with others; see index_update_stats.) Note that the * index AM itself must keep track of the number of index tuples; we don't do - * so here because the AM might reject some of the tuples for its own reasons, + * so here because the index AM might reject some of the tuples for its own reasons, * such as being unable to store NULLs. * - * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when + * If 'progress' is true, the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along. 
* - * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect + * A side effect is to set index_info->ii_BrokenHotChain to true if we detect * any potentially broken HOT chains. Currently, we set this if there are any * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying * very hard to detect whether they're really incompatible with the chain tip. * This only really makes sense for heap AM, it might need to be generalized * for other AMs later. + * + * HEIKKI: What does 'allow_sync' do? */ static inline double table_index_build_scan(Relation heap_rel, Relation index_rel, - struct IndexInfo *index_nfo, + struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, @@ -1420,7 +1489,7 @@ table_index_build_scan(Relation heap_rel, { return heap_rel->rd_tableam->index_build_range_scan(heap_rel, index_rel, - index_nfo, + index_info, allow_sync, false, progress, @@ -1432,12 +1501,12 @@ table_index_build_scan(Relation heap_rel, } /* - * As table_index_build_scan(), except that instead of scanning the complete - * table, only the given number of blocks are scanned. Scan to end-of-rel can + * As table_index_build_scan(), but instead of scanning the complete + * table, only the given block range is scanned. Scan to end-of-rel can * be signalled by passing InvalidBlockNumber as numblocks. Note that * restricting the range to scan cannot be done when requesting syncscan. * - * When "anyvisible" mode is requested, all tuples visible to any transaction + * When 'anyvisible' mode is requested, all tuples visible to any transaction * are indexed and counted as live, including those inserted or deleted by * transactions that are still in progress. */