diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index dba32ceff3..68cb22b568 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -159,6 +159,19 @@ static relopt_bool boolRelOpts[] = }, true }, + /* + * In order to avoid consistency problems, the global temporary table + * uses ShareUpdateExclusiveLock. + */ + { + { + "on_commit_delete_rows", + "global temporary table on commit options", + RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED, + ShareUpdateExclusiveLock + }, + true + }, /* list terminator */ {{NULL}} }; @@ -1834,6 +1847,8 @@ bytea * default_reloptions(Datum reloptions, bool validate, relopt_kind kind) { static const relopt_parse_elt tab[] = { + {"on_commit_delete_rows", RELOPT_TYPE_BOOL, + offsetof(StdRdOptions, on_commit_delete_rows)}, {"fillfactor", RELOPT_TYPE_INT, offsetof(StdRdOptions, fillfactor)}, {"autovacuum_enabled", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, enabled)}, @@ -1978,11 +1993,6 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate) bytea * partitioned_table_reloptions(Datum reloptions, bool validate) { - /* - * autovacuum_enabled, autovacuum_analyze_threshold and - * autovacuum_analyze_scale_factor are supported for partitioned tables. 
- */ - return default_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED); } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 43ba03b6eb..49f1052fdb 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1023,7 +1023,7 @@ gistproperty(Oid index_oid, int attno, XLogRecPtr gistGetFakeLSN(Relation rel) { - if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + if (RELATION_IS_TEMP(rel)) { /* * Temporary relations are only accessible in our session, so a simple diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 0752fb38a9..5c85d777f4 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -151,7 +151,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) * metapage, nor the first bitmap page. */ sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ; - if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + if (!RELATION_IS_TEMP(index)) sort_threshold = Min(sort_threshold, NBuffers); else sort_threshold = Min(sort_threshold, NLocBuffer); diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index beb8f20708..421e22428d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -589,7 +589,7 @@ heapam_relation_set_new_filenode(Relation rel, */ *minmulti = GetOldestMultiXactId(); - srel = RelationCreateStorage(*newrnode, persistence); + srel = RelationCreateStorage(*newrnode, persistence, rel); /* * If required, set up an init fork for an unlogged table so that it can @@ -641,7 +641,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode) * NOTE: any conflict in relfilenode value will be caught in * RelationCreateStorage(). 
*/ - RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence); + RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence, rel); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 2c04b69221..5511ac908c 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -63,6 +63,7 @@ #include "access/xlog.h" #include "catalog/index.h" #include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "commands/dbcommands.h" #include "commands/progress.h" #include "commands/vacuum.h" @@ -510,6 +511,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, TransactionId FreezeLimit; MultiXactId MultiXactCutoff; + /* + * not every AM requires these to be valid, but regular heap does. + * Transaction information for the global temp table will be stored + * in the local hash table, not the catalog. + */ + Assert(RELATION_IS_GLOBAL_TEMP(rel) ^ TransactionIdIsNormal(rel->rd_rel->relfrozenxid)); + Assert(RELATION_IS_GLOBAL_TEMP(rel) ^ MultiXactIdIsValid(rel->rd_rel->relminmxid)); + /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) { diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index ebec8fa5b8..84766b3a33 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -28,6 +28,7 @@ #include "access/transam.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "catalog/storage_gtt.h" #include "miscadmin.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" @@ -677,6 +678,14 @@ _bt_getrootheight(Relation rel) { Buffer metabuf; + /* + * If a global temporary table storage file is not initialized in the + * current backend, its index does not have a root page, just returns 0. 
+ */ + if (RELATION_IS_GLOBAL_TEMP(rel) && + !gtt_storage_attached(RelationGetRelid(rel))) + return 0; + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metad = _bt_getmeta(rel, metabuf); diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 5fcd004e1b..58b994cef5 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -212,7 +212,8 @@ Boot_CreateStmt: mapped_relation, true, &relfrozenxid, - &relminmxid); + &relminmxid, + false); elog(DEBUG4, "bootstrap relation created"); } else diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index d297e77361..8c21979625 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -44,6 +44,7 @@ OBJS = \ pg_subscription.o \ pg_type.o \ storage.o \ + storage_gtt.o \ toasting.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/catalog/README.gtt b/src/backend/catalog/README.gtt new file mode 100644 index 0000000000..bedc85c8df --- /dev/null +++ b/src/backend/catalog/README.gtt @@ -0,0 +1,165 @@ +Global Temporary Table(GTT) +============== + +Feature description +-------------------------------- + +Previously, temporary tables are defined once and automatically +exist (starting with empty contents) in every session before using them. + +The temporary table implementation in PostgreSQL, known as Local temp tables(LTT), +did not fully comply with the SQL standard. This version added the support of +Global Temporary Table . + +The metadata of Global Temporary Table is persistent and shared among sessions. +The data stored in the Global temporary table is independent of sessions. This +means, when a session creates a Global Temporary Table and writes some data. +Other sessions cannot see those data, but they have an empty Global Temporary +Table with same schema. 
+ +Like local temporary tables, Global Temporary Tables support the ON COMMIT PRESERVE ROWS +or ON COMMIT DELETE ROWS clause, so that data in the temporary table can be +cleaned up or preserved automatically when a session exits or a transaction COMMITs. + +Unlike Local Temporary Tables, Global Temporary Tables do not support +the ON COMMIT DROP clause. + +In the following paragraphs, we use GTT for Global Temporary Table and LTT for +local temporary table. + +Main design ideas +----------------------------------------- + +STORAGE & BUFFER + +In general, GTT and LTT use the same storage and buffer design and +implementation. The storage files for both types of temporary tables are named +as t_backendid_relfilenode, and the local buffer is used to cache the data. + +The schema of GTTs is shared among sessions while their data are not. We build +a new mechanism to manage those non-shared data and their statistics. +Here is the summary of changes: + +1) CATALOG +GTTs store session-specific data. The storage information of GTTs' data, their +transaction information, and their statistics are not stored in the catalog. + +2) STORAGE INFO & STATISTICS & TRANSACTION +In order to maintain durability and availability of GTTs' session-specific data, +their storage information, statistics, and transaction information are managed +in a local hash table gtt_storage_local_hash. + +3) DDL +A shared hash table active_gtt_shared_hash is added to track the state of the +GTT in a different session. This information is recorded in the hash table +during the DDL execution of the GTT. + +4) LOCK +The data stored in a GTT can only be modified or accessed by the owning session. +The statements that only modify data in a GTT do not need a high level of table +locking. +The operations making those changes include truncate GTT, Vacuum/Cluster GTT, +and Lock GTT. + +Detailed design +----------------------------------------- + +1. 
CATALOG +1.1 relpersistence +define RELPERSISTENCE_GLOBAL_TEMP 'g' +Global Temporary Tables are marked with relpersistence 'g' in pg_class. The relpersistence +of indexes created on the GTT, sequences on GTT and toast tables on GTT is +also set to 'g'. + +1.2 on commit clause +LTT's status associated with on commit DELETE ROWS and on commit PRESERVE ROWS +is not stored in catalog. Instead, GTTs need a bool value on_commit_delete_rows +in reloptions which is shared among sessions. + +1.3 gram.y +GTT is already supported in the syntax tree. We remove the warning message +"GLOBAL is deprecated in temporary table creation" and mark +relpersistence = RELPERSISTENCE_GLOBAL_TEMP. + +2. STORAGE INFO & STATISTICS DATA & TRANSACTION INFO +2.1. gtt_storage_local_hash +Each backend creates a local hashtable gtt_storage_local_hash to track a GTT's +storage file information, statistics, and transaction information. + +2.2 GTT storage file info track +1) When one session inserts data into a GTT for the first time, record the +storage info to gtt_storage_local_hash. +2) Use before_shmem_exit to ensure that all files of session GTT are deleted when +the session exits. + +2.3 statistics info +1) relpages reltuples relallvisible +2) The statistics of each column from pg_statistic +All the above information is stored in gtt_storage_local_hash. +When doing vacuum or analyze, GTT's statistics are updated, which are used by +the SQL planner. +The statistics summarize only data in the current session. + +2.4 transaction info track +frozenxid and minmulti from pg_class are stored in gtt_storage_local_hash. + +4 DDL +4.1. active_gtt_shared_hash +This is the hash table created in shared memory to trace the GTT files initialized +in each session. Each hash entry contains a bitmap that records the backendid of +the initialized GTT file. With this hash table, we know which backend/session +is using this GTT. Such information is used during GTT's DDL operations. 
+ +4.2 DROP GTT +A GTT is allowed to be dropped only when no session other than the current +one is using the table. +After acquiring the AccessExclusiveLock on the GTT, active_gtt_shared_hash +is checked to ensure that. + +4.3 ALTER GTT/DROP INDEX ON GTT +Same as drop GTT. + +4.4 CREATE INDEX ON GTT +1) A create index statement on a GTT builds the index based on local data in a session. +2) After the index is created, record the index metadata to the catalog. +3) Other sessions can enable or disable the local GTT index. + +5 LOCK + +5.1 TRUNCATE GTT +The truncate GTT command uses RowExclusiveLock, not AccessExclusiveLock, because +this command only cleans up local data and local buffers in the current session. + +5.2 CLUSTER GTT/VACUUM FULL GTT +Same as truncate GTT. + +5.3 Lock GTT +A lock GTT statement does not hold any table locks. + +6 MVCC commit log (clog) cleanup + +The GTT storage file contains transaction information. Queries for GTT data rely +on transaction information such as clog. The transaction information required by +each session may be completely different. We need to ensure that the transaction +information of the GTT data is not cleaned up during its lifetime and that +transaction resources are recycled at the instance level. + +6.1 The session level GTT oldest frozenxid +1) To manage all GTT transaction information, add a session level oldest frozenxid +in each session. When one GTT is created or removed, record the session level +oldest frozenxid and store it in MyProc. +2) When vacuum advances the database's frozenxid, the session level oldest frozenxid +should be considered. This is acquired by searching all of MyProc. This way, +we can prevent the clog required by GTTs from being cleaned up. + +6.2 vacuum GTT +Users can perform vacuum over a GTT to clean up local data in the GTT. + +6.3 autovacuum GTT +Autovacuum skips all GTTs, because the data in GTTs is only visible in the current +session. 
+ +7 OTHERS +Parallel query +Planner does not produce parallel query plans for SQL related to GTT. Because +GTT private data cannot be accessed across processes. diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index aa7d4d5456..595cb03eb4 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -504,6 +504,7 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence) switch (relpersistence) { case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_GLOBAL_TEMP: backend = BackendIdForTempRelations(); break; case RELPERSISTENCE_UNLOGGED: diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 83746d3fd9..8cf1385cfe 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -62,6 +62,7 @@ #include "catalog/pg_type.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" +#include "catalog/storage_gtt.h" #include "commands/tablecmds.h" #include "commands/typecmds.h" #include "executor/executor.h" @@ -100,6 +101,7 @@ static void AddNewRelationTuple(Relation pg_class_desc, Oid reloftype, Oid relowner, char relkind, + char relpersistence, TransactionId relfrozenxid, TransactionId relminmxid, Datum relacl, @@ -306,7 +308,8 @@ heap_create(const char *relname, bool mapped_relation, bool allow_system_table_mods, TransactionId *relfrozenxid, - MultiXactId *relminmxid) + MultiXactId *relminmxid, + bool skip_create_storage) { bool create_storage; Relation rel; @@ -371,7 +374,9 @@ heap_create(const char *relname, * storage is already created, so don't do it here. Also don't create it * for relkinds without physical storage. 
*/ - if (!RELKIND_HAS_STORAGE(relkind) || OidIsValid(relfilenode)) + if (!RELKIND_HAS_STORAGE(relkind) || + OidIsValid(relfilenode) || + skip_create_storage) create_storage = false; else { @@ -427,7 +432,7 @@ heap_create(const char *relname, case RELKIND_INDEX: case RELKIND_SEQUENCE: - RelationCreateStorage(rel->rd_node, relpersistence); + RelationCreateStorage(rel->rd_node, relpersistence, rel); break; case RELKIND_RELATION: @@ -998,6 +1003,7 @@ AddNewRelationTuple(Relation pg_class_desc, Oid reloftype, Oid relowner, char relkind, + char relpersistence, TransactionId relfrozenxid, TransactionId relminmxid, Datum relacl, @@ -1036,8 +1042,21 @@ AddNewRelationTuple(Relation pg_class_desc, break; } - new_rel_reltup->relfrozenxid = relfrozenxid; - new_rel_reltup->relminmxid = relminmxid; + /* + * The transaction information of the global temporary table is stored + * in the local hash table, not in catalog. + */ + if (relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + { + new_rel_reltup->relfrozenxid = InvalidTransactionId; + new_rel_reltup->relminmxid = InvalidMultiXactId; + } + else + { + new_rel_reltup->relfrozenxid = relfrozenxid; + new_rel_reltup->relminmxid = relminmxid; + } + new_rel_reltup->relowner = relowner; new_rel_reltup->reltype = new_type_oid; new_rel_reltup->reloftype = reloftype; @@ -1303,7 +1322,8 @@ heap_create_with_catalog(const char *relname, mapped_relation, allow_system_table_mods, &relfrozenxid, - &relminmxid); + &relminmxid, + false); Assert(relid == RelationGetRelid(new_rel_desc)); @@ -1410,6 +1430,7 @@ heap_create_with_catalog(const char *relname, reloftypeid, ownerid, relkind, + relpersistence, relfrozenxid, relminmxid, PointerGetDatum(relacl), @@ -1993,6 +2014,19 @@ heap_drop_with_catalog(Oid relid) if (relid == defaultPartOid) update_default_partition_oid(parentOid, InvalidOid); + /* + * Only when other sessions are not using this Global temporary table, + * is it allowed to DROP it. 
+ */ + if (RELATION_IS_GLOBAL_TEMP(rel)) + { + if (is_other_backend_use_gtt(RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("cannot drop global temporary table %s when other backend attached it.", + RelationGetRelationName(rel)))); + } + /* * Schedule unlinking of the relation's physical files at commit. */ @@ -3277,7 +3311,7 @@ RemoveStatistics(Oid relid, AttrNumber attnum) * the specified relation. Caller must hold exclusive lock on rel. */ static void -RelationTruncateIndexes(Relation heapRelation) +RelationTruncateIndexes(Relation heapRelation, LOCKMODE lockmode) { ListCell *indlist; @@ -3289,7 +3323,7 @@ RelationTruncateIndexes(Relation heapRelation) IndexInfo *indexInfo; /* Open the index relation; use exclusive lock, just to be sure */ - currentIndex = index_open(indexId, AccessExclusiveLock); + currentIndex = index_open(indexId, lockmode); /* * Fetch info needed for index_build. Since we know there are no @@ -3335,8 +3369,16 @@ heap_truncate(List *relids) { Oid rid = lfirst_oid(cell); Relation rel; + LOCKMODE lockmode = AccessExclusiveLock; - rel = table_open(rid, AccessExclusiveLock); + /* + * Truncate global temporary table only clears local data, + * so only low-level locks need to be held. + */ + if (get_rel_persistence(rid) == RELPERSISTENCE_GLOBAL_TEMP) + lockmode = RowExclusiveLock; + + rel = table_open(rid, lockmode); relations = lappend(relations, rel); } @@ -3369,6 +3411,7 @@ void heap_truncate_one_rel(Relation rel) { Oid toastrelid; + LOCKMODE lockmode = AccessExclusiveLock; /* * Truncate the relation. Partitioned tables have no storage, so there is @@ -3377,23 +3420,47 @@ heap_truncate_one_rel(Relation rel) if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) return; + /* For global temporary table only */ + if (RELATION_IS_GLOBAL_TEMP(rel)) + { + /* + * If this GTT is not initialized in current backend, there is + * no needs to anything. 
+ */ + if (!gtt_storage_attached(RelationGetRelid(rel))) + return; + + /* + * Truncate GTT only clears local data, so only low-level locks + * need to be held. + */ + lockmode = RowExclusiveLock; + } + /* Truncate the underlying relation */ table_relation_nontransactional_truncate(rel); /* If the relation has indexes, truncate the indexes too */ - RelationTruncateIndexes(rel); + RelationTruncateIndexes(rel, lockmode); /* If there is a toast table, truncate that too */ toastrelid = rel->rd_rel->reltoastrelid; if (OidIsValid(toastrelid)) { - Relation toastrel = table_open(toastrelid, AccessExclusiveLock); + Relation toastrel = table_open(toastrelid, lockmode); table_relation_nontransactional_truncate(toastrel); - RelationTruncateIndexes(toastrel); + RelationTruncateIndexes(toastrel, lockmode); /* keep the lock... */ table_close(toastrel, NoLock); } + + /* + * After the data is cleaned up on the GTT, the transaction information + * for the data(stored in local hash table) is also need reset. 
+ */ + if (RELATION_IS_GLOBAL_TEMP(rel)) + up_gtt_relstats(RelationGetRelid(rel), 0, 0, 0, RecentXmin, InvalidMultiXactId); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 26bfa74ce7..c3fe8950a0 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -54,6 +54,7 @@ #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" #include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "commands/event_trigger.h" #include "commands/progress.h" #include "commands/tablecmds.h" @@ -732,6 +733,29 @@ index_create(Relation heapRelation, char relkind; TransactionId relfrozenxid; MultiXactId relminmxid; + bool skip_create_storage = false; + + /* For global temporary table only */ + if (RELATION_IS_GLOBAL_TEMP(heapRelation)) + { + /* No support create index on global temporary table with concurrent mode yet */ + if (concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex global temporary tables concurrently"))); + + /* + * For the case that some backend is applied relcache message to create + * an index on a global temporary table, if this table in the current + * backend are not initialized, the creation of index storage on the + * table are also skipped. + */ + if (!gtt_storage_attached(RelationGetRelid(heapRelation))) + { + skip_create_storage = true; + flags |= INDEX_CREATE_SKIP_BUILD; + } + } /* constraint flags can only be set when a constraint is requested */ Assert((constr_flags == 0) || @@ -939,7 +963,8 @@ index_create(Relation heapRelation, mapped_relation, allow_system_table_mods, &relfrozenxid, - &relminmxid); + &relminmxid, + skip_create_storage); Assert(relfrozenxid == InvalidTransactionId); Assert(relminmxid == InvalidMultiXactId); @@ -2107,7 +2132,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) * lock (see comments in RemoveRelations), and a non-concurrent DROP is * more efficient. 
*/ - Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP || + Assert(!RelpersistenceTsTemp(get_rel_persistence(indexId)) || (!concurrent && !concurrent_lock_mode)); /* @@ -2139,6 +2164,20 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) */ CheckTableNotInUse(userIndexRelation, "DROP INDEX"); + /* + * Allow to drop index on global temporary table when only current + * backend use it. + */ + if (RELATION_IS_GLOBAL_TEMP(userHeapRelation) && + is_other_backend_use_gtt(RelationGetRelid(userHeapRelation))) + { + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("cannot drop index %s or global temporary table %s", + RelationGetRelationName(userIndexRelation), RelationGetRelationName(userHeapRelation)), + errhint("Because the index is created on the global temporary table and other backend attached it."))); + } + /* * Drop Index Concurrently is more or less the reverse process of Create * Index Concurrently. @@ -2747,6 +2786,7 @@ index_update_stats(Relation rel, HeapTuple tuple; Form_pg_class rd_rel; bool dirty; + bool is_gtt = RELATION_IS_GLOBAL_TEMP(rel); /* * We always update the pg_class row using a non-transactional, @@ -2841,20 +2881,37 @@ index_update_stats(Relation rel, else /* don't bother for indexes */ relallvisible = 0; - if (rd_rel->relpages != (int32) relpages) - { - rd_rel->relpages = (int32) relpages; - dirty = true; - } - if (rd_rel->reltuples != (float4) reltuples) + /* For global temporary table */ + if (is_gtt) { - rd_rel->reltuples = (float4) reltuples; - dirty = true; + /* Update GTT'statistics into local relcache */ + rel->rd_rel->relpages = (int32) relpages; + rel->rd_rel->reltuples = (float4) reltuples; + rel->rd_rel->relallvisible = (int32) relallvisible; + + /* Update GTT'statistics into local hashtable */ + up_gtt_relstats(RelationGetRelid(rel), relpages, reltuples, relallvisible, + InvalidTransactionId, InvalidMultiXactId); } - if (rd_rel->relallvisible != (int32) relallvisible) + else { - 
rd_rel->relallvisible = (int32) relallvisible; - dirty = true; + if (rd_rel->relpages != (int32) relpages) + { + rd_rel->relpages = (int32) relpages; + dirty = true; + } + + if (rd_rel->reltuples != (float4) reltuples) + { + rd_rel->reltuples = (float4) reltuples; + dirty = true; + } + + if (rd_rel->relallvisible != (int32) relallvisible) + { + rd_rel->relallvisible = (int32) relallvisible; + dirty = true; + } } } @@ -2967,6 +3024,26 @@ index_build(Relation heapRelation, pgstat_progress_update_multi_param(6, progress_index, progress_vals); } + /* For build index on global temporary table */ + if (RELATION_IS_GLOBAL_TEMP(indexRelation)) + { + /* + * If the storage for the index in this session is not initialized, + * it needs to be created. + */ + if (!gtt_storage_attached(RelationGetRelid(indexRelation))) + { + /* Before create init storage, fix the local Relcache first */ + force_enable_gtt_index(indexRelation); + + Assert(gtt_storage_attached(RelationGetRelid(heapRelation))); + + /* Init storage for index */ + RelationCreateStorage(indexRelation->rd_node, RELPERSISTENCE_GLOBAL_TEMP, indexRelation); + + } + } + /* * Call the access method's build procedure */ @@ -3521,6 +3598,20 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, if (!OidIsValid(heapId)) return; + /* + * For reindex on global temporary table, If the storage for the index + * in current backend is not initialized, nothing is done. 
+ */ + if (persistence == RELPERSISTENCE_GLOBAL_TEMP && + !gtt_storage_attached(indexId)) + { + /* Suppress use of the target index while rebuilding it */ + SetReindexProcessing(heapId, indexId); + /* Re-allow use of target index */ + ResetReindexProcessing(); + return; + } + if ((params->options & REINDEXOPT_MISSING_OK) != 0) heapRelation = try_table_open(heapId, ShareLock); else diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index fd767fc5cf..a438082e45 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -656,6 +656,13 @@ RangeVarAdjustRelationPersistence(RangeVar *newRelation, Oid nspid) errmsg("cannot create temporary relation in non-temporary schema"))); } break; + case RELPERSISTENCE_GLOBAL_TEMP: + /* Do not allow create global temporary table in temporary schemas */ + if (isAnyTempNamespace(nspid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot create global temp table in temporary schemas"))); + break; case RELPERSISTENCE_PERMANENT: if (isTempOrTempToastNamespace(nspid)) newRelation->relpersistence = RELPERSISTENCE_TEMP; diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index c5ad28d71f..707068a6fd 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -27,6 +27,7 @@ #include "access/xlogutils.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" +#include "catalog/storage_gtt.h" #include "miscadmin.h" #include "storage/freespace.h" #include "storage/smgr.h" @@ -61,6 +62,7 @@ typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ BackendId backend; /* InvalidBackendId if not a temp rel */ + Oid temprelOid; /* InvalidOid if not a global temporary rel */ bool atCommit; /* T=delete at commit; F=delete at abort */ int nestLevel; /* xact nesting level of request */ struct PendingRelDelete *next; /* linked-list link */ @@ -115,7 +117,7 @@ AddPendingSync(const 
RelFileNode *rnode) * transaction aborts later on, the storage will be destroyed. */ SMgrRelation -RelationCreateStorage(RelFileNode rnode, char relpersistence) +RelationCreateStorage(RelFileNode rnode, char relpersistence, Relation rel) { PendingRelDelete *pending; SMgrRelation srel; @@ -126,7 +128,12 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) switch (relpersistence) { + /* + * Global temporary table and local temporary table use same + * design on storage module. + */ case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_GLOBAL_TEMP: backend = BackendIdForTempRelations(); needs_wal = false; break; @@ -154,6 +161,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rnode; pending->backend = backend; + pending->temprelOid = InvalidOid; pending->atCommit = false; /* delete if abort */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; @@ -165,6 +173,21 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) AddPendingSync(&rnode); } + if (relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + { + Assert(rel && RELATION_IS_GLOBAL_TEMP(rel)); + + /* + * Remember the reloid of global temporary table, which is used for + * transaction commit or rollback. + * see smgrDoPendingDeletes. 
+ */ + pending->temprelOid = RelationGetRelid(rel); + + /* Remember global temporary table storage info to localhash */ + remember_gtt_storage_info(rnode, rel); + } + return srel; } @@ -201,11 +224,20 @@ RelationDropStorage(Relation rel) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rel->rd_node; pending->backend = rel->rd_backend; + pending->temprelOid = InvalidOid; pending->atCommit = true; /* delete if commit */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; pendingDeletes = pending; + /* + * Remember the reloid of global temporary table, which is used for + * transaction commit or rollback. + * see smgrDoPendingDeletes. + */ + if (RELATION_IS_GLOBAL_TEMP(rel)) + pending->temprelOid = RelationGetRelid(rel); + /* * NOTE: if the relation was created in this transaction, it will now be * present in the pending-delete list twice, once with atCommit true and @@ -618,6 +650,7 @@ smgrDoPendingDeletes(bool isCommit) int nrels = 0, maxrels = 0; SMgrRelation *srels = NULL; + Oid *reloids = NULL; prev = NULL; for (pending = pendingDeletes; pending != NULL; pending = next) @@ -647,14 +680,18 @@ smgrDoPendingDeletes(bool isCommit) { maxrels = 8; srels = palloc(sizeof(SMgrRelation) * maxrels); + reloids = palloc(sizeof(Oid) * maxrels); } else if (maxrels <= nrels) { maxrels *= 2; srels = repalloc(srels, sizeof(SMgrRelation) * maxrels); + reloids = repalloc(reloids, sizeof(Oid) * maxrels); } - srels[nrels++] = srel; + srels[nrels] = srel; + reloids[nrels] = pending->temprelOid; + nrels++; } /* must explicitly free the list entry */ pfree(pending); @@ -664,12 +701,21 @@ smgrDoPendingDeletes(bool isCommit) if (nrels > 0) { + int i; + smgrdounlinkall(srels, nrels, false); - for (int i = 0; i < nrels; i++) + for (i = 0; i < nrels; i++) + { smgrclose(srels[i]); + /* Delete global temporary table info in localhash */ + if (gtt_storage_attached(reloids[i])) + forget_gtt_storage_info(reloids[i], 
srels[i]->smgr_rnode.node, isCommit); + } + pfree(srels); + pfree(reloids); } } diff --git a/src/backend/catalog/storage_gtt.c b/src/backend/catalog/storage_gtt.c new file mode 100644 index 0000000000..6aec275c73 --- /dev/null +++ b/src/backend/catalog/storage_gtt.c @@ -0,0 +1,1651 @@ +/*------------------------------------------------------------------------- + * + * storage_gtt.c + * The body implementation of Global Temparary table. + * + * IDENTIFICATION + * src/backend/catalog/storage_gtt.c + * + * See src/backend/catalog/GTT_README for Global temparary table's + * requirements and design. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/amapi.h" +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/table.h" +#include "access/relation.h" +#include "access/visibilitymap.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include "catalog/storage_gtt.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/index.h" +#include "catalog/pg_type.h" +#include "catalog/pg_statistic.h" +#include "commands/tablecmds.h" +#include "commands/sequence.h" +#include "funcapi.h" +#include "nodes/primnodes.h" +#include "nodes/pg_list.h" +#include "nodes/execnodes.h" +#include "miscadmin.h" +#include "storage/freespace.h" +#include "storage/smgr.h" +#include "storage/ipc.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/sinvaladt.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/hsearch.h" +#include "utils/catcache.h" +#include "utils/lsyscache.h" +#include +#include "utils/inval.h" +#include "utils/guc.h" + + +/* Copy from bitmapset.c, because gtt used the function in bitmapset.c 
*/
+#define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)	/* index of the bitmapword that holds bit x */
+#define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)	/* offset of bit x inside that bitmapword */
+
+#define BITMAPSET_SIZE(nwords)	\
+	(offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))	/* bytes taken by a Bitmapset of nwords words */
+
+static bool gtt_cleaner_exit_registered = false;	/* true once gtt_storage_removeall is registered with before_shmem_exit */
+static HTAB *gtt_storage_local_hash = NULL;	/* backend-local hash: relid -> gtt_local_hash_entry */
+static HTAB *active_gtt_shared_hash = NULL;	/* shared hash: gtt_fnode -> gtt_shared_hash_entry */
+static MemoryContext gtt_info_context = NULL;	/* context used for the local GTT bookkeeping and cached statistics */
+
+/*
+ * relfrozenxid of all gtts in the current session.
+ * The list is kept ordered by insert_gtt_relfrozenxid_to_ordered_list();
+ * set_gtt_session_relfrozenxid() publishes its last element to MyProc.
+ */
+static List *gtt_session_relfrozenxid_list = NIL;
+static TransactionId gtt_session_frozenxid = InvalidTransactionId;	/* value last computed by set_gtt_session_relfrozenxid() */
+
+int vacuum_gtt_defer_check_age = 0;	/* NOTE(review): presumably a GUC; not referenced in this part of the file */
+
+/*
+ * The Global temporary table's shared hash table data structure
+ *
+ * This control block lives in shared memory and is set up by
+ * active_gtt_shared_hash_init().
+ */
+typedef struct gtt_ctl_data
+{
+	LWLock lock;	/* taken around every access to active_gtt_shared_hash */
+	int max_entry;	/* copied from max_active_gtt at initialization */
+	int entry_size;	/* result of action_gtt_shared_hash_entry_size() */
+}gtt_ctl_data;
+
+static gtt_ctl_data *gtt_shared_ctl = NULL;
+
+typedef struct gtt_fnode
+{
+	Oid dbNode;	/* database OID (MyDatabaseId of the registering backend) */
+	Oid relNode;	/* filled with the relation OID by the callers in this file */
+} gtt_fnode;
+
+/*
+ * record this global temporary table in which backends are being used
+ *
+ * rnode is the hash key.
+ */
+typedef struct
+{
+	gtt_fnode rnode;
+	Bitmapset *map;
+	/*
+	 * bitmap data: gtt_storage_checkin() points map at the memory that
+	 * follows the MAXALIGN'ed entry header; one bit per backend id.
+	 */
+} gtt_shared_hash_entry;
+
+/*
+ * The Global temporary table's local hash table data structure
+ */
+/*
+ * Record the storage information and statistical information of the global temporary table.
+ * One of these exists per relfilenode that a GTT has in this backend.
+ */
+typedef struct
+{
+	Oid relfilenode;
+	Oid spcnode;	/* tablespace OID of the storage */
+
+	/* pg_class relstat */
+	int32 relpages;
+	float4 reltuples;
+	int32 relallvisible;
+	TransactionId relfrozenxid;
+	TransactionId relminmxid;	/* holds a multixact id; declared as TransactionId */
+
+	/* pg_statistic column stat */
+	int natts;	/* number of slots in the two arrays below */
+	int *attnum;	/* attribute number stored in each slot, 0 means the slot is free */
+	HeapTuple *att_stat_tups;	/* statistics tuple for the matching attnum slot */
+} gtt_relfilenode;
+
+typedef struct
+{
+	Oid relid;	/* hash key: relation OID */
+
+	List *relfilenode_list;	/* list of gtt_relfilenode; the last element is treated as the current one */
+
+	char relkind;
+	bool on_commit_delete;	/* set when the relation has the ON COMMIT DELETE ROWS option */
+
+	Oid oldrelid; /* remember the source of relid, before the switch relfilenode. */
+} gtt_local_hash_entry;
+
+static Size action_gtt_shared_hash_entry_size(void);
+static void gtt_storage_checkin(Oid relid);
+static void gtt_storage_checkout(Oid relid, bool skiplock, bool isCommit);
+static void gtt_storage_removeall(int code, Datum arg);
+static void insert_gtt_relfrozenxid_to_ordered_list(Oid relfrozenxid);
+static void remove_gtt_relfrozenxid_from_ordered_list(Oid relfrozenxid);
+static void set_gtt_session_relfrozenxid(void);
+static void gtt_free_statistics(gtt_relfilenode *rnode);
+static gtt_relfilenode *gtt_search_relfilenode(gtt_local_hash_entry *entry, Oid relfilenode, bool missing_ok);
+static gtt_local_hash_entry *gtt_search_by_relid(Oid relid, bool missing_ok);
+static Bitmapset *copy_active_gtt_bitmap(Oid relid);
+
+Datum pg_get_gtt_statistics(PG_FUNCTION_ARGS);
+Datum pg_get_gtt_relstats(PG_FUNCTION_ARGS);
+Datum pg_gtt_attached_pid(PG_FUNCTION_ARGS);
+Datum pg_list_gtt_relfrozenxids(PG_FUNCTION_ARGS);
+
+/*
+ * Calculate shared hash table entry size for GTT.
+ *
+ * The result is the MAXALIGN'ed entry header plus a MAXALIGN'ed Bitmapset
+ * large enough for bit numbers up to MaxBackends + 1.  Returns 0 when
+ * max_active_gtt is not positive.
+ */
+static Size
+action_gtt_shared_hash_entry_size(void)
+{
+	int wordnum;
+	Size hash_entry_size = 0;
+
+	if (max_active_gtt <= 0)
+		return 0;
+
+	wordnum = WORDNUM(MaxBackends + 1);	/* index of the highest word needed */
+	/* hash entry header size */
+	hash_entry_size += MAXALIGN(sizeof(gtt_shared_hash_entry));
+	/*
+	 * hash entry data size
+	 * this is a bitmap in shared memory, each backend have a bit.
+	 * wordnum is a zero-based index, hence wordnum + 1 words.
+	 */
+	hash_entry_size += MAXALIGN(BITMAPSET_SIZE(wordnum + 1));
+
+	return hash_entry_size;
+}
+
+/*
+ * Calculate shared hash table max size for GTT.
+ *
+ * Sums the control structure and the hash_estimate_size() of
+ * max_active_gtt entries.  Returns 0 when max_active_gtt is not positive.
+ */
+Size
+active_gtt_shared_hash_size(void)
+{
+	Size size = 0;
+	Size hash_entry_size = 0;
+
+	if (max_active_gtt <= 0)
+		return 0;
+
+	/* shared hash header size */
+	size = MAXALIGN(sizeof(gtt_ctl_data));
+	/* hash entry size */
+	hash_entry_size = action_gtt_shared_hash_entry_size();
+	/* max size */
+	size += hash_estimate_size(max_active_gtt, hash_entry_size);
+
+	return size;
+}
+
+/*
+ * Initialize the shared control structure and the shared hash table for GTT.
+ */ +void +active_gtt_shared_hash_init(void) +{ + HASHCTL info; + bool found; + + if (max_active_gtt <= 0) + return; + + gtt_shared_ctl = + ShmemInitStruct("gtt_shared_ctl", + sizeof(gtt_ctl_data), + &found); + + if (!found) + { + LWLockRegisterTranche(LWTRANCHE_GTT_CTL, "gtt_shared_ctl"); + LWLockInitialize(>t_shared_ctl->lock, LWTRANCHE_GTT_CTL); + gtt_shared_ctl->max_entry = max_active_gtt; + gtt_shared_ctl->entry_size = action_gtt_shared_hash_entry_size(); + } + + MemSet(&info, 0, sizeof(info)); + info.keysize = sizeof(gtt_fnode); + info.entrysize = action_gtt_shared_hash_entry_size(); + active_gtt_shared_hash = + ShmemInitHash("active gtt shared hash", + gtt_shared_ctl->max_entry, + gtt_shared_ctl->max_entry, + &info, HASH_ELEM | HASH_BLOBS | HASH_FIXED_SIZE); +} + +/* + * Record GTT relid to shared hash table, which means that current backend is using this GTT. + */ +static void +gtt_storage_checkin(Oid relid) +{ + gtt_shared_hash_entry *entry; + bool found; + gtt_fnode fnode; + + if (max_active_gtt <= 0) + return; + + fnode.dbNode = MyDatabaseId; + fnode.relNode = relid; + LWLockAcquire(>t_shared_ctl->lock, LW_EXCLUSIVE); + entry = hash_search(active_gtt_shared_hash, + (void *)&(fnode), HASH_ENTER_NULL, &found); + + if (entry == NULL) + { + LWLockRelease(>t_shared_ctl->lock); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_active_global_temporary_table."))); + } + + if (!found) + { + int wordnum; + + /* init bitmap */ + entry->map = (Bitmapset *)((char *)entry + MAXALIGN(sizeof(gtt_shared_hash_entry))); + wordnum = WORDNUM(MaxBackends + 1); + memset(entry->map, 0, BITMAPSET_SIZE(wordnum + 1)); + entry->map->nwords = wordnum + 1; + } + + /* record itself in bitmap */ + bms_add_member(entry->map, MyBackendId); + LWLockRelease(>t_shared_ctl->lock); +} + +/* + * Remove the GTT relid record from the shared hash table which means that current backend is + * not use this GTT. 
+ */ +static void +gtt_storage_checkout(Oid relid, bool skiplock, bool isCommit) +{ + gtt_shared_hash_entry *entry; + gtt_fnode fnode; + + if (max_active_gtt <= 0) + return; + + fnode.dbNode = MyDatabaseId; + fnode.relNode = relid; + if (!skiplock) + LWLockAcquire(>t_shared_ctl->lock, LW_EXCLUSIVE); + + entry = hash_search(active_gtt_shared_hash, + (void *) &(fnode), HASH_FIND, NULL); + + if (entry == NULL) + { + if (!skiplock) + LWLockRelease(>t_shared_ctl->lock); + + if (isCommit) + elog(WARNING, "relid %u not exist in gtt shared hash when forget", relid); + + return; + } + + Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends); + + /* remove itself from bitmap */ + bms_del_member(entry->map, MyBackendId); + + if (bms_is_empty(entry->map)) + { + if (!hash_search(active_gtt_shared_hash, &fnode, HASH_REMOVE, NULL)) + elog(PANIC, "gtt shared hash table corrupted"); + } + + if (!skiplock) + LWLockRelease(>t_shared_ctl->lock); + + return; +} + +/* + * Gets usage information for a GTT from shared hash table. + * The information is in the form of bitmap. + * Quickly copy the entire bitmap from shared memory and return it. + * that to avoid holding locks for a long time. + */ +static Bitmapset * +copy_active_gtt_bitmap(Oid relid) +{ + gtt_shared_hash_entry *entry; + Bitmapset *map_copy = NULL; + gtt_fnode fnode; + + if (max_active_gtt <= 0) + return NULL; + + fnode.dbNode = MyDatabaseId; + fnode.relNode = relid; + LWLockAcquire(>t_shared_ctl->lock, LW_SHARED); + entry = hash_search(active_gtt_shared_hash, + (void *) &(fnode), HASH_FIND, NULL); + + if (entry == NULL) + { + LWLockRelease(>t_shared_ctl->lock); + return NULL; + } + + Assert(entry->map); + + /* copy the entire bitmap */ + if (!bms_is_empty(entry->map)) + map_copy = bms_copy(entry->map); + + LWLockRelease(>t_shared_ctl->lock); + + return map_copy; +} + +/* + * Check if there are other backends using this GTT besides the current backend. 
+ */
+bool
+is_other_backend_use_gtt(Oid relid)
+{
+	gtt_shared_hash_entry *entry;
+	bool		in_use;
+	int			num_use;
+	gtt_fnode	fnode;
+
+	if (max_active_gtt <= 0)
+		return false;
+
+	fnode.dbNode = MyDatabaseId;
+	fnode.relNode = relid;
+	LWLockAcquire(&gtt_shared_ctl->lock, LW_SHARED);
+	entry = hash_search(active_gtt_shared_hash,
+						(void *) &(fnode), HASH_FIND, NULL);
+
+	if (entry == NULL)
+	{
+		LWLockRelease(&gtt_shared_ctl->lock);
+		return false;
+	}
+
+	Assert(entry->map);
+	Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
+
+	/*
+	 * how many backend are using this GTT: more than one user always
+	 * includes somebody else; a single user counts only if it is not us.
+	 */
+	num_use = bms_num_members(entry->map);
+	in_use = (num_use > 1) ||
+		(num_use == 1 && !bms_is_member(MyBackendId, entry->map));
+
+	LWLockRelease(&gtt_shared_ctl->lock);
+
+	return in_use;
+}
+
+/*
+ * Record GTT information to local hash.
+ * They include GTT storage info, transaction info and statistical info.
+ *
+ * rnode: the storage being created for the relation.
+ * rel: the global temporary relation (table, index or sequence).
+ */ +void +remember_gtt_storage_info(RelFileNode rnode, Relation rel) +{ + gtt_local_hash_entry *entry; + MemoryContext oldcontext; + gtt_relfilenode *new_node = NULL; + Oid relid = RelationGetRelid(rel); + int natts = 0; + + if (max_active_gtt <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Global temporary table feature is disable"), + errhint("You might need to increase max_active_global_temporary_table to enable this feature."))); + + if (RecoveryInProgress()) + elog(ERROR, "readonly mode not support access global temporary table"); + + if (rel->rd_rel->relkind == RELKIND_INDEX && + rel->rd_index && + (!rel->rd_index->indisvalid || + !rel->rd_index->indisready || + !rel->rd_index->indislive)) + elog(ERROR, "invalid gtt index %s not allow to create storage", RelationGetRelationName(rel)); + + /* First time through: initialize the hash table */ + if (gtt_storage_local_hash == NULL) + { +#define GTT_LOCAL_HASH_SIZE 1024 + HASHCTL ctl; + + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + gtt_info_context = + AllocSetContextCreate(CacheMemoryContext, + "gtt info context", + ALLOCSET_DEFAULT_SIZES); + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(gtt_local_hash_entry); + ctl.hcxt = gtt_info_context; + gtt_storage_local_hash = + hash_create("global temporary table info", + GTT_LOCAL_HASH_SIZE, + &ctl, HASH_ELEM | HASH_BLOBS); + } + + Assert(CacheMemoryContext); + Assert(gtt_info_context); + oldcontext = MemoryContextSwitchTo(gtt_info_context); + + entry = gtt_search_by_relid(relid, true); + if (!entry) + { + bool found = false; + + /* Look up or create an entry */ + entry = hash_search(gtt_storage_local_hash, + (void *) &relid, HASH_ENTER, &found); + + if (found) + { + MemoryContextSwitchTo(oldcontext); + elog(ERROR, "backend %d relid %u already exists in gtt local hash", + MyBackendId, relid); + } + + entry->relfilenode_list = NIL; + entry->relkind = rel->rd_rel->relkind; + 
entry->on_commit_delete = false; + entry->oldrelid = InvalidOid; + + if (entry->relkind == RELKIND_RELATION) + { + /* record the on commit clause */ + if (RELATION_GTT_ON_COMMIT_DELETE(rel)) + { + entry->on_commit_delete = true; + register_on_commit_action(RelationGetRelid(rel), ONCOMMIT_DELETE_ROWS); + } + } + + if (entry->relkind == RELKIND_RELATION || + entry->relkind == RELKIND_SEQUENCE) + { + gtt_storage_checkin(relid); + } + } + + /* record storage info relstat columnstats and transaction info to relfilenode list */ + new_node = palloc0(sizeof(gtt_relfilenode)); + new_node->relfilenode = rnode.relNode; + new_node->spcnode = rnode.spcNode; + new_node->relpages = 0; + new_node->reltuples = 0; + new_node->relallvisible = 0; + new_node->relfrozenxid = InvalidTransactionId; + new_node->relminmxid = InvalidMultiXactId; + new_node->natts = 0; + new_node->attnum = NULL; + new_node->att_stat_tups = NULL; + entry->relfilenode_list = lappend(entry->relfilenode_list, new_node); + + /* init column stats structure */ + natts = RelationGetNumberOfAttributes(rel); + new_node->attnum = palloc0(sizeof(int) * natts); + new_node->att_stat_tups = palloc0(sizeof(HeapTuple) * natts); + new_node->natts = natts; + + /* only heap have transaction info */ + if (entry->relkind == RELKIND_RELATION) + { + new_node->relfrozenxid = RecentXmin; + new_node->relminmxid = GetOldestMultiXactId(); + + /**/ + insert_gtt_relfrozenxid_to_ordered_list(new_node->relfrozenxid); + set_gtt_session_relfrozenxid(); + } + + MemoryContextSwitchTo(oldcontext); + + /* Registration callbacks are used to trigger cleanup during process exit */ + if (!gtt_cleaner_exit_registered) + { + before_shmem_exit(gtt_storage_removeall, 0); + gtt_cleaner_exit_registered = true; + } + + return; +} + +/* + * Remove GTT information from local hash when transaction commit/rollback. 
+ */
+void
+forget_gtt_storage_info(Oid relid, RelFileNode rnode, bool isCommit)
+{
+	gtt_local_hash_entry *entry = NULL;
+	gtt_relfilenode *d_rnode = NULL;
+
+	if (max_active_gtt <= 0)
+		return;
+
+	entry = gtt_search_by_relid(relid, true);
+	if (entry == NULL)
+	{
+		if (isCommit)
+			elog(ERROR,"gtt rel %u not found in local hash", relid);
+
+		return;
+	}
+
+	d_rnode = gtt_search_relfilenode(entry, rnode.relNode, true);
+	if (d_rnode == NULL)
+	{
+		if (isCommit)
+			elog(ERROR,"gtt relfilenode %u not found in rel %u", rnode.relNode, relid);
+		else if (entry->oldrelid != InvalidOid)
+		{
+			gtt_local_hash_entry *entry2 = NULL;
+			gtt_relfilenode *gttnode2 = NULL;
+
+			/*
+			 * For cluster GTT rollback.
+			 * We need to roll back the exchange relfilenode operation.
+			 * The relfilenode is looked up in the entry recorded as
+			 * oldrelid and switched back into this entry.
+			 */
+			entry2 = gtt_search_by_relid(entry->oldrelid, false);
+			gttnode2 = gtt_search_relfilenode(entry2, rnode.relNode, false);
+			Assert(gttnode2->relfilenode == rnode.relNode);
+			Assert(list_length(entry->relfilenode_list) == 1);
+			/* rollback switch relfilenode */
+			gtt_switch_rel_relfilenode(entry2->relid, gttnode2->relfilenode,
+									   entry->relid, gtt_fetch_current_relfilenode(entry->relid),
+									   false);
+			/*
+			 * clean up footprint.
+			 * NOTE(review): this resets entry2->oldrelid although the branch
+			 * was entered because entry->oldrelid is set -- confirm which
+			 * entry is meant.
+			 */
+			entry2->oldrelid = InvalidOid;
+
+			/* temp relfilenode need free; after the switch it must be found in this entry */
+			d_rnode = gtt_search_relfilenode(entry, rnode.relNode, false);
+			Assert(d_rnode);
+		}
+		else
+		{
+			/*
+			 * rollback transaction: the relfilenode was never recorded.
+			 * If the entry holds no storage at all, drop it completely.
+			 */
+			if (entry->relfilenode_list == NIL)
+			{
+				if (entry->relkind == RELKIND_RELATION ||
+					entry->relkind == RELKIND_SEQUENCE)
+					gtt_storage_checkout(relid, false, isCommit);
+
+				hash_search(gtt_storage_local_hash,
+							(void *) &(relid), HASH_REMOVE, NULL);
+			}
+
+			return;
+		}
+	}
+
+	/* Clean up transaction info from Local order list and MyProc */
+	if (entry->relkind == RELKIND_RELATION)
+	{
+		Assert(TransactionIdIsNormal(d_rnode->relfrozenxid) || !isCommit);
+
+		/* this is valid relfrozenxid */
+		if (TransactionIdIsValid(d_rnode->relfrozenxid))
+		{
+			remove_gtt_relfrozenxid_from_ordered_list(d_rnode->relfrozenxid);
+			set_gtt_session_relfrozenxid();
+		}
+	}
+
+	/* delete relfilenode from rel entry */
+	entry->relfilenode_list = list_delete_ptr(entry->relfilenode_list, d_rnode);
+	gtt_free_statistics(d_rnode);
+
+	if (entry->relfilenode_list == NIL)
+	{
+		/* this means we truncate this GTT at current backend */
+
+		/* tell shared hash that current backend will no longer use this GTT */
+		if (entry->relkind == RELKIND_RELATION ||
+			entry->relkind == RELKIND_SEQUENCE)
+			gtt_storage_checkout(relid, false, isCommit);
+
+		if (isCommit && entry->oldrelid != InvalidOid)
+		{
+			gtt_local_hash_entry *entry2 = NULL;
+
+			/* commit transaction at cluster GTT, need clean up footprint */
+			entry2 = gtt_search_by_relid(entry->oldrelid, false);
+			entry2->oldrelid = InvalidOid;
+		}
+
+		/* the entry no longer describes any storage, so remove it */
+		hash_search(gtt_storage_local_hash,
+					(void *) &(relid), HASH_REMOVE, NULL);
+	}
+
+	return;
+}
+
+/*
+ * Check if current backend is using this GTT.
+ *
+ * Returns true when relid has an entry in the backend-local hash; false when
+ * the feature is disabled, relid is invalid or no entry exists.
+ */
+bool
+gtt_storage_attached(Oid relid)
+{
+	bool found = false;
+	gtt_local_hash_entry *entry = NULL;
+
+	if (max_active_gtt <= 0)
+		return false;
+
+	if (!OidIsValid(relid))
+		return false;
+
+	entry = gtt_search_by_relid(relid, true);
+	if (entry)
+		found = true;
+
+	return found;
+}
+
+/*
+ * When the backend exits, bulk clean all GTT storage and local buffers of this backend.
+ * Registered with before_shmem_exit() by remember_gtt_storage_info().
+ */ +static void +gtt_storage_removeall(int code, Datum arg) +{ + HASH_SEQ_STATUS status; + gtt_local_hash_entry *entry; + SMgrRelation *srels = NULL; + Oid *relids = NULL; + char *relkinds = NULL; + int nrels = 0, + nfiles = 0, + maxrels = 0, + maxfiles = 0, + i = 0; + + if (gtt_storage_local_hash == NULL) + return; + + /* Search all relfilenode for GTT in current backend */ + hash_seq_init(&status, gtt_storage_local_hash); + while ((entry = (gtt_local_hash_entry *) hash_seq_search(&status)) != NULL) + { + ListCell *lc; + + foreach(lc, entry->relfilenode_list) + { + SMgrRelation srel; + RelFileNode rnode; + gtt_relfilenode *gtt_rnode = lfirst(lc); + + rnode.spcNode = gtt_rnode->spcnode; + rnode.dbNode = MyDatabaseId; + rnode.relNode = gtt_rnode->relfilenode; + srel = smgropen(rnode, MyBackendId); + + if (maxfiles == 0) + { + maxfiles = 32; + srels = palloc(sizeof(SMgrRelation) * maxfiles); + } + else if (maxfiles <= nfiles) + { + maxfiles *= 2; + srels = repalloc(srels, sizeof(SMgrRelation) * maxfiles); + } + + srels[nfiles++] = srel; + } + + if (maxrels == 0) + { + maxrels = 32; + relids = palloc(sizeof(Oid) * maxrels); + relkinds = palloc(sizeof(char) * maxrels); + } + else if (maxrels <= nrels) + { + maxrels *= 2; + relids = repalloc(relids , sizeof(Oid) * maxrels); + relkinds = repalloc(relkinds, sizeof(char) * maxrels); + } + + relkinds[nrels] = entry->relkind; + relids[nrels] = entry->relid; + nrels++; + } + + /* drop local buffer and storage */ + if (nfiles > 0) + { + /* Need to ensure we have a usable transaction. 
*/ + AbortOutOfAnyTransaction(); + + smgrdounlinkall(srels, nfiles, false); + for (i = 0; i < nfiles; i++) + smgrclose(srels[i]); + + pfree(srels); + } + + if (nrels) + { + LWLockAcquire(>t_shared_ctl->lock, LW_EXCLUSIVE); + for (i = 0; i < nrels; i++) + { + /* tell shared hash */ + if (relkinds[i] == RELKIND_RELATION || + relkinds[i] == RELKIND_SEQUENCE) + gtt_storage_checkout(relids[i], true, false); + } + LWLockRelease(>t_shared_ctl->lock); + + pfree(relids); + pfree(relkinds); + } + + /* set to global area */ + MyProc->backend_gtt_frozenxid = InvalidTransactionId; + + return; +} + +/* + * Update GTT relstats(relpage/reltuple/relallvisible) + * to local hash. + */ +void +up_gtt_relstats(Oid relid, + BlockNumber num_pages, + double num_tuples, + BlockNumber num_all_visible_pages, + TransactionId relfrozenxid, + TransactionId relminmxid) +{ + gtt_local_hash_entry *entry; + gtt_relfilenode *gtt_rnode = NULL; + + if (max_active_gtt <= 0) + return; + + if (!OidIsValid(relid)) + return; + + entry = gtt_search_by_relid(relid, true); + if (entry == NULL) + return; + + gtt_rnode = lfirst(list_tail(entry->relfilenode_list)); + if (gtt_rnode == NULL) + return; + + if (num_pages > 0 && + gtt_rnode->relpages != (int32)num_pages) + gtt_rnode->relpages = (int32)num_pages; + + if (num_tuples > 0 && + gtt_rnode->reltuples != (float4)num_tuples) + gtt_rnode->reltuples = (float4)num_tuples; + + /* only heap contain transaction information and relallvisible */ + if (entry->relkind == RELKIND_RELATION) + { + if (num_all_visible_pages > 0 && + gtt_rnode->relallvisible != (int32)num_all_visible_pages) + { + gtt_rnode->relallvisible = (int32)num_all_visible_pages; + } + + if (TransactionIdIsNormal(relfrozenxid) && + gtt_rnode->relfrozenxid != relfrozenxid && + (TransactionIdPrecedes(gtt_rnode->relfrozenxid, relfrozenxid) || + TransactionIdPrecedes(ReadNextTransactionId(), gtt_rnode->relfrozenxid))) + { + /* set to local order list */ + 
remove_gtt_relfrozenxid_from_ordered_list(gtt_rnode->relfrozenxid); + gtt_rnode->relfrozenxid = relfrozenxid; + insert_gtt_relfrozenxid_to_ordered_list(relfrozenxid); + /* set to global area */ + set_gtt_session_relfrozenxid(); + } + + if (MultiXactIdIsValid(relminmxid) && + gtt_rnode->relminmxid != relminmxid && + (MultiXactIdPrecedes(gtt_rnode->relminmxid, relminmxid) || + MultiXactIdPrecedes(ReadNextMultiXactId(), gtt_rnode->relminmxid))) + { + gtt_rnode->relminmxid = relminmxid; + } + } + + return; +} + +/* + * Search GTT relstats(relpage/reltuple/relallvisible) + * from local has. + */ +bool +get_gtt_relstats(Oid relid, BlockNumber *relpages, double *reltuples, + BlockNumber *relallvisible, TransactionId *relfrozenxid, + TransactionId *relminmxid) +{ + gtt_local_hash_entry *entry; + gtt_relfilenode *gtt_rnode = NULL; + + if (max_active_gtt <= 0) + return false; + + entry = gtt_search_by_relid(relid, true); + if (entry == NULL) + return false; + + Assert(entry->relid == relid); + + gtt_rnode = lfirst(list_tail(entry->relfilenode_list)); + if (gtt_rnode == NULL) + return false; + + if (relpages) + *relpages = gtt_rnode->relpages; + + if (reltuples) + *reltuples = gtt_rnode->reltuples; + + if (relallvisible) + *relallvisible = gtt_rnode->relallvisible; + + if (relfrozenxid) + *relfrozenxid = gtt_rnode->relfrozenxid; + + if (relminmxid) + *relminmxid = gtt_rnode->relminmxid; + + return true; +} + +/* + * Update GTT info(definition is same as pg_statistic) + * to local hash. 
+ */
+void
+up_gtt_att_statistic(Oid reloid, int attnum, bool inh, int natts,
+					 TupleDesc tupleDescriptor, Datum *values, bool *isnull)
+{
+	gtt_local_hash_entry *entry;
+	gtt_relfilenode *gtt_rnode = NULL;
+	MemoryContext oldcontext;
+	HeapTuple	newtup;
+	int			slot;
+
+	if (max_active_gtt <= 0)
+		return;
+
+	entry = gtt_search_by_relid(reloid, true);
+	if (entry == NULL)
+		return;
+
+	Assert(entry->relid == reloid);
+
+	/* an empty list must be caught before lfirst() dereferences the cell */
+	if (entry->relfilenode_list == NIL)
+		return;
+
+	gtt_rnode = lfirst(list_tail(entry->relfilenode_list));
+	if (gtt_rnode == NULL)
+		return;
+
+	if (gtt_rnode->natts < natts)
+	{
+		elog(WARNING, "reloid %u not support update attstat after add colunm", reloid);
+		return;
+	}
+
+	/* pick the first free slot or the slot already used by this column */
+	for (slot = 0; slot < gtt_rnode->natts; slot++)
+	{
+		if (gtt_rnode->attnum[slot] == 0 ||
+			gtt_rnode->attnum[slot] == attnum)
+			break;
+	}
+
+	/*
+	 * No usable slot: writing att_stat_tups[natts] would run past the end of
+	 * the array, so give up instead of relying on an assertion only.
+	 */
+	if (slot >= gtt_rnode->natts)
+	{
+		elog(WARNING, "reloid %u has no free slot to store statistics of column %d",
+			 reloid, attnum);
+		return;
+	}
+
+	/*
+	 * switch context to gtt_info_context for store tuple at heap_form_tuple.
+	 * The new tuple is built before the slot is touched, so a failure here
+	 * leaves the slot in its previous, consistent state.
+	 */
+	oldcontext = MemoryContextSwitchTo(gtt_info_context);
+	newtup = heap_form_tuple(tupleDescriptor, values, isnull);
+	MemoryContextSwitchTo(oldcontext);
+
+	/* release the statistics being replaced, if any */
+	if (gtt_rnode->att_stat_tups[slot] != NULL)
+		heap_freetuple(gtt_rnode->att_stat_tups[slot]);
+
+	gtt_rnode->attnum[slot] = attnum;
+	gtt_rnode->att_stat_tups[slot] = newtup;
+}
+
+/*
+ * Search GTT statistic info(definition is same as pg_statistic)
+ * from local hash.
+ *
+ * Returns the stored tuple itself (not a copy) for column attnum of reloid,
+ * or NULL if nothing is stored.  inh is not used.
+ */ +HeapTuple +get_gtt_att_statistic(Oid reloid, int attnum, bool inh) +{ + gtt_local_hash_entry *entry; + int i = 0; + gtt_relfilenode *gtt_rnode = NULL; + + if (max_active_gtt <= 0) + return NULL; + + entry = gtt_search_by_relid(reloid, true); + if (entry == NULL) + return NULL; + + gtt_rnode = lfirst(list_tail(entry->relfilenode_list)); + if (gtt_rnode == NULL) + return NULL; + + for (i = 0; i < gtt_rnode->natts; i++) + { + if (gtt_rnode->attnum[i] == attnum) + { + Assert(gtt_rnode->att_stat_tups[i]); + return gtt_rnode->att_stat_tups[i]; + } + } + + return NULL; +} + +void +release_gtt_statistic_cache(HeapTuple tup) +{ + /* do nothing */ + return; +} + +/* + * Maintain a order relfrozenxid list of backend Level for GTT. + * Insert a RelfrozenXID into the list and keep the list in order. + */ +static void +insert_gtt_relfrozenxid_to_ordered_list(Oid relfrozenxid) +{ + MemoryContext oldcontext; + ListCell *cell; + int i; + + Assert(TransactionIdIsNormal(relfrozenxid)); + + oldcontext = MemoryContextSwitchTo(gtt_info_context); + + /* Does the datum belong at the front? */ + if (gtt_session_relfrozenxid_list == NIL || + TransactionIdFollowsOrEquals(relfrozenxid, + linitial_oid(gtt_session_relfrozenxid_list))) + { + gtt_session_relfrozenxid_list = + lcons_oid(relfrozenxid, gtt_session_relfrozenxid_list); + MemoryContextSwitchTo(oldcontext); + + return; + } + + /* No, so find the entry it belongs after */ + i = 0; + foreach (cell, gtt_session_relfrozenxid_list) + { + if (TransactionIdFollowsOrEquals(relfrozenxid, lfirst_oid(cell))) + break; + + i++; + } + gtt_session_relfrozenxid_list = + list_insert_nth_oid(gtt_session_relfrozenxid_list, i, relfrozenxid); + + MemoryContextSwitchTo(oldcontext); + + return; +} + +/* + * Maintain a order relfrozenxid list of backend Level for GTT. + * Remove a RelfrozenXID from order list gtt_session_relfrozenxid_list. 
+ */ +static void +remove_gtt_relfrozenxid_from_ordered_list(Oid relfrozenxid) +{ + gtt_session_relfrozenxid_list = + list_delete_oid(gtt_session_relfrozenxid_list, relfrozenxid); +} + +/* + * Update of backend Level oldest relfrozenxid to MyProc. + * This makes each backend's oldest RelFrozenxID globally visible. + */ +static void +set_gtt_session_relfrozenxid(void) +{ + TransactionId gtt_frozenxid = InvalidTransactionId; + + if (gtt_session_relfrozenxid_list) + gtt_frozenxid = llast_oid(gtt_session_relfrozenxid_list); + + gtt_session_frozenxid = gtt_frozenxid; + if (MyProc->backend_gtt_frozenxid != gtt_frozenxid) + MyProc->backend_gtt_frozenxid = gtt_frozenxid; +} + +/* + * Get GTT column level data statistics. + */ +Datum +pg_get_gtt_statistics(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Tuplestorestate *tupstore; + HeapTuple tuple; + Relation rel = NULL; + Oid reloid = PG_GETARG_OID(0); + int attnum = PG_GETARG_INT32(1); + char rel_persistence; + TupleDesc tupdesc; + MemoryContext oldcontext; + Relation pg_tatistic = NULL; + TupleDesc sd; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + oldcontext = MemoryContextSwitchTo( + rsinfo->econtext->ecxt_per_query_memory); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + rel = relation_open(reloid, AccessShareLock); + rel_persistence = 
get_rel_persistence(reloid); + if (rel_persistence != RELPERSISTENCE_GLOBAL_TEMP) + { + elog(WARNING, "relation OID %u is not a global temporary table", reloid); + relation_close(rel, NoLock); + return (Datum) 0; + } + + pg_tatistic = relation_open(StatisticRelationId, AccessShareLock); + sd = RelationGetDescr(pg_tatistic); + + /* get data from local hash */ + tuple = get_gtt_att_statistic(reloid, attnum, false); + if (tuple) + { + Datum values[31]; + bool isnull[31]; + HeapTuple res = NULL; + + memset(&values, 0, sizeof(values)); + memset(&isnull, 0, sizeof(isnull)); + heap_deform_tuple(tuple, sd, values, isnull); + res = heap_form_tuple(tupdesc, values, isnull); + tuplestore_puttuple(tupstore, res); + } + tuplestore_donestoring(tupstore); + + relation_close(rel, NoLock); + relation_close(pg_tatistic, AccessShareLock); + + return (Datum) 0; +} + +/* + * Get GTT table level data statistics. + */ +Datum +pg_get_gtt_relstats(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Tuplestorestate *tupstore; + TupleDesc tupdesc; + MemoryContext oldcontext; + HeapTuple tuple; + Oid reloid = PG_GETARG_OID(0); + Oid relnode = 0; + char rel_persistence; + BlockNumber relpages = 0; + BlockNumber relallvisible = 0; + uint32 relfrozenxid = 0; + uint32 relminmxid = 0; + double reltuples = 0; + Relation rel = NULL; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + oldcontext = MemoryContextSwitchTo( + rsinfo->econtext->ecxt_per_query_memory); + tupstore = tuplestore_begin_heap(true, false, 
work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + rel = relation_open(reloid, AccessShareLock); + rel_persistence = get_rel_persistence(reloid); + if (rel_persistence != RELPERSISTENCE_GLOBAL_TEMP) + { + elog(WARNING, "relation OID %u is not a global temporary table", reloid); + relation_close(rel, NoLock); + return (Datum) 0; + } + + get_gtt_relstats(reloid, + &relpages, &reltuples, &relallvisible, + &relfrozenxid, &relminmxid); + relnode = gtt_fetch_current_relfilenode(reloid); + if (relnode != InvalidOid) + { + Datum values[6]; + bool isnull[6]; + + memset(isnull, 0, sizeof(isnull)); + memset(values, 0, sizeof(values)); + values[0] = UInt32GetDatum(relnode); + values[1] = Int32GetDatum(relpages); + values[2] = Float4GetDatum((float4)reltuples); + values[3] = Int32GetDatum(relallvisible); + values[4] = UInt32GetDatum(relfrozenxid); + values[5] = UInt32GetDatum(relminmxid); + tuple = heap_form_tuple(tupdesc, values, isnull); + tuplestore_puttuple(tupstore, tuple); + } + tuplestore_donestoring(tupstore); + + relation_close(rel, NoLock); + + return (Datum) 0; +} + +/* + * Get a list of backend pids that are currently using this GTT. 
+ */ +Datum +pg_gtt_attached_pid(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + PGPROC *proc = NULL; + Bitmapset *map = NULL; + Tuplestorestate *tupstore; + TupleDesc tupdesc; + MemoryContext oldcontext; + HeapTuple tuple; + Oid reloid = PG_GETARG_OID(0); + char rel_persistence; + Relation rel = NULL; + pid_t pid = 0; + int backendid = 0; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + oldcontext = MemoryContextSwitchTo( + rsinfo->econtext->ecxt_per_query_memory); + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + rel = relation_open(reloid, AccessShareLock); + rel_persistence = get_rel_persistence(reloid); + if (rel_persistence != RELPERSISTENCE_GLOBAL_TEMP) + { + elog(WARNING, "relation OID %u is not a global temporary table", reloid); + relation_close(rel, NoLock); + return (Datum) 0; + } + + /* get data from share hash */ + map = copy_active_gtt_bitmap(reloid); + if (map) + { + backendid = bms_first_member(map); + + do + { + /* backendid map to process pid */ + proc = BackendIdGetProc(backendid); + pid = proc->pid; + if (pid > 0) + { + Datum values[2]; + bool isnull[2]; + + memset(isnull, false, sizeof(isnull)); + memset(values, 0, sizeof(values)); + values[0] = UInt32GetDatum(reloid); + values[1] = Int32GetDatum(pid); + tuple = heap_form_tuple(tupdesc, values, isnull); + tuplestore_puttuple(tupstore, tuple); + } + 
backendid = bms_next_member(map, backendid); + } while (backendid > 0); + + pfree(map); + } + + tuplestore_donestoring(tupstore); + relation_close(rel, NoLock); + + return (Datum) 0; +} + +/* + * Get backend level oldest relfrozenxid of each backend using GTT in current database. + */ +Datum +pg_list_gtt_relfrozenxids(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Tuplestorestate *tupstore; + int *pids = NULL; + uint32 *xids = NULL; + TupleDesc tupdesc; + MemoryContext oldcontext; + HeapTuple tuple; + int num_xid = MaxBackends + 1; + int i = 0; + int j = 0; + uint32 oldest = 0; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + oldcontext = MemoryContextSwitchTo( + rsinfo->econtext->ecxt_per_query_memory); + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + if (max_active_gtt <= 0) + return (Datum) 0; + + if (RecoveryInProgress()) + return (Datum) 0; + + pids = palloc0(sizeof(int) * num_xid); + xids = palloc0(sizeof(int) * num_xid); + + /* Get backend level oldest relfrozenxid in all backend that in MyDatabaseId use GTT */ + oldest = list_all_backend_gtt_frozenxids(num_xid, pids, xids, &i); + if (i > 0) + { + if (i > 0) + { + pids[i] = 0; + xids[i] = oldest; + i++; + } + + for(j = 0; j < i; j++) + { + Datum values[2]; + bool isnull[2]; + + memset(isnull, false, sizeof(isnull)); + memset(values, 0, sizeof(values)); + values[0] 
= Int32GetDatum(pids[j]); + values[1] = UInt32GetDatum(xids[j]); + tuple = heap_form_tuple(tupdesc, values, isnull); + tuplestore_puttuple(tupstore, tuple); + } + } + tuplestore_donestoring(tupstore); + pfree(pids); + pfree(xids); + + return (Datum) 0; +} + +/* + * In order to build a GTT index, force its indisvalid/indislive/indisready flags on. + */ +void +force_enable_gtt_index(Relation index) +{ + if (!RELATION_IS_GLOBAL_TEMP(index)) + return; + + Assert(index->rd_rel->relkind == RELKIND_INDEX); + Assert(OidIsValid(RelationGetRelid(index))); + + index->rd_index->indisvalid = true; + index->rd_index->indislive = true; + index->rd_index->indisready = true; +} + +/* + * Fix the local state of the GTT's index. + */ +void +gtt_fix_index_backend_state(Relation index) +{ + Oid indexOid = RelationGetRelid(index); + Oid heapOid = index->rd_index->indrelid; + + /* Must be GTT */ + if (!RELATION_IS_GLOBAL_TEMP(index)) + return; + + if (!index->rd_index->indisvalid) + return; + + /* + * If the heap's storage is attached in the current backend but the index's is not, + * the index status is temporarily set to invalid (local relcache). + */ + if (gtt_storage_attached(heapOid) && + !gtt_storage_attached(indexOid)) + { + index->rd_index->indisvalid = false; + index->rd_index->indislive = false; + index->rd_index->indisready = false; + } + + return; +} + +/* + * During the SQL initialization of the executor (InitPlan), + * initialize the storage of a GTT and its indexes, and build empty indexes.
+ */ +void +init_gtt_storage(CmdType operation, ResultRelInfo *resultRelInfo) +{ + Relation relation = resultRelInfo->ri_RelationDesc; + int i; + Oid toastrelid; + + if (!(operation == CMD_UTILITY || operation == CMD_INSERT)) + return; + + if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) + return; + + if (!RELATION_IS_GLOBAL_TEMP(relation)) + return; + + /* Each GTT is initialized once in each backend */ + if (gtt_storage_attached(RelationGetRelid(relation))) + return; + + /* init heap storage */ + RelationCreateStorage(relation->rd_node, RELPERSISTENCE_GLOBAL_TEMP, relation); + + for (i = 0; i < resultRelInfo->ri_NumIndices; i++) + { + Relation index = resultRelInfo->ri_IndexRelationDescs[i]; + IndexInfo *info = resultRelInfo->ri_IndexRelationInfo[i]; + + Assert(index->rd_index->indisvalid); + Assert(index->rd_index->indislive); + Assert(index->rd_index->indisready); + + index_build(relation, index, info, true, false); + } + + toastrelid = relation->rd_rel->reltoastrelid; + if (OidIsValid(toastrelid)) + { + Relation toastrel; + ListCell *indlist; + + toastrel = table_open(toastrelid, RowExclusiveLock); + + /* init toast storage */ + RelationCreateStorage(toastrel->rd_node, RELPERSISTENCE_GLOBAL_TEMP, toastrel); + + foreach(indlist, RelationGetIndexList(toastrel)) + { + Oid indexId = lfirst_oid(indlist); + Relation currentIndex; + IndexInfo *indexInfo; + + currentIndex = index_open(indexId, RowExclusiveLock); + /* build empty toast index */ + indexInfo = BuildDummyIndexInfo(currentIndex); + index_build(toastrel, currentIndex, indexInfo, true, false); + index_close(currentIndex, NoLock); + } + + table_close(toastrel, NoLock); + } + + return; +} + +/* + * Release the data structure memory used to store GTT storage info.
+ */ +static void +gtt_free_statistics(gtt_relfilenode *rnode) +{ + int i; + + Assert(rnode); + + for (i = 0; i < rnode->natts; i++) + { + if (rnode->att_stat_tups[i]) + { + heap_freetuple(rnode->att_stat_tups[i]); + rnode->att_stat_tups[i] = NULL; + } + } + + if (rnode->attnum) + pfree(rnode->attnum); + + if (rnode->att_stat_tups) + pfree(rnode->att_stat_tups); + + pfree(rnode); + + return; +} + +/* + * Get the current relfilenode of this GTT (the last entry of its relfilenode list), or InvalidOid if there is none. + */ +Oid +gtt_fetch_current_relfilenode(Oid relid) +{ + gtt_local_hash_entry *entry; + gtt_relfilenode *gtt_rnode = NULL; + + if (max_active_gtt <= 0) + return InvalidOid; + + entry = gtt_search_by_relid(relid, true); + if (entry == NULL) + return InvalidOid; + + Assert(entry->relid == relid); + + gtt_rnode = (entry->relfilenode_list != NIL) ? llast(entry->relfilenode_list) : NULL; + if (gtt_rnode == NULL) + return InvalidOid; + + return gtt_rnode->relfilenode; +} + +/* + * For cluster GTT. + * Exchange new and old relfilenode, leaving footprints to ensure rollback capability. + */ +void +gtt_switch_rel_relfilenode(Oid rel1, Oid relfilenode1, Oid rel2, Oid relfilenode2, bool footprint) +{ + gtt_local_hash_entry *entry1; + gtt_local_hash_entry *entry2; + gtt_relfilenode *gtt_rnode1 = NULL; + gtt_relfilenode *gtt_rnode2 = NULL; + MemoryContext oldcontext; + + if (max_active_gtt <= 0) + return; + + if (gtt_storage_local_hash == NULL) + return; + + entry1 = gtt_search_by_relid(rel1, false); + gtt_rnode1 = gtt_search_relfilenode(entry1, relfilenode1, false); + + entry2 = gtt_search_by_relid(rel2, false); + gtt_rnode2 = gtt_search_relfilenode(entry2, relfilenode2, false); + + oldcontext = MemoryContextSwitchTo(gtt_info_context); + entry1->relfilenode_list = list_delete_ptr(entry1->relfilenode_list, gtt_rnode1); + entry2->relfilenode_list = lappend(entry2->relfilenode_list, gtt_rnode1); + + entry2->relfilenode_list = list_delete_ptr(entry2->relfilenode_list, gtt_rnode2); + entry1->relfilenode_list = lappend(entry1->relfilenode_list, gtt_rnode2); +
MemoryContextSwitchTo(oldcontext); + + if (footprint) + { + entry1->oldrelid = rel2; + entry2->oldrelid = rel1; + } + + return; +} + +/* + * Get a relfilenode used by this GTT during the transaction life cycle. + */ +static gtt_relfilenode * +gtt_search_relfilenode(gtt_local_hash_entry *entry, Oid relfilenode, bool missing_ok) +{ + gtt_relfilenode *rnode = NULL; + ListCell *lc; + + Assert(entry); + + foreach(lc, entry->relfilenode_list) + { + gtt_relfilenode *gtt_rnode = lfirst(lc); + if (gtt_rnode->relfilenode == relfilenode) + { + rnode = gtt_rnode; + break; + } + } + + if (!missing_ok && rnode == NULL) + elog(ERROR, "find relfilenode %u relfilenodelist from relid %u fail", relfilenode, entry->relid); + + return rnode; +} + +/* + * Get one GTT info from local hash. + */ +static gtt_local_hash_entry * +gtt_search_by_relid(Oid relid, bool missing_ok) +{ + gtt_local_hash_entry *entry = NULL; + + if (gtt_storage_local_hash == NULL) + return NULL; + + entry = hash_search(gtt_storage_local_hash, + (void *) &(relid), HASH_FIND, NULL); + + if (entry == NULL && !missing_ok) + elog(ERROR, "relid %u not found in local hash", relid); + + return entry; +} + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 55f6e3711d..fc75533263 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -186,6 +186,91 @@ CREATE VIEW pg_sequences AS WHERE NOT pg_is_other_temp_schema(N.oid) AND relkind = 'S'; +-- For global temporary table +CREATE VIEW pg_gtt_relstats WITH (security_barrier) AS + SELECT n.nspname AS schemaname, + c.relname AS tablename, + s.* + FROM + pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace, + pg_get_gtt_relstats(c.oid) as s + WHERE c.relpersistence='g' AND c.relkind in('r','p','i','t') AND (c.relrowsecurity = false OR NOT row_security_active(c.oid)); + +CREATE VIEW pg_gtt_attached_pids WITH (security_barrier) AS + SELECT n.nspname AS schemaname, + c.relname AS tablename, + 
s.* + FROM + pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace, + pg_gtt_attached_pid(c.oid) as s + WHERE c.relpersistence='g' AND c.relkind in('r','S') AND (c.relrowsecurity = false OR NOT row_security_active(c.oid)); + +CREATE VIEW pg_gtt_stats WITH (security_barrier) AS +SELECT n.nspname AS schemaname, + c.relname AS tablename, + a.attname, + s.stainherit AS inherited, + s.stanullfrac AS null_frac, + s.stawidth AS avg_width, + s.stadistinct AS n_distinct, + CASE + WHEN s.stakind1 = 1 THEN s.stavalues1 + WHEN s.stakind2 = 1 THEN s.stavalues2 + WHEN s.stakind3 = 1 THEN s.stavalues3 + WHEN s.stakind4 = 1 THEN s.stavalues4 + WHEN s.stakind5 = 1 THEN s.stavalues5 + END AS most_common_vals, + CASE + WHEN s.stakind1 = 1 THEN s.stanumbers1 + WHEN s.stakind2 = 1 THEN s.stanumbers2 + WHEN s.stakind3 = 1 THEN s.stanumbers3 + WHEN s.stakind4 = 1 THEN s.stanumbers4 + WHEN s.stakind5 = 1 THEN s.stanumbers5 + END AS most_common_freqs, + CASE + WHEN s.stakind1 = 2 THEN s.stavalues1 + WHEN s.stakind2 = 2 THEN s.stavalues2 + WHEN s.stakind3 = 2 THEN s.stavalues3 + WHEN s.stakind4 = 2 THEN s.stavalues4 + WHEN s.stakind5 = 2 THEN s.stavalues5 + END AS histogram_bounds, + CASE + WHEN s.stakind1 = 3 THEN s.stanumbers1[1] + WHEN s.stakind2 = 3 THEN s.stanumbers2[1] + WHEN s.stakind3 = 3 THEN s.stanumbers3[1] + WHEN s.stakind4 = 3 THEN s.stanumbers4[1] + WHEN s.stakind5 = 3 THEN s.stanumbers5[1] + END AS correlation, + CASE + WHEN s.stakind1 = 4 THEN s.stavalues1 + WHEN s.stakind2 = 4 THEN s.stavalues2 + WHEN s.stakind3 = 4 THEN s.stavalues3 + WHEN s.stakind4 = 4 THEN s.stavalues4 + WHEN s.stakind5 = 4 THEN s.stavalues5 + END AS most_common_elems, + CASE + WHEN s.stakind1 = 4 THEN s.stanumbers1 + WHEN s.stakind2 = 4 THEN s.stanumbers2 + WHEN s.stakind3 = 4 THEN s.stanumbers3 + WHEN s.stakind4 = 4 THEN s.stanumbers4 + WHEN s.stakind5 = 4 THEN s.stanumbers5 + END AS most_common_elem_freqs, + CASE + WHEN s.stakind1 = 5 THEN s.stanumbers1 + WHEN s.stakind2 = 5 THEN 
s.stanumbers2 + WHEN s.stakind3 = 5 THEN s.stanumbers3 + WHEN s.stakind4 = 5 THEN s.stanumbers4 + WHEN s.stakind5 = 5 THEN s.stanumbers5 + END AS elem_count_histogram + FROM + pg_class c + JOIN pg_attribute a ON c.oid = a.attrelid + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace, + pg_get_gtt_statistics(c.oid, a.attnum, ''::text) as s + WHERE c.relpersistence='g' AND c.relkind in('r','p','i','t') and NOT a.attisdropped AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND (c.relrowsecurity = false OR NOT row_security_active(c.oid)); + CREATE VIEW pg_stats WITH (security_barrier) AS SELECT nspname AS schemaname, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 0c9591415e..2575c084c2 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -34,6 +34,7 @@ #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" #include "catalog/pg_statistic_ext.h" +#include "catalog/storage_gtt.h" #include "commands/dbcommands.h" #include "commands/progress.h" #include "commands/tablecmds.h" @@ -104,7 +105,7 @@ static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); static void update_attstats(Oid relid, bool inh, - int natts, VacAttrStats **vacattrstats); + int natts, VacAttrStats **vacattrstats, char relpersistence); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -185,6 +186,17 @@ analyze_rel(Oid relid, RangeVar *relation, return; } + /* + * Skip the global temporary table that did not initialize the storage + * in this backend. + */ + if (RELATION_IS_GLOBAL_TEMP(onerel) && + !gtt_storage_attached(RelationGetRelid(onerel))) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + /* * We can ANALYZE any table except pg_statistic. 
See update_attstats */ @@ -601,14 +613,15 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * pg_statistic for columns we didn't process, we leave them alone.) */ update_attstats(RelationGetRelid(onerel), inh, - attr_cnt, vacattrstats); + attr_cnt, vacattrstats, RelationGetRelPersistence(onerel)); for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; update_attstats(RelationGetRelid(Irel[ind]), false, - thisdata->attr_cnt, thisdata->vacattrstats); + thisdata->attr_cnt, thisdata->vacattrstats, + RelationGetRelPersistence(Irel[ind])); } /* @@ -1651,7 +1664,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, * by taking a self-exclusive lock on the relation in analyze_rel(). */ static void -update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) +update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats, char relpersistence) { Relation sd; int attno; @@ -1753,31 +1766,48 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) } } - /* Is there already a pg_statistic tuple for this attribute? */ - oldtup = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(relid), - Int16GetDatum(stats->attr->attnum), - BoolGetDatum(inh)); - - if (HeapTupleIsValid(oldtup)) + /* + * For global temporary table, + * Update column statistic to localhash, not catalog. + */ + if (relpersistence == RELPERSISTENCE_GLOBAL_TEMP) { - /* Yes, replace it */ - stup = heap_modify_tuple(oldtup, - RelationGetDescr(sd), - values, - nulls, - replaces); - ReleaseSysCache(oldtup); - CatalogTupleUpdate(sd, &stup->t_self, stup); + up_gtt_att_statistic(relid, + stats->attr->attnum, + inh, + natts, + RelationGetDescr(sd), + values, + nulls); } else { - /* No, insert new tuple */ - stup = heap_form_tuple(RelationGetDescr(sd), values, nulls); - CatalogTupleInsert(sd, stup); - } + /* Is there already a pg_statistic tuple for this attribute? 
*/ + oldtup = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(stats->attr->attnum), + BoolGetDatum(inh)); - heap_freetuple(stup); + if (HeapTupleIsValid(oldtup)) + { + /* Yes, replace it */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(sd), + values, + nulls, + replaces); + ReleaseSysCache(oldtup); + CatalogTupleUpdate(sd, &stup->t_self, stup); + } + else + { + /* No, insert new tuple */ + stup = heap_form_tuple(RelationGetDescr(sd), values, nulls); + CatalogTupleInsert(sd, stup); + } + + heap_freetuple(stup); + } } table_close(sd, RowExclusiveLock); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 6487a9e3fc..9fa0d136ed 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -33,6 +33,7 @@ #include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/pg_am.h" +#include "catalog/storage_gtt.h" #include "catalog/toasting.h" #include "commands/cluster.h" #include "commands/defrem.h" @@ -73,6 +74,12 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); +static void gtt_swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, + bool swap_toast_by_content, + bool is_internal, + TransactionId frozenXid, + MultiXactId cutoffMulti, + Oid *mapped_tables); /*--------------------------------------------------------------------------- @@ -390,6 +397,18 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) errmsg("cannot vacuum temporary tables of other sessions"))); } + /* + * Skip the global temporary table that did not initialize the storage + * in this backend. 
+ */ + if (RELATION_IS_GLOBAL_TEMP(OldHeap) && + !gtt_storage_attached(RelationGetRelid(OldHeap))) + { + relation_close(OldHeap, AccessExclusiveLock); + pgstat_progress_end_command(); + return; + } + /* * Also check for active uses of the relation in the current transaction, * including open scans and pending AFTER trigger events. @@ -774,6 +793,9 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, BlockNumber num_pages; int elevel = verbose ? INFO : DEBUG2; PGRUsage ru0; + bool is_gtt = false; + uint32 gtt_relfrozenxid = 0; + uint32 gtt_relminmxid = 0; pg_rusage_init(&ru0); @@ -787,6 +809,9 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, else OldIndex = NULL; + if (RELATION_IS_GLOBAL_TEMP(OldHeap)) + is_gtt = true; + /* * Their tuple descriptors should be exactly alike, but here we only need * assume that they have the same number of columns. @@ -854,20 +879,38 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, NULL); - /* - * FreezeXid will become the table's new relfrozenxid, and that mustn't go - * backwards, so take the max. - */ - if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) && - TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid)) - FreezeXid = OldHeap->rd_rel->relfrozenxid; + if (is_gtt) + { + /* Get the relfrozenxid/relminmxid of the global temporary table from localhash. */ + get_gtt_relstats(OIDOldHeap, + NULL, NULL, NULL, + &gtt_relfrozenxid, &gtt_relminmxid); + + if (TransactionIdIsValid(gtt_relfrozenxid) && + TransactionIdPrecedes(FreezeXid, gtt_relfrozenxid)) + FreezeXid = gtt_relfrozenxid; + + if (MultiXactIdIsValid(gtt_relminmxid) && + MultiXactIdPrecedes(MultiXactCutoff, gtt_relminmxid)) + MultiXactCutoff = gtt_relminmxid; + } + else + { + /* + * FreezeXid will become the table's new relfrozenxid, and that mustn't go + * backwards, so take the max.
+ */ + if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) && + TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid)) + FreezeXid = OldHeap->rd_rel->relfrozenxid; - /* - * MultiXactCutoff, similarly, shouldn't go backwards either. - */ - if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) && - MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid)) - MultiXactCutoff = OldHeap->rd_rel->relminmxid; + /* + * MultiXactCutoff, similarly, shouldn't go backwards either. + */ + if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) && + MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid)) + MultiXactCutoff = OldHeap->rd_rel->relminmxid; + } /* * Decide whether to use an indexscan or seqscan-and-optional-sort to scan @@ -935,6 +978,15 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, table_close(OldHeap, NoLock); table_close(NewHeap, NoLock); + /* Update relstats of the global temporary table in localhash; NewHeap is already closed, so use its OID. */ + if (is_gtt) + { + up_gtt_relstats(OIDNewHeap, num_pages, num_tuples, 0, + InvalidTransactionId, InvalidMultiXactId); + CommandCounterIncrement(); + return; + } + /* Update pg_class to reflect the correct values of pages and tuples. */ relRelation = table_open(RelationRelationId, RowExclusiveLock); @@ -1371,10 +1423,22 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, * Swap the contents of the heap relations (including any toast tables). * Also set old heap's relfrozenxid to frozenXid.
*/ - swap_relation_files(OIDOldHeap, OIDNewHeap, + if (newrelpersistence == RELPERSISTENCE_GLOBAL_TEMP) + { + Assert(!is_system_catalog); + /* For global temporary table modify data in localhash, not pg_class */ + gtt_swap_relation_files(OIDOldHeap, OIDNewHeap, + (OIDOldHeap == RelationRelationId), + swap_toast_by_content, is_internal, + frozenXid, cutoffMulti, mapped_tables); + } + else + { + swap_relation_files(OIDOldHeap, OIDNewHeap, (OIDOldHeap == RelationRelationId), swap_toast_by_content, is_internal, frozenXid, cutoffMulti, mapped_tables); + } /* * If it's a system catalog, queue a sinval message to flush all catcaches @@ -1582,3 +1646,146 @@ get_tables_to_cluster(MemoryContext cluster_context) return rvs; } + +/* + * For global temporary table, storage information is stored in localhash, + * This function like swap_relation_files except that update storage information, + * in the localhash, not pg_class. + */ +static void +gtt_swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, + bool swap_toast_by_content, + bool is_internal, + TransactionId frozenXid, + MultiXactId cutoffMulti, + Oid *mapped_tables) +{ + Relation relRelation; + Oid relfilenode1, + relfilenode2; + Relation rel1; + Relation rel2; + + relRelation = table_open(RelationRelationId, RowExclusiveLock); + + rel1 = relation_open(r1, AccessExclusiveLock); + rel2 = relation_open(r2, AccessExclusiveLock); + + relfilenode1 = gtt_fetch_current_relfilenode(r1); + relfilenode2 = gtt_fetch_current_relfilenode(r2); + + Assert(OidIsValid(relfilenode1) && OidIsValid(relfilenode2)); + gtt_switch_rel_relfilenode(r1, relfilenode1, r2, relfilenode2, true); + + CacheInvalidateRelcache(rel1); + CacheInvalidateRelcache(rel2); + + InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0, + InvalidOid, is_internal); + InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0, + InvalidOid, true); + + if (rel1->rd_rel->reltoastrelid || rel2->rd_rel->reltoastrelid) + { + if (swap_toast_by_content) + { + if 
(rel1->rd_rel->reltoastrelid && rel2->rd_rel->reltoastrelid) + { + gtt_swap_relation_files(rel1->rd_rel->reltoastrelid, + rel2->rd_rel->reltoastrelid, + target_is_pg_class, + swap_toast_by_content, + is_internal, + frozenXid, + cutoffMulti, + mapped_tables); + } + else + elog(ERROR, "cannot swap toast files by content when there's only one"); + } + else + { + ObjectAddress baseobject, + toastobject; + long count; + + if (IsSystemRelation(rel1)) + elog(ERROR, "cannot swap toast files by links for system catalogs"); + + if (rel1->rd_rel->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + rel1->rd_rel->reltoastrelid, + false); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } + if (rel2->rd_rel->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + rel2->rd_rel->reltoastrelid, + false); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } + + /* Register new dependencies */ + baseobject.classId = RelationRelationId; + baseobject.objectSubId = 0; + toastobject.classId = RelationRelationId; + toastobject.objectSubId = 0; + + if (rel1->rd_rel->reltoastrelid) + { + baseobject.objectId = r1; + toastobject.objectId = rel1->rd_rel->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, + DEPENDENCY_INTERNAL); + } + + if (rel2->rd_rel->reltoastrelid) + { + baseobject.objectId = r2; + toastobject.objectId = rel2->rd_rel->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, + DEPENDENCY_INTERNAL); + } + } + } + + if (swap_toast_by_content && + rel1->rd_rel->relkind == RELKIND_TOASTVALUE && + rel2->rd_rel->relkind == RELKIND_TOASTVALUE) + { + Oid toastIndex1, + toastIndex2; + + /* Get valid index for each relation */ + toastIndex1 = toast_get_valid_index(r1, + AccessExclusiveLock); + toastIndex2 = toast_get_valid_index(r2, + AccessExclusiveLock); + + gtt_swap_relation_files(toastIndex1, + toastIndex2, 
+ target_is_pg_class, + swap_toast_by_content, + is_internal, + InvalidTransactionId, + InvalidMultiXactId, + mapped_tables); + } + + relation_close(rel1, NoLock); + relation_close(rel2, NoLock); + + table_close(relRelation, RowExclusiveLock); + + RelationCloseSmgrByOid(r1); + RelationCloseSmgrByOid(r2); + + CommandCounterIncrement(); +} + diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 6b33951e0c..5f757b00de 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -290,7 +290,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, Assert(rel); /* check read-only transaction and parallel mode */ - if (XactReadOnly && !rel->rd_islocaltemp) + if (XactReadOnly && !RELATION_IS_TEMP_ON_CURRENT_SESSION(rel)) PreventCommandIfReadOnly("COPY FROM"); cstate = BeginCopyFrom(pstate, rel, whereClause, diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 40a54ad0bd..7895e7d99b 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -30,6 +30,7 @@ #include "access/xact.h" #include "access/xlog.h" #include "catalog/namespace.h" +#include "catalog/storage_gtt.h" #include "commands/copy.h" #include "commands/copyfrom_internal.h" #include "commands/progress.h" @@ -659,6 +660,9 @@ CopyFrom(CopyFromState cstate) ExecOpenIndices(resultRelInfo, false); + /* Check and init global temporary table storage in current backend */ + init_gtt_storage(CMD_INSERT, resultRelInfo); + /* * Set up a ModifyTableState so we can let FDW(s) init themselves for * foreign-table result relation(s). diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index c14ca27c5e..5f5cb2bbc0 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -570,7 +570,7 @@ DefineIndex(Oid relationId, * is more efficient. Do this before any use of the concurrent option is * done. 
*/ - if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP) + if (stmt->concurrent && !RelpersistenceTsTemp(get_rel_persistence(relationId))) concurrent = true; else concurrent = false; @@ -2598,7 +2598,7 @@ ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel) if (relkind == RELKIND_PARTITIONED_INDEX) ReindexPartitions(indOid, params, isTopLevel); else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && - persistence != RELPERSISTENCE_TEMP) + !RelpersistenceTsTemp(persistence)) ReindexRelationConcurrently(indOid, params); else { @@ -2707,7 +2707,7 @@ ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel) if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE) ReindexPartitions(heapOid, params, isTopLevel); else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && - get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP) + !RelpersistenceTsTemp(get_rel_persistence(heapOid))) { result = ReindexRelationConcurrently(heapOid, params); @@ -3122,7 +3122,7 @@ ReindexMultipleInternal(List *relids, ReindexParams *params) relkind != RELKIND_PARTITIONED_TABLE); if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && - relpersistence != RELPERSISTENCE_TEMP) + !RelpersistenceTsTemp(relpersistence)) { ReindexParams newparams = *params; diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c index 62465bacd8..519c9ea82e 100644 --- a/src/backend/commands/lockcmds.c +++ b/src/backend/commands/lockcmds.c @@ -57,7 +57,10 @@ LockTableCommand(LockStmt *lockstmt) RangeVarCallbackForLockTable, (void *) &lockstmt->mode); - if (get_rel_relkind(reloid) == RELKIND_VIEW) + /* Lock table command ignores global temporary table. 
*/ + if (get_rel_persistence(reloid) == RELPERSISTENCE_GLOBAL_TEMP) + continue; + else if (get_rel_relkind(reloid) == RELKIND_VIEW) LockViewRecurse(reloid, lockstmt->mode, lockstmt->nowait, NIL); else if (recurse) LockTableRecurse(reloid, lockstmt->mode, lockstmt->nowait); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 72bfdc07a4..5b1bfcd117 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -30,6 +30,8 @@ #include "catalog/objectaccess.h" #include "catalog/pg_sequence.h" #include "catalog/pg_type.h" +#include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "commands/defrem.h" #include "commands/sequence.h" #include "commands/tablecmds.h" @@ -108,6 +110,7 @@ static void init_params(ParseState *pstate, List *options, bool for_identity, List **owned_by); static void do_setval(Oid relid, int64 next, bool iscalled); static void process_owned_by(Relation seqrel, List *owned_by, bool for_identity); +int64 get_seqence_start_value(Oid seqid); /* @@ -275,8 +278,6 @@ ResetSequence(Oid seq_relid) Buffer buf; HeapTupleData seqdatatuple; HeapTuple tuple; - HeapTuple pgstuple; - Form_pg_sequence pgsform; int64 startv; /* @@ -287,12 +288,7 @@ ResetSequence(Oid seq_relid) init_sequence(seq_relid, &elm, &seq_rel); (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple); - pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid)); - if (!HeapTupleIsValid(pgstuple)) - elog(ERROR, "cache lookup failed for sequence %u", seq_relid); - pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple); - startv = pgsform->seqstart; - ReleaseSysCache(pgstuple); + startv = get_seqence_start_value(seq_relid); /* * Copy the existing sequence tuple. 
@@ -451,6 +447,15 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) init_sequence(relid, &elm, &seqrel); + if (RELATION_IS_GLOBAL_TEMP(seqrel)) + { + if (is_other_backend_use_gtt(RelationGetRelid(seqrel))) + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("cannot alter global temporary sequence %s when other backend attached it.", + RelationGetRelationName(seqrel)))); + } + rel = table_open(SequenceRelationId, RowExclusiveLock); seqtuple = SearchSysCacheCopy1(SEQRELID, ObjectIdGetDatum(relid)); @@ -611,7 +616,7 @@ nextval_internal(Oid relid, bool check_permissions) RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ - if (!seqrel->rd_islocaltemp) + if (!RELATION_IS_TEMP_ON_CURRENT_SESSION(seqrel)) PreventCommandIfReadOnly("nextval()"); /* @@ -936,7 +941,7 @@ do_setval(Oid relid, int64 next, bool iscalled) ReleaseSysCache(pgstuple); /* read-only transactions may only modify temp sequences */ - if (!seqrel->rd_islocaltemp) + if (!RELATION_IS_TEMP_ON_CURRENT_SESSION(seqrel)) PreventCommandIfReadOnly("setval()"); /* @@ -1153,6 +1158,14 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) /* Return results */ *p_elm = elm; *p_rel = seqrel; + + /* Initializes the storage for sequence which the global temporary table belongs. */ + if (RELATION_IS_GLOBAL_TEMP(seqrel) && + !gtt_storage_attached(RelationGetRelid(seqrel))) + { + RelationCreateStorage(seqrel->rd_node, RELPERSISTENCE_GLOBAL_TEMP, seqrel); + gtt_init_seq(seqrel); + } } @@ -1927,3 +1940,51 @@ seq_mask(char *page, BlockNumber blkno) mask_unused_space(page); } + +/* + * Get the startValue of the sequence from syscache. 
+ */ +int64 +get_seqence_start_value(Oid seqid) +{ + HeapTuple seqtuple; + Form_pg_sequence seqform; + int64 start; + + seqtuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seqid)); + if (!HeapTupleIsValid(seqtuple)) + elog(ERROR, "cache lookup failed for sequence %u", + seqid); + + seqform = (Form_pg_sequence) GETSTRUCT(seqtuple); + start = seqform->seqstart; + ReleaseSysCache(seqtuple); + + return start; +} + +/* + * Initialize the storage of a sequence that belongs to a global temporary table. + */ +void +gtt_init_seq(Relation rel) +{ + Datum value[SEQ_COL_LASTCOL] = {0}; + bool null[SEQ_COL_LASTCOL] = {false}; + HeapTuple tuple; + int64 startv = get_seqence_start_value(RelationGetRelid(rel)); + + /* + * last_value from pg_sequence.seqstart + * log_cnt = 0 + * is_called = false + */ + value[SEQ_COL_LASTVAL-1] = Int64GetDatumFast(startv); /* start at pg_sequence.seqstart */ + + tuple = heap_form_tuple(RelationGetDescr(rel), value, null); + fill_seq_with_data(rel, tuple); + heap_freetuple(tuple); + + return; +} + diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index a16e749506..ba01c0fe62 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -47,6 +47,7 @@ #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "catalog/toasting.h" +#include "catalog/storage_gtt.h" #include "commands/cluster.h" #include "commands/comment.h" #include "commands/defrem.h" @@ -601,7 +602,7 @@ static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, static List *GetParentedForeignKeyRefs(Relation partition); static void ATDetachCheckNoForeignKeyRefs(Relation partition); static char GetAttributeCompression(Oid atttypid, char *compression); - +static OnCommitAction gtt_oncommit_option(List *options); /* ---------------------------------------------------------------- * DefineRelation @@ -646,6 +647,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, LOCKMODE parentLockmode; const char *accessMethod = NULL; 
Oid accessMethodId = InvalidOid; + OnCommitAction oncommit_action = ONCOMMIT_NOOP; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -657,7 +659,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, * Check consistency of arguments */ if (stmt->oncommit != ONCOMMIT_NOOP - && stmt->relation->relpersistence != RELPERSISTENCE_TEMP) + && !RelpersistenceTsTemp(stmt->relation->relpersistence)) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("ON COMMIT can only be used on temporary tables"))); @@ -687,7 +689,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, * code. This is needed because calling code might not expect untrusted * tables to appear in pg_temp at the front of its search path. */ - if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP + if (RelpersistenceTsTemp(stmt->relation->relpersistence) && InSecurityRestrictedOperation()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), @@ -788,6 +790,56 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, /* * Parse and validate reloptions, if any. 
*/ + /* For global temporary table */ + oncommit_action = gtt_oncommit_option(stmt->options); + if (stmt->relation->relpersistence == RELPERSISTENCE_GLOBAL_TEMP && + (relkind == RELKIND_RELATION || relkind == RELKIND_PARTITIONED_TABLE)) + { + /* Check parent table */ + if (inheritOids) + { + Oid parent = linitial_oid(inheritOids); + Relation relation = table_open(parent, NoLock); + + if (!RELATION_IS_GLOBAL_TEMP(relation)) + elog(ERROR, "The parent table must be global temporary table"); + + table_close(relation, NoLock); + } + + /* Check oncommit clause and save to reloptions */ + if (oncommit_action != ONCOMMIT_NOOP) + { + if (stmt->oncommit != ONCOMMIT_NOOP) + elog(ERROR, "could not create global temporary table with on commit and with clause at same time"); + + stmt->oncommit = oncommit_action; + } + else + { + DefElem *opt = makeNode(DefElem); + + opt->type = T_DefElem; + opt->defnamespace = NULL; + opt->defname = "on_commit_delete_rows"; + opt->defaction = DEFELEM_UNSPEC; + + /* use reloptions to remember on commit clause */ + if (stmt->oncommit == ONCOMMIT_DELETE_ROWS) + opt->arg = (Node *)makeString("true"); + else if (stmt->oncommit == ONCOMMIT_PRESERVE_ROWS) + opt->arg = (Node *)makeString("false"); + else if (stmt->oncommit == ONCOMMIT_NOOP) + opt->arg = (Node *)makeString("false"); + else + elog(ERROR, "global temporary table not support on commit drop clause"); + + stmt->options = lappend(stmt->options, opt); + } + } + else if (oncommit_action != ONCOMMIT_NOOP) + elog(ERROR, "The parameter on_commit_delete_rows is exclusive to the global temporary table, which cannot be specified by a regular table"); + reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps, true, false); @@ -1414,7 +1466,7 @@ RemoveRelations(DropStmt *drop) * relation persistence cannot be known without its OID. 
*/ if (drop->concurrent && - get_rel_persistence(relOid) != RELPERSISTENCE_TEMP) + !RelpersistenceTsTemp(get_rel_persistence(relOid))) { Assert(list_length(drop->objects) == 1 && drop->removeType == OBJECT_INDEX); @@ -1623,7 +1675,16 @@ ExecuteTruncate(TruncateStmt *stmt) Relation rel; bool recurse = rv->inh; Oid myrelid; - LOCKMODE lockmode = AccessExclusiveLock; + LOCKMODE lockmode; + + /* + * Truncate global temp table only cleans up the data in current backend, + * only low-level locks are required. + */ + if (rv->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + lockmode = RowExclusiveLock; + else + lockmode = AccessExclusiveLock; myrelid = RangeVarGetRelidExtended(rv, lockmode, 0, RangeVarCallbackForTruncate, @@ -1942,6 +2003,14 @@ ExecuteTruncateGuts(List *explicit_rels, continue; } + /* + * Skip the global temporary table that is not initialized for storage + * in current backend. + */ + if (RELATION_IS_GLOBAL_TEMP(rel) && + !gtt_storage_attached(RelationGetRelid(rel))) + continue; + /* * Normally, we need a transaction-safe truncation here. However, if * the table was either created in the current (sub)transaction or has @@ -4010,6 +4079,16 @@ AlterTable(AlterTableStmt *stmt, LOCKMODE lockmode, /* Caller is required to provide an adequate lock. 
*/ rel = relation_open(context->relid, NoLock); + /* We allow to alter global temporary table only current backend use it */ + if (RELATION_IS_GLOBAL_TEMP(rel)) + { + if (is_other_backend_use_gtt(RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), + errmsg("cannot alter global temporary table %s when other backend attached it.", + RelationGetRelationName(rel)))); + } + CheckTableNotInUse(rel, "ALTER TABLE"); ATController(stmt, rel, stmt->cmds, stmt->relation->inh, lockmode, context); @@ -5369,6 +5448,42 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot rewrite temporary tables of other sessions"))); + if (RELATION_IS_GLOBAL_TEMP(OldHeap)) + { + if (tab->chgPersistence) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change global temporary table persistence setting"))); + + /* + * The storage for the global temporary table needs to be initialized + * before rewrite table. 
+ */ + if(!gtt_storage_attached(tab->relid)) + { + ResultRelInfo *resultRelInfo; + MemoryContext oldcontext; + MemoryContext ctx_alter_gtt; + + ctx_alter_gtt = AllocSetContextCreate(CurrentMemoryContext, + "gtt alter table", ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(ctx_alter_gtt); + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, OldHeap, + 1, NULL, 0); + if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && + resultRelInfo->ri_IndexRelationDescs == NULL) + ExecOpenIndices(resultRelInfo, false); + + init_gtt_storage(CMD_UTILITY, resultRelInfo); + ExecCloseIndices(resultRelInfo); + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(ctx_alter_gtt); + } + } + /* * Select destination tablespace (same as original unless user * requested a change) @@ -8985,6 +9100,12 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("constraints on temporary tables must involve temporary tables of this session"))); break; + case RELPERSISTENCE_GLOBAL_TEMP: + if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_GLOBAL_TEMP) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraints on global temporary tables may reference only global temporary tables"))); + break; } /* @@ -13661,6 +13782,9 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, if (defList == NIL && operation != AT_ReplaceRelOptions) return; /* nothing to do */ + if (gtt_oncommit_option(defList) != ONCOMMIT_NOOP) + elog(ERROR, "table cannot add or modify on commit parameter by ALTER TABLE command."); + pgclass = table_open(RelationRelationId, RowExclusiveLock); /* Fetch heap tuple */ @@ -13860,6 +13984,9 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) */ rel = relation_open(tableOid, lockmode); + if (RELATION_IS_GLOBAL_TEMP(rel)) + elog(ERROR, "not support alter table set tablespace on global temporary 
table"); + /* Check first if relation can be moved to new tablespace */ if (!CheckRelationTableSpaceMove(rel, newTableSpace)) { @@ -14163,7 +14290,7 @@ index_copy_data(Relation rel, RelFileNode newrnode) * NOTE: any conflict in relfilenode value will be caught in * RelationCreateStorage(). */ - RelationCreateStorage(newrnode, rel->rd_rel->relpersistence); + RelationCreateStorage(newrnode, rel->rd_rel->relpersistence, rel); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, @@ -15761,6 +15888,7 @@ ATPrepChangePersistence(Relation rel, bool toLogged) switch (rel->rd_rel->relpersistence) { case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_GLOBAL_TEMP: ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("cannot change logged status of table \"%s\" because it is temporary", @@ -18713,3 +18841,40 @@ GetAttributeCompression(Oid atttypid, char *compression) return cmethod; } + +/* + * Parse the on commit clause for the temporary table + */ +static OnCommitAction +gtt_oncommit_option(List *options) +{ + ListCell *listptr; + OnCommitAction action = ONCOMMIT_NOOP; + + foreach(listptr, options) + { + DefElem *def = (DefElem *) lfirst(listptr); + + if (strcmp(def->defname, "on_commit_delete_rows") == 0) + { + bool res = false; + char *sval = defGetString(def); + + /* It has to be a Boolean value */ + if (!parse_bool(sval, &res)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"on_commit_delete_rows\" requires a Boolean value"))); + + if (res) + action = ONCOMMIT_DELETE_ROWS; + else + action = ONCOMMIT_PRESERVE_ROWS; + + break; + } + } + + return action; +} + diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5c4bc15b44..1e0512cf9b 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -35,6 +35,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" +#include "catalog/storage_gtt.h" #include 
"commands/cluster.h" #include "commands/defrem.h" #include "commands/vacuum.h" @@ -1315,6 +1316,22 @@ vac_update_relstats(Relation relation, HeapTuple ctup; Form_pg_class pgcform; bool dirty; + bool is_gtt = RELATION_IS_GLOBAL_TEMP(relation); + + /* For global temporary table */ + if (is_gtt) + { + /* Store relation statistics and transaction information to the localhash */ + up_gtt_relstats(RelationGetRelid(relation), + num_pages, num_tuples, + num_all_visible_pages, + frozenxid, minmulti); + + /* Update relation statistics to local relcache */ + relation->rd_rel->relpages = (int32) num_pages; + relation->rd_rel->reltuples = (float4) num_tuples; + relation->rd_rel->relallvisible = (int32) num_all_visible_pages; + } rd = table_open(RelationRelationId, RowExclusiveLock); @@ -1328,17 +1345,23 @@ vac_update_relstats(Relation relation, /* Apply statistical updates, if any, to copied tuple */ dirty = false; - if (pgcform->relpages != (int32) num_pages) + + if (!is_gtt && + pgcform->relpages != (int32) num_pages) { pgcform->relpages = (int32) num_pages; dirty = true; } - if (pgcform->reltuples != (float4) num_tuples) + + if (!is_gtt && + pgcform->reltuples != (float4) num_tuples) { pgcform->reltuples = (float4) num_tuples; dirty = true; } - if (pgcform->relallvisible != (int32) num_all_visible_pages) + + if (!is_gtt && + pgcform->relallvisible != (int32) num_all_visible_pages) { pgcform->relallvisible = (int32) num_all_visible_pages; dirty = true; @@ -1383,7 +1406,8 @@ vac_update_relstats(Relation relation, * This should match vac_update_datfrozenxid() concerning what we consider * to be "in the future". 
*/ - if (TransactionIdIsNormal(frozenxid) && + if (!is_gtt && + TransactionIdIsNormal(frozenxid) && pgcform->relfrozenxid != frozenxid && (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) || TransactionIdPrecedes(ReadNextTransactionId(), @@ -1394,7 +1418,8 @@ vac_update_relstats(Relation relation, } /* Similarly for relminmxid */ - if (MultiXactIdIsValid(minmulti) && + if (!is_gtt && + MultiXactIdIsValid(minmulti) && pgcform->relminmxid != minmulti && (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) || MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid))) @@ -1502,6 +1527,13 @@ vac_update_datfrozenxid(void) continue; } + /* + * The relfrozenxid for a global temporary table is stored in localhash, + * not pg_class; see list_all_backend_gtt_frozenxids() + */ + if (classForm->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + continue; + /* * Some table AMs might not need per-relation xid / multixid horizons. * It therefore seems reasonable to allow relfrozenxid and relminmxid @@ -1559,6 +1591,43 @@ vac_update_datfrozenxid(void) Assert(TransactionIdIsNormal(newFrozenXid)); Assert(MultiXactIdIsValid(newMinMulti)); + /* If global temporary tables are enabled */ + if (max_active_gtt > 0) + { + TransactionId safe_age; + /* oldest global temporary table frozenxid across all backends */ + TransactionId oldest_gtt_frozenxid = + list_all_backend_gtt_frozenxids(0, NULL, NULL, NULL); + + if (TransactionIdIsNormal(oldest_gtt_frozenxid)) + { + safe_age = oldest_gtt_frozenxid + vacuum_gtt_defer_check_age; + if (safe_age < FirstNormalTransactionId) + safe_age += FirstNormalTransactionId; + + /* + * We tolerate that the minimum age of gtt is less than + * the minimum age of conventional tables, otherwise it will + * throw warning message.
+ */ + if (TransactionIdIsNormal(safe_age) && + TransactionIdPrecedes(safe_age, newFrozenXid)) + { + ereport(WARNING, + (errmsg("global temp table oldest relfrozenxid %u is the oldest in the entire db", + oldest_gtt_frozenxid), + errdetail("The oldest relfrozenxid in pg_class is %u", newFrozenXid), + errhint("If they differ greatly, please consider cleaning up the data in global temp table."))); + } + + /* + * We need to ensure that the clog required by gtt is not cleaned up. + */ + if (TransactionIdPrecedes(oldest_gtt_frozenxid, newFrozenXid)) + newFrozenXid = oldest_gtt_frozenxid; + } + } + /* Now fetch the pg_database tuple we need to update. */ relation = table_open(DatabaseRelationId, RowExclusiveLock); @@ -1910,6 +1979,19 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) return false; } + /* + * Skip global temporary tables that are not initialized in the + * current backend. + */ + if (RELATION_IS_GLOBAL_TEMP(rel) && + !gtt_storage_attached(RelationGetRelid(rel))) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + /* * Silently ignore tables that are temp tables of other backends --- * trying to vacuum these will lead to great unhappiness, since their diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c index 4df05a0b33..4c181e2e14 100644 --- a/src/backend/commands/view.c +++ b/src/backend/commands/view.c @@ -527,6 +527,12 @@ DefineView(ViewStmt *stmt, const char *queryString, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("views cannot be unlogged because they do not have storage"))); + /* Global temporary views are not sensible. */ + if (stmt->view->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("views cannot be global temp because they do not have storage"))); + /* * If the user didn't explicitly ask for a temporary view, check whether * we need one implicitly.
We allow TEMP to be inserted automatically as diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index b3ce4bae53..611e3f18a7 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -784,6 +784,10 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt) if (isTempNamespace(get_rel_namespace(rte->relid))) continue; + /* This is one kind of temp table */ + if (get_rel_persistence(rte->relid) == RELPERSISTENCE_GLOBAL_TEMP) + continue; + PreventCommandIfReadOnly(CreateCommandName((Node *) plannedstmt)); } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 5c723bc54e..a7edceb1a5 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -18,6 +18,7 @@ #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" +#include "catalog/storage_gtt.h" #include "executor/execPartition.h" #include "executor/executor.h" #include "foreign/fdwapi.h" @@ -533,6 +534,9 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, (node != NULL && node->onConflictAction != ONCONFLICT_NONE)); + /* Init storage for partitioned global temporary table in current backend */ + init_gtt_storage(mtstate->operation, leaf_part_rri); + /* * Build WITH CHECK OPTION constraints for the partition. 
Note that we * didn't build the withCheckOptionList for partitions within the planner, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index c24684aa6f..bb14314e81 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -38,6 +38,7 @@ #include "access/tableam.h" #include "access/xact.h" #include "catalog/catalog.h" +#include "catalog/storage_gtt.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -633,6 +634,9 @@ ExecInsert(ModifyTableState *mtstate, resultRelInfo->ri_IndexRelationDescs == NULL) ExecOpenIndices(resultRelInfo, onconflict != ONCONFLICT_NONE); + /* Init storage for global temporary table in current backend */ + init_gtt_storage(CMD_INSERT, resultRelInfo); + /* * BEFORE ROW INSERT Triggers. * @@ -2810,6 +2814,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) i++; } + /* * Now we may initialize the subplan. */ diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 671117314a..f68c82c3f1 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -48,7 +48,7 @@ #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" - +#include "utils/rel.h" /* results of subquery_is_pushdown_safe */ typedef struct pushdown_safety_info @@ -619,7 +619,7 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * the rest of the necessary infrastructure right now anyway. So * for now, bail out if we see a temporary table. 
*/ - if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP) + if (RelpersistenceTsTemp(get_rel_persistence(rte->relid))) return; /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1868c4eff4..d4a958f8e3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5917,7 +5917,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) * Furthermore, any index predicate or index expressions must be parallel * safe. */ - if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + if (RELATION_IS_TEMP(heap) || !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) || !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index))) { diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index c5194fdbbf..38d7c65854 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -30,6 +30,7 @@ #include "catalog/pg_am.h" #include "catalog/pg_proc.h" #include "catalog/pg_statistic_ext.h" +#include "catalog/storage_gtt.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -221,6 +222,14 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, continue; } + /* Ignore empty index for global temporary table in current backend */ + if (RELATION_IS_GLOBAL_TEMP(indexRelation) && + !gtt_storage_attached(RelationGetRelid(indexRelation))) + { + index_close(indexRelation, NoLock); + continue; + } + /* * If the index is valid, but cannot yet be used, ignore it; but * mark the plan we are generating as transient. 
See diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 438b077004..a9d4ed1878 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -2894,6 +2894,11 @@ transformCreateTableAsStmt(ParseState *pstate, CreateTableAsStmt *stmt) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialized views must not use temporary tables or views"))); + if (is_query_using_gtt(query)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialized views must not use global temporary tables or views"))); + /* * A materialized view would either need to save parameters for use in * maintaining/loading the data or prohibit them entirely. The latter diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 10da5c5c51..0232f59e99 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3403,17 +3403,11 @@ OptTemp: TEMPORARY { $$ = RELPERSISTENCE_TEMP; } | LOCAL TEMP { $$ = RELPERSISTENCE_TEMP; } | GLOBAL TEMPORARY { - ereport(WARNING, - (errmsg("GLOBAL is deprecated in temporary table creation"), - parser_errposition(@1))); - $$ = RELPERSISTENCE_TEMP; + $$ = RELPERSISTENCE_GLOBAL_TEMP; } | GLOBAL TEMP { - ereport(WARNING, - (errmsg("GLOBAL is deprecated in temporary table creation"), - parser_errposition(@1))); - $$ = RELPERSISTENCE_TEMP; + $$ = RELPERSISTENCE_GLOBAL_TEMP; } | UNLOGGED { $$ = RELPERSISTENCE_UNLOGGED; } | /*EMPTY*/ { $$ = RELPERSISTENCE_PERMANENT; } @@ -11661,19 +11655,13 @@ OptTempTableName: } | GLOBAL TEMPORARY opt_table qualified_name { - ereport(WARNING, - (errmsg("GLOBAL is deprecated in temporary table creation"), - parser_errposition(@1))); $$ = $4; - $$->relpersistence = RELPERSISTENCE_TEMP; + $$->relpersistence = RELPERSISTENCE_GLOBAL_TEMP; } | GLOBAL TEMP opt_table qualified_name { - ereport(WARNING, - (errmsg("GLOBAL is deprecated in temporary table creation"), - parser_errposition(@1))); $$ = $4; - $$->relpersistence = RELPERSISTENCE_TEMP; + 
$$->relpersistence = RELPERSISTENCE_GLOBAL_TEMP; } | UNLOGGED opt_table qualified_name { diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 7465919044..48b8a68984 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -82,6 +82,7 @@ static void expandTupleDesc(TupleDesc tupdesc, Alias *eref, List **colnames, List **colvars); static int specialAttNum(const char *attname); static bool isQueryUsingTempRelation_walker(Node *node, void *context); +static bool is_query_using_gtt_walker(Node *node, void *context); /* @@ -3665,3 +3666,53 @@ isQueryUsingTempRelation_walker(Node *node, void *context) isQueryUsingTempRelation_walker, context); } + +/* + * Like function isQueryUsingTempRelation_walker + * return true if any relation underlying + * the query is a global temporary table. + */ +static bool +is_query_using_gtt_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Query)) + { + Query *query = (Query *) node; + ListCell *rtable; + + foreach(rtable, query->rtable) + { + RangeTblEntry *rte = lfirst(rtable); + + if (rte->rtekind == RTE_RELATION) + { + Relation rel = relation_open(rte->relid, AccessShareLock); + char relpersistence = rel->rd_rel->relpersistence; + + relation_close(rel, AccessShareLock); + if (relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + return true; + } + } + + return query_tree_walker(query, + is_query_using_gtt_walker, + context, + QTW_IGNORE_JOINALIASES); + } + + return expression_tree_walker(node, + is_query_using_gtt_walker, + context); +} + +/* Check if the query uses global temporary table */ +bool +is_query_using_gtt(Query *query) +{ + return is_query_using_gtt_walker((Node *) query, NULL); +} + diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 675e400839..967af004aa 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -447,6 +447,13 @@ 
generateSerialExtraStmts(CreateStmtContext *cxt, ColumnDef *column, seqstmt->sequence = makeRangeVar(snamespace, sname, -1); seqstmt->options = seqoptions; + /* + * If a sequence is bound to a global temporary table, then the sequence + * must be "global temporary" too + */ + if (cxt->relation->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + seqstmt->sequence->relpersistence = cxt->relation->relpersistence; + /* * If a sequence data type was specified, add it to the options. Prepend * to the list rather than append; in case a user supplied their own AS @@ -3326,6 +3333,8 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, cxt.isforeign = false; } cxt.relation = stmt->relation; + /* Copy the table's persistence into the context */ + cxt.relation->relpersistence = RelationGetRelPersistence(rel); cxt.rel = rel; cxt.inhRelations = NIL; cxt.isalter = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 912ef9cb54..b8c40f2498 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2158,6 +2158,14 @@ do_autovacuum(void) } continue; } + else if (classForm->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* + * Autovacuum cannot vacuum the private data that each backend stores + * for a global temporary table, so skip such tables. + */ + continue; + } /* Fetch reloptions and the pgstat entry for this table */ relopts = extract_autovac_opts(tuple, pg_class_desc); @@ -2224,7 +2232,7 @@ do_autovacuum(void) /* * We cannot safely process other backends' temp tables, so skip 'em.
*/ - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (RelpersistenceTsTemp(classForm->relpersistence)) continue; relid = classForm->oid; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 86ef607ff3..2619f5e919 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -37,6 +37,7 @@ #include "access/xlog.h" #include "catalog/catalog.h" #include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "executor/instrument.h" #include "lib/binaryheap.h" #include "miscadmin.h" @@ -2938,6 +2939,16 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum) { + /* + * Returns 0 if this global temporary table is not initialized in current + * backend. + */ + if (RELATION_IS_GLOBAL_TEMP(relation) && + !gtt_storage_attached(RelationGetRelid(relation))) + { + return 0; + } + switch (relation->rd_rel->relkind) { case RELKIND_SEQUENCE: diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 3e4ec53a97..09f676d6a6 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -23,6 +23,7 @@ #include "access/syncscan.h" #include "access/twophase.h" #include "commands/async.h" +#include "catalog/storage_gtt.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/autovacuum.h" @@ -150,6 +151,7 @@ CreateSharedMemoryAndSemaphores(void) size = add_size(size, BTreeShmemSize()); size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); + size = add_size(size, active_gtt_shared_hash_size()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -222,6 +224,8 @@ CreateSharedMemoryAndSemaphores(void) SUBTRANSShmemInit(); MultiXactShmemInit(); InitBufferPool(); + /* For global temporary table shared hashtable */ + active_gtt_shared_hash_init(); /* * Set up lock manager diff --git 
a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 4c91e721d0..1b3a204333 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -65,6 +65,7 @@ #include "utils/builtins.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/guc.h" #define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) @@ -5100,3 +5101,78 @@ KnownAssignedXidsReset(void) LWLockRelease(ProcArrayLock); } + +/* + * search all active backend to get oldest frozenxid + * for global temporary table. + */ +int +list_all_backend_gtt_frozenxids(int max_size, int *pids, uint32 *xids, int *n) +{ + ProcArrayStruct *arrayP = procArray; + TransactionId result = InvalidTransactionId; + int index; + uint8 flags = 0; + int i = 0; + + /* return 0 if feature is disabled */ + if (max_active_gtt <= 0) + return InvalidTransactionId; + + if (max_size > 0) + { + Assert(pids); + Assert(xids); + Assert(n); + *n = 0; + } + + /* Disable in standby node */ + if (RecoveryInProgress()) + return InvalidTransactionId; + + flags |= PROC_IS_AUTOVACUUM; + flags |= PROC_IN_LOGICAL_DECODING; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + if (max_size > 0 && max_size < arrayP->numProcs) + { + LWLockRelease(ProcArrayLock); + elog(ERROR, "list_all_gtt_frozenxids require more array"); + } + + for (index = 0; index < arrayP->numProcs; index++) + { + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + uint8 statusFlags = ProcGlobal->statusFlags[index]; + + if (statusFlags & flags) + continue; + + /* Fetch all backend that is belonging to MyDatabaseId */ + if (proc->databaseId == MyDatabaseId && + TransactionIdIsNormal(proc->backend_gtt_frozenxid)) + { + if (result == InvalidTransactionId) + result = proc->backend_gtt_frozenxid; + else if (TransactionIdPrecedes(proc->backend_gtt_frozenxid, result)) + result = proc->backend_gtt_frozenxid; + + /* save backend pid and backend level oldest relfrozenxid */ + if 
(max_size > 0) + { + pids[i] = proc->pid; + xids[i] = proc->backend_gtt_frozenxid; + i++; + } + } + } + LWLockRelease(ProcArrayLock); + + if (max_size > 0) + *n = i; + + return result; +} + diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 862097352b..4edd3b31f7 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -176,7 +176,9 @@ static const char *const BuiltinTrancheNames[] = { /* LWTRANCHE_PARALLEL_APPEND: */ "ParallelAppend", /* LWTRANCHE_PER_XACT_PREDICATE_LIST: */ - "PerXactPredicateList" + "PerXactPredicateList", + /* LWTRANCHE_GTT_CTL */ + "GlobalTempTableControl" }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 2575ea1ca0..16e7e0825e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -393,6 +393,7 @@ InitProcess(void) MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyProc->tempNamespaceId = InvalidOid; + MyProc->backend_gtt_frozenxid = InvalidTransactionId; /* init backend level gtt frozenxid */ MyProc->isBackgroundWorker = IsBackgroundWorker; MyProc->delayChkpt = false; MyProc->statusFlags = 0; @@ -578,6 +579,7 @@ InitAuxiliaryProcess(void) MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyProc->tempNamespaceId = InvalidOid; + MyProc->backend_gtt_frozenxid = InvalidTransactionId; /* init backend level gtt frozenxid */ MyProc->isBackgroundWorker = IsBackgroundWorker; MyProc->delayChkpt = false; MyProc->statusFlags = 0; diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index d5a7fb13f3..8225cf6219 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -982,6 +982,13 @@ pg_relation_filepath(PG_FUNCTION_ARGS) Assert(backend != InvalidBackendId); } break; + case RELPERSISTENCE_GLOBAL_TEMP: + /* + * For global temporary table ,each backend has its own storage, + * also only sees 
its own storage. Use Backendid to identify them. + */ + backend = BackendIdForTempRelations(); + break; default: elog(ERROR, "invalid relpersistence: %c", relform->relpersistence); backend = InvalidBackendId; /* placate compiler */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 0c8c05f6c2..699507a24c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -108,6 +108,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" #include "catalog/pg_statistic_ext.h" +#include "catalog/storage_gtt.h" #include "executor/nodeAgg.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -5115,12 +5116,26 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, } else if (index->indpred == NIL) { - vardata->statsTuple = - SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(index->indexoid), - Int16GetDatum(pos + 1), - BoolGetDatum(false)); - vardata->freefunc = ReleaseSysCache; + char rel_persistence = get_rel_persistence(index->indexoid); + + if (rel_persistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata->statsTuple = + get_gtt_att_statistic(index->indexoid, + Int16GetDatum(pos + 1), + false); + vardata->freefunc = release_gtt_statistic_cache; + } + else + { + vardata->statsTuple = + SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(index->indexoid), + Int16GetDatum(pos + 1), + BoolGetDatum(false)); + vardata->freefunc = ReleaseSysCache; + } if (HeapTupleIsValid(vardata->statsTuple)) { @@ -5368,15 +5383,28 @@ examine_simple_variable(PlannerInfo *root, Var *var, } else if (rte->rtekind == RTE_RELATION) { - /* - * Plain table or parent of an inheritance appendrel, so look up the - * column in pg_statistic - */ - vardata->statsTuple = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(rte->relid), - Int16GetDatum(var->varattno), - BoolGetDatum(rte->inh)); - vardata->freefunc = ReleaseSysCache; + char rel_persistence 
= get_rel_persistence(rte->relid); + + if (rel_persistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata->statsTuple = get_gtt_att_statistic(rte->relid, + var->varattno, + rte->inh); + vardata->freefunc = release_gtt_statistic_cache; + } + else + { + /* + * Plain table or parent of an inheritance appendrel, so look up the + * column in pg_statistic + */ + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(rte->relid), + Int16GetDatum(var->varattno), + BoolGetDatum(rte->inh)); + vardata->freefunc = ReleaseSysCache; + } if (HeapTupleIsValid(vardata->statsTuple)) { @@ -6800,6 +6828,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { /* Simple variable --- look to stats for the underlying table */ RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + char rel_persistence = get_rel_persistence(rte->relid); Assert(rte->rtekind == RTE_RELATION); relid = rte->relid; @@ -6817,6 +6846,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, !vardata.freefunc) elog(ERROR, "no function provided to release variable stats with"); } + else if (rel_persistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata.statsTuple = get_gtt_att_statistic(relid, + colnum, + rte->inh); + vardata.freefunc = release_gtt_statistic_cache; + } else { vardata.statsTuple = SearchSysCache3(STATRELATTINH, @@ -6828,6 +6865,8 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, } else { + char rel_persistence = get_rel_persistence(index->indexoid); + /* Expression --- maybe there are stats for the index itself */ relid = index->indexoid; colnum = 1; @@ -6843,6 +6882,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, !vardata.freefunc) elog(ERROR, "no function provided to release variable stats with"); } + else if (rel_persistence == 
RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata.statsTuple = get_gtt_att_statistic(relid, + colnum, + false); + vardata.freefunc = release_gtt_statistic_cache; + } else { vardata.statsTuple = SearchSysCache3(STATRELATTINH, @@ -7761,6 +7808,8 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, /* attempt to lookup stats in relation for this index column */ if (attnum != 0) { + char rel_persistence = get_rel_persistence(rte->relid); + /* Simple variable -- look to stats for the underlying table */ if (get_relation_stats_hook && (*get_relation_stats_hook) (root, rte, attnum, &vardata)) @@ -7773,6 +7822,15 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, elog(ERROR, "no function provided to release variable stats with"); } + else if (rel_persistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata.statsTuple = + get_gtt_att_statistic(rte->relid, + attnum, + false); + vardata.freefunc = release_gtt_statistic_cache; + } else { vardata.statsTuple = @@ -7785,6 +7843,8 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, } else { + char rel_persistence = get_rel_persistence(index->indexoid); + /* * Looks like we've found an expression column in the index. Let's * see if there's any stats for it. 
@@ -7804,6 +7864,15 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, !vardata.freefunc) elog(ERROR, "no function provided to release variable stats with"); } + else if (rel_persistence == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + vardata.statsTuple = + get_gtt_att_statistic(index->indexoid, + attnum, + false); + vardata.freefunc = release_gtt_statistic_cache; + } else { vardata.statsTuple = SearchSysCache3(STATRELATTINH, diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 6bba5f8ec4..fa81808df6 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -35,6 +35,7 @@ #include "catalog/pg_statistic.h" #include "catalog/pg_transform.h" #include "catalog/pg_type.h" +#include "catalog/storage_gtt.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "utils/array.h" @@ -3113,6 +3114,19 @@ get_attavgwidth(Oid relid, AttrNumber attnum) if (stawidth > 0) return stawidth; } + if (get_rel_persistence(relid) == RELPERSISTENCE_GLOBAL_TEMP) + { + /* For global temporary table, get statistic data from localhash */ + tp = get_gtt_att_statistic(relid, attnum, false); + if (!HeapTupleIsValid(tp)) + return 0; + + stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth; + if (stawidth > 0) + return stawidth; + else + return 0; + } tp = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(attnum), diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 13d9994af3..9f13fbe487 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -65,6 +65,7 @@ #include "catalog/pg_type.h" #include "catalog/schemapg.h" #include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "commands/policy.h" #include "commands/trigger.h" #include "miscadmin.h" @@ -1116,6 +1117,28 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) 
relation->rd_islocaltemp = false; } break; + case RELPERSISTENCE_GLOBAL_TEMP: + { + BlockNumber relpages = 0; + double reltuples = 0; + BlockNumber relallvisible = 0; + + relation->rd_backend = BackendIdForTempRelations(); + relation->rd_islocaltemp = false; + + /* For global temporary table, get relstat data from localhash */ + get_gtt_relstats(RelationGetRelid(relation), + &relpages, + &reltuples, + &relallvisible, + NULL, NULL); + + /* And put them to local relcache */ + relation->rd_rel->relpages = (int32)relpages; + relation->rd_rel->reltuples = (float4)reltuples; + relation->rd_rel->relallvisible = (int32)relallvisible; + } + break; default: elog(ERROR, "invalid relpersistence: %c", relation->rd_rel->relpersistence); @@ -1173,6 +1196,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) case RELKIND_PARTITIONED_INDEX: Assert(relation->rd_rel->relam != InvalidOid); RelationInitIndexAccessInfo(relation); + /* The state of the global temporary table's index may need to be set */ + gtt_fix_index_backend_state(relation); break; case RELKIND_RELATION: case RELKIND_TOASTVALUE: @@ -1300,7 +1325,22 @@ RelationInitPhysicalAddr(Relation relation) heap_freetuple(phys_tuple); } - relation->rd_node.relNode = relation->rd_rel->relfilenode; + if (RELATION_IS_GLOBAL_TEMP(relation)) + { + Oid newrelnode = gtt_fetch_current_relfilenode(RelationGetRelid(relation)); + + /* + * For global temporary table, get the latest relfilenode + * from localhash and put it in relcache. 
+ */ + if (OidIsValid(newrelnode) && + newrelnode != relation->rd_rel->relfilenode) + relation->rd_node.relNode = newrelnode; + else + relation->rd_node.relNode = relation->rd_rel->relfilenode; + } + else + relation->rd_node.relNode = relation->rd_rel->relfilenode; } else { @@ -2251,6 +2291,9 @@ RelationReloadIndexInfo(Relation relation) HeapTupleHeaderGetXmin(tuple->t_data)); ReleaseSysCache(tuple); + + /* The state of the global temporary table's index may need to be set */ + gtt_fix_index_backend_state(relation); } /* Okay, now it's valid again */ @@ -3489,6 +3532,10 @@ RelationBuildLocalRelation(const char *relname, rel->rd_backend = BackendIdForTempRelations(); rel->rd_islocaltemp = true; break; + case RELPERSISTENCE_GLOBAL_TEMP: + rel->rd_backend = BackendIdForTempRelations(); + rel->rd_islocaltemp = false; + break; default: elog(ERROR, "invalid relpersistence: %c", relpersistence); break; @@ -3598,28 +3645,38 @@ void RelationSetNewRelfilenode(Relation relation, char persistence) { Oid newrelfilenode; - Relation pg_class; - HeapTuple tuple; + Relation pg_class = NULL; + HeapTuple tuple = NULL; Form_pg_class classform; MultiXactId minmulti = InvalidMultiXactId; TransactionId freezeXid = InvalidTransactionId; RelFileNode newrnode; + /* + * For global temporary table, storage information for the table is + * maintained locally, not in catalog. + */ + bool update_catalog = !RELATION_IS_GLOBAL_TEMP(relation); /* Allocate a new relfilenode */ newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, persistence); - /* - * Get a writable copy of the pg_class tuple for the given relation. 
- */ - pg_class = table_open(RelationRelationId, RowExclusiveLock); + memset(&classform, 0, sizeof(classform)); - tuple = SearchSysCacheCopy1(RELOID, - ObjectIdGetDatum(RelationGetRelid(relation))); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "could not find tuple for relation %u", - RelationGetRelid(relation)); - classform = (Form_pg_class) GETSTRUCT(tuple); + if (update_catalog) + { + /* + * Get a writable copy of the pg_class tuple for the given relation. + */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + classform = (Form_pg_class) GETSTRUCT(tuple); + } /* * Schedule unlinking of the old storage at transaction commit. @@ -3645,7 +3702,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) /* handle these directly, at least for now */ SMgrRelation srel; - srel = RelationCreateStorage(newrnode, persistence); + srel = RelationCreateStorage(newrnode, persistence, relation); smgrclose(srel); } break; @@ -3665,6 +3722,18 @@ RelationSetNewRelfilenode(Relation relation, char persistence) break; } + /* For global temporary table */ + if (!update_catalog) + { + Oid relnode = gtt_fetch_current_relfilenode(RelationGetRelid(relation)); + + Assert(RELATION_IS_GLOBAL_TEMP(relation)); + Assert(!RelationIsMapped(relation)); + + /* Make cache invalid and set new relnode to local cache. */ + CacheInvalidateRelcache(relation); + relation->rd_node.relNode = relnode; + } /* * If we're dealing with a mapped index, pg_class.relfilenode doesn't * change; instead we have to send the update to the relation mapper. @@ -3674,7 +3743,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) * possibly-inaccurate values of relpages etc, but those will be fixed up * later. 
*/ - if (RelationIsMapped(relation)) + else if (RelationIsMapped(relation)) { /* This case is only supported for indexes */ Assert(relation->rd_rel->relkind == RELKIND_INDEX); @@ -3720,9 +3789,12 @@ RelationSetNewRelfilenode(Relation relation, char persistence) CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); } - heap_freetuple(tuple); + if (update_catalog) + { + heap_freetuple(tuple); - table_close(pg_class, RowExclusiveLock); + table_close(pg_class, RowExclusiveLock); + } /* * Make the pg_class row change or relation map change visible. This will diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a2e0f8de7e..b1a50e82d9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -44,6 +44,7 @@ #include "catalog/namespace.h" #include "catalog/pg_authid.h" #include "catalog/storage.h" +#include "catalog/storage_gtt.h" #include "commands/async.h" #include "commands/prepare.h" #include "commands/trigger.h" @@ -152,6 +153,18 @@ char *GUC_check_errmsg_string; char *GUC_check_errdetail_string; char *GUC_check_errhint_string; +/* + * num = 0 disables the global temporary table feature; + * table schemas are still saved in the catalog. + * + * num > 0 allows the database to manage multiple active tables at the same time. + */ +#define MIN_NUM_ACTIVE_GTT 0 +#define DEFAULT_NUM_ACTIVE_GTT 1000 +#define MAX_NUM_ACTIVE_GTT 1000000 + +int max_active_gtt = DEFAULT_NUM_ACTIVE_GTT; /* keep in sync with the GUC boot value */ + static void do_serialize(char **destptr, Size *maxbytes, const char *fmt,...)
pg_attribute_printf(3, 4); static void set_config_sourcefile(const char *name, char *sourcefile, @@ -2125,6 +2138,15 @@ static struct config_bool ConfigureNamesBool[] = static struct config_int ConfigureNamesInt[] = { + { + {"max_active_global_temporary_table", PGC_POSTMASTER, UNGROUPED, + gettext_noop("max active global temporary table."), + NULL + }, + &max_active_gtt, + DEFAULT_NUM_ACTIVE_GTT, MIN_NUM_ACTIVE_GTT, MAX_NUM_ACTIVE_GTT, + NULL, NULL, NULL + }, { {"archive_timeout", PGC_SIGHUP, WAL_ARCHIVING, gettext_noop("Forces a switch to the next WAL file if a " @@ -2673,6 +2695,16 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"vacuum_gtt_defer_check_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("The defer check age of GTT, used to check expired data after vacuum."), + NULL + }, + &vacuum_gtt_defer_check_age, + 10000, 0, 1000000, + NULL, NULL, NULL + }, + /* * See also CheckRequiredParameterValues() if this parameter changes */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 34b91bb226..eb27727ff7 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2527,6 +2527,10 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) dopt->no_unlogged_table_data) return; + /* Don't dump data in global temporary table/sequence */ + if (tbinfo->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + return; + /* Check that the data is not explicitly excluded */ if (simple_oid_list_member(&tabledata_exclude_oids, tbinfo->dobj.catId.oid)) @@ -15962,6 +15966,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) char *ftoptions = NULL; char *srvname = NULL; char *foreign = ""; + char *table_type = NULL; switch (tbinfo->relkind) { @@ -16015,9 +16020,15 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) binary_upgrade_set_pg_class_oids(fout, q, tbinfo->dobj.catId.oid, false); + if (tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED) + table_type = "UNLOGGED "; + else if 
(tbinfo->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + table_type = "GLOBAL TEMPORARY "; + else + table_type = ""; + appendPQExpBuffer(q, "CREATE %s%s %s", - tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED ? - "UNLOGGED " : "", + table_type, reltypename, qualrelname); @@ -16381,6 +16392,15 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) } } + /* + * Transaction information for the global temporary table is not stored + * in the pg_class. + */ + if (tbinfo->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + { + Assert(tbinfo->frozenxid == 0); + Assert(tbinfo->minmxid == 0); + } /* * In binary_upgrade mode, arrange to restore the old relfrozenxid and * relminmxid of all vacuumable relations. (While vacuum.c processes @@ -16388,7 +16408,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) * of other relations; so this "if" lacks RELKIND_TOASTVALUE, and the * child toast table is handled below.) */ - if (dopt->binary_upgrade && + else if (dopt->binary_upgrade && (tbinfo->relkind == RELKIND_RELATION || tbinfo->relkind == RELKIND_MATVIEW)) { @@ -17391,6 +17411,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) PQExpBuffer query = createPQExpBuffer(); PQExpBuffer delqry = createPQExpBuffer(); char *qseqname; + bool global_temp_seq = false; qseqname = pg_strdup(fmtId(tbinfo->dobj.name)); @@ -17400,9 +17421,12 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) "SELECT format_type(seqtypid, NULL), " "seqstart, seqincrement, " "seqmax, seqmin, " - "seqcache, seqcycle " - "FROM pg_catalog.pg_sequence " - "WHERE seqrelid = '%u'::oid", + "seqcache, seqcycle, " + "c.relpersistence " + "FROM pg_catalog.pg_sequence s, " + "pg_catalog.pg_class c " + "WHERE seqrelid = '%u'::oid " + "and s.seqrelid = c.oid", tbinfo->dobj.catId.oid); } else if (fout->remoteVersion >= 80400) @@ -17447,6 +17471,9 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) cache = PQgetvalue(res, 0, 5); cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); + if 
(fout->remoteVersion >= 140000) + global_temp_seq = (strcmp(PQgetvalue(res, 0, 7), "g") == 0); + /* Calculate default limits for a sequence of this type */ is_ascending = (incby[0] != '-'); if (strcmp(seqtype, "smallint") == 0) @@ -17524,9 +17551,13 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) } else { - appendPQExpBuffer(query, - "CREATE SEQUENCE %s\n", - fmtQualifiedDumpable(tbinfo)); + appendPQExpBuffer(query, "CREATE "); + + if (global_temp_seq) + appendPQExpBuffer(query, "GLOBAL TEMP "); + + appendPQExpBuffer(query, "SEQUENCE %s\n", + fmtQualifiedDumpable(tbinfo)); if (strcmp(seqtype, "bigint") != 0) appendPQExpBuffer(query, " AS %s\n", seqtype); diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 0c47a6b8cc..1760f372b5 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -88,7 +88,7 @@ check_and_dump_old_cluster(bool live_check) start_postmaster(&old_cluster, true); /* Extract a list of databases and tables from the old cluster */ - get_db_and_rel_infos(&old_cluster); + get_db_and_rel_infos(&old_cluster, true); init_tablespaces(); @@ -178,7 +178,7 @@ check_and_dump_old_cluster(bool live_check) void check_new_cluster(void) { - get_db_and_rel_infos(&new_cluster); + get_db_and_rel_infos(&new_cluster, false); check_new_cluster_is_empty(); check_databases_are_compatible(); diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 5d9a26cf82..2de11d5d70 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -21,7 +21,7 @@ static void report_unmatched_relation(const RelInfo *rel, const DbInfo *db, bool is_new_db); static void free_db_and_rel_infos(DbInfoArr *db_arr); static void get_db_infos(ClusterInfo *cluster); -static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo); +static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo, bool skip_gtt); static void free_rel_infos(RelInfoArr *rel_arr); static void print_db_infos(DbInfoArr *dbinfo); static void 
print_rel_infos(RelInfoArr *rel_arr); @@ -304,9 +304,11 @@ print_maps(FileNameMap *maps, int n_maps, const char *db_name) * * higher level routine to generate dbinfos for the database running * on the given "port". Assumes that server is already running. + * for check object need check global temp table, + * for create object skip global temp table. */ void -get_db_and_rel_infos(ClusterInfo *cluster) +get_db_and_rel_infos(ClusterInfo *cluster, bool skip_gtt) { int dbnum; @@ -316,7 +318,7 @@ get_db_and_rel_infos(ClusterInfo *cluster) get_db_infos(cluster); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - get_rel_infos(cluster, &cluster->dbarr.dbs[dbnum]); + get_rel_infos(cluster, &cluster->dbarr.dbs[dbnum], skip_gtt); if (cluster == &old_cluster) pg_log(PG_VERBOSE, "\nsource databases:\n"); @@ -404,7 +406,7 @@ get_db_infos(ClusterInfo *cluster) * This allows later processing to match up old and new databases efficiently. */ static void -get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) +get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo, bool skip_gtt) { PGconn *conn = connectToServer(cluster, dbinfo->db_name); @@ -447,8 +449,17 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) " FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n " " ON c.relnamespace = n.oid " " WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ") AND " + CppAsString2(RELKIND_MATVIEW) ") AND "); + + if (skip_gtt) + { + /* exclude global temp tables */ + snprintf(query + strlen(query), sizeof(query) - strlen(query), + " relpersistence != " CppAsString2(RELPERSISTENCE_GLOBAL_TEMP) " AND "); + } + /* exclude possible orphaned temp tables */ + snprintf(query + strlen(query), sizeof(query) - strlen(query), " ((n.nspname !~ '^pg_temp_' AND " " n.nspname !~ '^pg_toast_temp_' AND " " n.nspname NOT IN ('pg_catalog', 'information_schema', " diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index e23b8ca88d..729a9c61e8 
100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -407,7 +407,7 @@ create_new_objects(void) set_frozenxids(true); /* update new_cluster info now that we have objects in the databases */ - get_db_and_rel_infos(&new_cluster); + get_db_and_rel_infos(&new_cluster, true); } /* @@ -638,7 +638,10 @@ set_frozenxids(bool minmxid_only) "UPDATE pg_catalog.pg_class " "SET relfrozenxid = '%u' " /* only heap, materialized view, and TOAST are vacuumed */ - "WHERE relkind IN (" + "WHERE " + /* exclude global temp tables */ + " relpersistence != " CppAsString2(RELPERSISTENCE_GLOBAL_TEMP) " AND " + "relkind IN (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", @@ -649,7 +652,10 @@ set_frozenxids(bool minmxid_only) "UPDATE pg_catalog.pg_class " "SET relminmxid = '%u' " /* only heap, materialized view, and TOAST are vacuumed */ - "WHERE relkind IN (" + "WHERE " + /* exclude global temp tables */ + " relpersistence != " CppAsString2(RELPERSISTENCE_GLOBAL_TEMP) " AND " + "relkind IN (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index f7eb2349e6..54fe4e38c4 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -387,7 +387,7 @@ void check_loadable_libraries(void); FileNameMap *gen_db_file_maps(DbInfo *old_db, DbInfo *new_db, int *nmaps, const char *old_pgdata, const char *new_pgdata); -void get_db_and_rel_infos(ClusterInfo *cluster); +void get_db_and_rel_infos(ClusterInfo *cluster, bool skip_gtt); void print_maps(FileNameMap *maps, int n, const char *db_name); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index ba658f731b..92e5f1f035 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4067,7 +4067,8 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys if 
(pset.sversion >= 90100) { appendPQExpBuffer(&buf, - ",\n CASE c.relpersistence WHEN 'p' THEN '%s' WHEN 't' THEN '%s' WHEN 'u' THEN '%s' END as \"%s\"", + ",\n CASE c.relpersistence WHEN 'g' THEN '%s' WHEN 'p' THEN '%s' WHEN 't' THEN '%s' WHEN 'u' THEN '%s' END as \"%s\"", + gettext_noop("session"), gettext_noop("permanent"), gettext_noop("temporary"), gettext_noop("unlogged"), diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index d6bf725971..140870197f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1055,6 +1055,8 @@ static const pgsql_thing_t words_after_create[] = { {"FOREIGN TABLE", NULL, NULL, NULL}, {"FUNCTION", NULL, NULL, Query_for_list_of_functions}, {"GROUP", Query_for_list_of_roles}, + {"GLOBAL", NULL, NULL, NULL, THING_NO_DROP | THING_NO_ALTER}, /* for CREATE GLOBAL TEMP/TEMPORARY TABLE + * ... */ {"INDEX", NULL, NULL, &Query_for_list_of_indexes}, {"LANGUAGE", Query_for_list_of_languages}, {"LARGE OBJECT", NULL, NULL, NULL, THING_NO_CREATE | THING_NO_DROP}, @@ -2488,6 +2490,9 @@ psql_completion(const char *text, int start, int end) /* CREATE FOREIGN DATA WRAPPER */ else if (Matches("CREATE", "FOREIGN", "DATA", "WRAPPER", MatchAny)) COMPLETE_WITH("HANDLER", "VALIDATOR", "OPTIONS"); + /* CREATE GLOBAL TEMP/TEMPORARY*/ + else if (Matches("CREATE", "GLOBAL")) + COMPLETE_WITH("TEMP", "TEMPORARY"); /* CREATE INDEX --- is allowed inside CREATE SCHEMA, so use TailMatches */ /* First off we complete CREATE UNIQUE with "INDEX" */ @@ -2696,6 +2701,8 @@ psql_completion(const char *text, int start, int end) /* Complete "CREATE TEMP/TEMPORARY" with the possible temp objects */ else if (TailMatches("CREATE", "TEMP|TEMPORARY")) COMPLETE_WITH("SEQUENCE", "TABLE", "VIEW"); + else if (TailMatches("CREATE", "GLOBAL", "TEMP|TEMPORARY")) + COMPLETE_WITH("TABLE", "SEQUENCE"); /* Complete "CREATE UNLOGGED" with TABLE or MATVIEW */ else if (TailMatches("CREATE", "UNLOGGED")) COMPLETE_WITH("TABLE", "MATERIALIZED VIEW"); diff 
--git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 6ce480b49c..a0ccfb3d77 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -59,7 +59,8 @@ extern Relation heap_create(const char *relname, bool mapped_relation, bool allow_system_table_mods, TransactionId *relfrozenxid, - MultiXactId *relminmxid); + MultiXactId *relminmxid, + bool skip_create_storage); extern Oid heap_create_with_catalog(const char *relname, Oid relnamespace, diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index fef9945ed8..9176b7dcc0 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -172,6 +172,7 @@ DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeInd #define RELPERSISTENCE_PERMANENT 'p' /* regular table */ #define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */ #define RELPERSISTENCE_TEMP 't' /* temporary table */ +#define RELPERSISTENCE_GLOBAL_TEMP 'g' /* global temporary table */ /* default selection for replica identity (primary key or nothing) */ #define REPLICA_IDENTITY_DEFAULT 'd' diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8cd0252082..dddf4ba3b9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5671,6 +5671,40 @@ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_xact_function_self_time' }, +# For global temporary table +{ oid => '9874', + descr => 'List local statistics for global temporary table', + proname => 'pg_get_gtt_statistics', provolatile => 'v', proparallel => 'u', + prorettype => 'record', proretset => 't', prorows => '10', proargtypes => 'oid int4 anyelement', + proallargtypes => '{oid,int4,anyelement,oid,int2,bool,float4,int4,float4,int2,int2,int2,int2,int2,oid,oid,oid,oid,oid,oid,oid,oid,oid,oid,_float4,_float4,_float4,_float4,_float4,anyarray,anyarray,anyarray,anyarray,anyarray}', + proargmodes => 
'{i,i,i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{relid,att,x,starelid,staattnum,stainherit,stanullfrac,stawidth,stadistinct,stakind1,stakind2,stakind3,stakind4,stakind5,staop1,staop2,staop3,staop4,staop5,stacoll1,stacoll2,stacoll3,stacoll4,stacoll5,stanumbers1,stanumbers2,stanumbers3,stanumbers4,stanumbers5,stavalues1,stavalues2,stavalues3,stavalues4,stavalues5}', + prosrc => 'pg_get_gtt_statistics' }, +{ oid => '9875', + descr => 'List local relstats for global temporary table', + proname => 'pg_get_gtt_relstats', provolatile => 'v', proparallel => 'u', + prorettype => 'record', proretset => 't', prorows => '10', proargtypes => 'oid', + proallargtypes => '{oid,oid,int4,float4,int4,xid,xid}', + proargmodes => '{i,o,o,o,o,o,o}', + proargnames => '{relid,relfilenode,relpages,reltuples,relallvisible,relfrozenxid,relminmxid}', + prosrc => 'pg_get_gtt_relstats' }, +{ oid => '9876', + descr => 'List attached pid for one global temporary table', + proname => 'pg_gtt_attached_pid', provolatile => 'v', proparallel => 'u', + prorettype => 'record', proretset => 't', prorows => '10', proargtypes => 'oid', + proallargtypes => '{oid,oid,int4}', + proargmodes => '{i,o,o}', + proargnames => '{relid,relid,pid}', + prosrc => 'pg_gtt_attached_pid' }, +{ oid => '9877', + descr => 'List those backends that have used global temporary table', + proname => 'pg_list_gtt_relfrozenxids', provolatile => 'v', proparallel => 'u', + prorettype => 'record', proretset => 't', prorows => '10', proargtypes => '', + proallargtypes => '{int4,xid}', + proargmodes => '{o,o}', + proargnames => '{pid,relfrozenxid}', + prosrc => 'pg_list_gtt_relfrozenxids' }, + { oid => '3788', descr => 'statistics: timestamp of the current statistics snapshot', proname => 'pg_stat_get_snapshot_timestamp', provolatile => 's', diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index 0ab32b44e9..92e9f8ba48 100644 --- a/src/include/catalog/storage.h +++ 
b/src/include/catalog/storage.h @@ -22,7 +22,7 @@ /* GUC variables */ extern int wal_skip_threshold; -extern SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence); +extern SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence, Relation rel); extern void RelationDropStorage(Relation rel); extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit); extern void RelationPreTruncate(Relation rel); diff --git a/src/include/catalog/storage_gtt.h b/src/include/catalog/storage_gtt.h new file mode 100644 index 0000000000..d48162c6b8 --- /dev/null +++ b/src/include/catalog/storage_gtt.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * storage_gtt.h + * prototypes for functions in backend/catalog/storage_gtt.c + * + * src/include/catalog/storage_gtt.h + * + *------------------------------------------------------------------------- + */ +#ifndef STORAGE_GTT_H +#define STORAGE_GTT_H + +#include "access/htup.h" +#include "storage/block.h" +#include "storage/relfilenode.h" +#include "nodes/execnodes.h" +#include "utils/relcache.h" + +extern int vacuum_gtt_defer_check_age; + +extern Size active_gtt_shared_hash_size(void); +extern void active_gtt_shared_hash_init(void); +extern void remember_gtt_storage_info(RelFileNode rnode, Relation rel); +extern void forget_gtt_storage_info(Oid relid, RelFileNode relfilenode, bool isCommit); +extern bool is_other_backend_use_gtt(Oid relid); +extern bool gtt_storage_attached(Oid relid); +extern void up_gtt_att_statistic(Oid reloid, int attnum, bool inh, int natts, + TupleDesc tupleDescriptor, Datum *values, bool *isnull); +extern HeapTuple get_gtt_att_statistic(Oid reloid, int attnum, bool inh); +extern void release_gtt_statistic_cache(HeapTuple tup); +extern void up_gtt_relstats(Oid relid, + BlockNumber num_pages, + double num_tuples, + BlockNumber num_all_visible_pages, + TransactionId relfrozenxid, + TransactionId relminmxid); +extern bool 
get_gtt_relstats(Oid relid, BlockNumber *relpages, double *reltuples, + BlockNumber *relallvisible, TransactionId *relfrozenxid, + TransactionId *relminmxid); +extern void force_enable_gtt_index(Relation index); +extern void gtt_fix_index_backend_state(Relation index); +extern void init_gtt_storage(CmdType operation, ResultRelInfo *resultRelInfo); +extern Oid gtt_fetch_current_relfilenode(Oid relid); +extern void gtt_switch_rel_relfilenode(Oid rel1, Oid relfilenode1, Oid rel2, Oid relfilenode2, bool footprint); + +#endif /* STORAGE_GTT_H */ diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h index 40544dd4c7..7b66d808fc 100644 --- a/src/include/commands/sequence.h +++ b/src/include/commands/sequence.h @@ -65,5 +65,6 @@ extern void seq_redo(XLogReaderState *rptr); extern void seq_desc(StringInfo buf, XLogReaderState *rptr); extern const char *seq_identify(uint8 info); extern void seq_mask(char *pagedata, BlockNumber blkno); +extern void gtt_init_seq(Relation rel); #endif /* SEQUENCE_H */ diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h index 8336c2c5a2..bddcfe7256 100644 --- a/src/include/parser/parse_relation.h +++ b/src/include/parser/parse_relation.h @@ -120,4 +120,7 @@ extern Oid attnumTypeId(Relation rd, int attid); extern Oid attnumCollationId(Relation rd, int attid); extern bool isQueryUsingTempRelation(Query *query); +/* global temp table check */ +extern bool is_query_using_gtt(Query *query); + #endif /* PARSE_RELATION_H */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index c86ccdaf60..6b395551c1 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -399,6 +399,8 @@ do { \ #define PageClearPrunable(page) \ (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) +#define GlobalTempRelationPageIsNotInitialized(rel, page) \ + ((rel)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP && PageIsNew(page)) /* 
---------------------------------------------------------------- * extern declarations diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index a8f052e484..4b4ed1a13a 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -189,6 +189,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SHARED_TIDBITMAP, LWTRANCHE_PARALLEL_APPEND, LWTRANCHE_PER_XACT_PREDICATE_LIST, + LWTRANCHE_GTT_CTL, LWTRANCHE_FIRST_USER_DEFINED } BuiltinTrancheIds; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index be67d8a861..e2f8bb5162 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -157,6 +157,8 @@ struct PGPROC Oid tempNamespaceId; /* OID of temp schema this backend is * using */ + TransactionId backend_gtt_frozenxid; /* backend-level relfrozenxid for global temp tables */ + bool isBackgroundWorker; /* true if background worker. */ /* diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index b01fa52139..8efffa55ac 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -94,4 +94,6 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin, extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin); +extern int list_all_backend_gtt_frozenxids(int max_size, int *pids, uint32 *xids, int *n); + #endif /* PROCARRAY_H */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index a7c3a4958e..ce98b47d74 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -282,6 +282,10 @@ extern int tcp_user_timeout; extern bool trace_sort; #endif +/* global temporary table */ +extern int max_active_gtt; +/* end of global temporary table */ + /* * Functions exported by guc.c */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index b4faa1c123..a74558a838 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -57,7 +57,7 @@ typedef struct RelationData SMgrRelation rd_smgr; /* cached file handle, or NULL 
*/ int rd_refcnt; /* reference count */ BackendId rd_backend; /* owning backend id, if temporary relation */ - bool rd_islocaltemp; /* rel is a temp rel of this session */ + bool rd_islocaltemp; /* rel is a temp rel of this session */ bool rd_isnailed; /* rel is nailed in cache */ bool rd_isvalid; /* relcache entry is valid */ bool rd_indexvalid; /* is rd_indexlist valid? (also rd_pkindex and @@ -326,6 +326,7 @@ typedef struct StdRdOptions int parallel_workers; /* max number of parallel workers */ StdRdOptIndexCleanup vacuum_index_cleanup; /* controls index vacuuming */ bool vacuum_truncate; /* enables vacuum to truncate a relation */ + bool on_commit_delete_rows; /* global temp table on commit option */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 @@ -608,11 +609,13 @@ RelationGetSmgr(Relation rel) * True if relation's pages are stored in local buffers. */ #define RelationUsesLocalBuffers(relation) \ - ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP || \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) /* * RELATION_IS_LOCAL - * If a rel is either temp or newly created in the current transaction, + * If a rel is either a local temp or a global temp relation, + * or newly created in the current transaction, * it can be assumed to be accessible only to the current backend. * This is typically used to decide that we can skip acquiring locks. * @@ -620,6 +623,7 @@ RelationGetSmgr(Relation rel) */ #define RELATION_IS_LOCAL(relation) \ ((relation)->rd_islocaltemp || \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP || \ (relation)->rd_createSubid != InvalidSubTransactionId) /* @@ -632,6 +636,30 @@ RelationGetSmgr(Relation rel) ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP && \ !(relation)->rd_islocaltemp) +/* + * RELATION_IS_TEMP_ON_CURRENT_SESSION + * Test whether a rel is either a local temp relation of this session + * or a global temp relation. 
+ */ +#define RELATION_IS_TEMP_ON_CURRENT_SESSION(relation) \ + ((relation)->rd_islocaltemp || \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + +/* + * RELATION_IS_TEMP + * Test whether a rel is a local temp relation or a global temporary relation. + */ +#define RELATION_IS_TEMP(relation) \ + ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP || \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + +/* + * RelpersistenceTsTemp + * Test whether a relpersistence value is local temp or global temp; the argument is evaluated twice. + */ +#define RelpersistenceTsTemp(relpersistence) \ + ((relpersistence) == RELPERSISTENCE_TEMP || \ + (relpersistence) == RELPERSISTENCE_GLOBAL_TEMP) /* * RelationIsScannable @@ -677,6 +705,19 @@ RelationGetSmgr(Relation rel) (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE && \ !IsCatalogRelation(relation)) +/* True if the relation is a global temporary table */ +#define RELATION_IS_GLOBAL_TEMP(relation) ((relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) + +/* Get the on_commit_delete_rows option; false unless rel is a global temporary (partitioned) table with options set */ +#define RELATION_GTT_ON_COMMIT_DELETE(relation) \ + ((relation)->rd_options && \ + ((relation)->rd_rel->relkind == RELKIND_RELATION || (relation)->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) && \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP ? \ + ((StdRdOptions *) (relation)->rd_options)->on_commit_delete_rows : false) + +/* Get relpersistence for relation */ +#define RelationGetRelPersistence(relation) ((relation)->rd_rel->relpersistence) + /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel); -- 2.27.0