/*
 * vci_special_udf_info_t
 *		Bookkeeping for function OIDs that VCI handles specially.
 *
 * A single instance, vci_special_udf_info, is declared extern in this
 * header.
 */
typedef struct
{
	/* presumably the number of used slots in applicable_udfs[] -- TODO confirm */
	int num_applicable_udfs;
	/* function OIDs; capacity is fixed at VCI_MAX_APPLICABLE_UDFS entries */
	Oid applicable_udfs[VCI_MAX_APPLICABLE_UDFS];
	/* NOTE(review): looks like the OID of a "vci_runs_in_plan" function -- confirm */
	Oid vci_runs_in_plan_funcoid;
	/* NOTE(review): looks like the OID of a "vci_always_return_true" function -- confirm */
	Oid vci_always_return_true_funcoid;
} vci_special_udf_info_t;
a/contrib/vci/storage/Makefile b/contrib/vci/storage/Makefile index 2ea8365..364a944 100644 --- a/contrib/vci/storage/Makefile +++ b/contrib/vci/storage/Makefile @@ -6,15 +6,15 @@ SUBOBJS = \ # vci_columns_data.o \ # vci_fetch.o \ # vci_freelist.o \ -# vci_index.o \ -# vci_internal_view.o \ + vci_index.o \ + vci_internal_view.o \ # vci_low_utils.o \ # vci_memory_entry.o \ vci_ros.o \ vci_ros_command.o \ vci_ros_daemon.o \ -# vci_tidcrid.o \ -# vci_wos.o \ + vci_tidcrid.o \ + vci_wos.o \ # vci_xact.o EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) diff --git a/contrib/vci/storage/meson.build b/contrib/vci/storage/meson.build index fefe15b..87fa17a 100644 --- a/contrib/vci/storage/meson.build +++ b/contrib/vci/storage/meson.build @@ -6,14 +6,14 @@ vci_storage_sources = files( # 'vci_columns_data.c', # 'vci_fetch.c', # 'vci_freelist.c', -# 'vci_index.c', -# 'vci_internal_view.c', + 'vci_index.c', + 'vci_internal_view.c', # 'vci_low_utils.c', # 'vci_memory_entry.c', 'vci_ros.c', 'vci_ros_command.c', 'vci_ros_daemon.c', -# 'vci_tidcrid.c', -# 'vci_wos.c', + 'vci_tidcrid.c', + 'vci_wos.c', # 'vci_xact.c', ) diff --git a/contrib/vci/storage/vci_index.c b/contrib/vci/storage/vci_index.c new file mode 100644 index 0000000..7dbaa31 --- /dev/null +++ b/contrib/vci/storage/vci_index.c @@ -0,0 +1,2142 @@ +/*------------------------------------------------------------------------- + * + * vci_index.c + * Index Access Method + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_index.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/heapam_xlog.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/toast_compression.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "catalog/catalog.h" +#include 
"catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_rewrite.h" +#include "catalog/pg_type.h" +#include "catalog/storage.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "executor/executor.h" +#include "executor/nodeModifyTable.h" +#include "executor/spi.h" +#include "fmgr.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" +#include "nodes/makefuncs.h" +#include "nodes/pathnodes.h" +#include "access/relation.h" +#include "port.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rewriteRemove.h" +#include "rewrite/rewriteSupport.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/relcache.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/varlena.h" + +#include "vci.h" +#include "vci_columns.h" +#include "vci_columns_data.h" + +#include "vci_mem.h" +#include "vci_ros.h" +#include "vci_ros_command.h" +#include "vci_ros_daemon.h" +#include "vci_supported_oid.h" +#include "vci_tidcrid.h" +#include "vci_wos.h" +#include "vci_xact.h" + +#ifdef WIN32 +#define __func__ __FUNCTION__ +#endif + +#ifdef HAVE_DESIGNATED_INITIALIZERS +#define SFINIT(f, ...) f = __VA_ARGS__ +#else +#define SFINIT(f, ...) 
__VA_ARGS__ +#endif + +/** + * Data Relation + */ +#define VCI_RELTYPE_DATA ('d') + +/** + * Meta Relation + */ +#define VCI_RELTYPE_META ('m') + +/** + * WOS Relation + */ +#define VCI_RELTYPE_WOS ('W') + +/** + * ROS Relation + */ +#define VCI_RELTYPE_ROS ('R') + +/** + * TIDCRID Relation + */ +#define VCI_RELTYPE_TIDCRID ('T') + +/* local functions */ +static TupleDesc get_tuple_desc_for_build(Relation heapRel, Relation indexRel, bool isctid); +static IndexBuildResult *vci_inner_build(Relation, Relation, IndexInfo *); +static void vci_inner_buildempty(Relation indexRelation); +static bool vci_inner_insert(Relation, ItemPointer); +static bool vci_inner_insert_in_copy(Relation, ItemPointer); +static IndexBulkDeleteResult *vci_inner_vacuumcleanup(IndexVacuumInfo *, IndexBulkDeleteResult *); +static void vci_modify_column_information(bool isctid, Relation indexRel, Relation heapRel); + +IndexBuildResult *vci_build(Relation heap, Relation index, IndexInfo *indexInfo); +void vci_buildempty(Relation index); +bool vci_insert(Relation indexRel, Datum *values, bool *isnull, + ItemPointer heap_tid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +IndexBulkDeleteResult *vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state); +IndexBulkDeleteResult *vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +void vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages); +int vci_gettreeheight(Relation rel); +bytea *vci_options(Datum reloptions, bool validate); +IndexScanDesc vci_beginscan(Relation rel, int nkeys, int norderbys); +void vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +bool vci_validate(Oid opclassoid); +void vci_endscan(IndexScanDesc 
scan); +void vci_markpos(IndexScanDesc scan); +void vci_restrpos(IndexScanDesc scan); + +static char relNameBuf[NAMEDATALEN]; + +static bool copy_with_freeze_option; + +bool +vci_isVciAdditionalRelation(Relation rel) +{ + return vci_isVciAdditionalRelationTuple(rel->rd_id, rel->rd_rel); +} + +bool +vci_isVciAdditionalRelationTuple(Oid reloid, Form_pg_class reltuple) +{ + if (reltuple->relkind == RELKIND_MATVIEW) + { + int ret; + int dummy1; + int dummy2; + char dummy3; + + ret = sscanf(NameStr(reltuple->relname), VCI_INTERNAL_RELATION_TEMPLATE, + &dummy1, &dummy2, &dummy3); + + return (ret == 3); + } + + return false; +} + +/* custom index */ + +IndexBuildResult * +vci_build(Relation heapRel, Relation indexRel, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + vci_id_t vciid; + + if (!fullPageWrites) + { + if (vci_rebuild_command == vcirc_invalid) + /* CREATE INDEX */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING))); + else + /* TRUNCATE, VACUUM FULL */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING), + errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel)))); + } + + result = vci_inner_build(heapRel, indexRel, indexInfo); + + vciid.oid = RelationGetRelid(indexRel); + vciid.dbid = MyDatabaseId; + + vci_TouchMemoryEntry(&vciid, + get_rel_tablespace(indexRel->rd_id)); + + return result; +} + +void +vci_buildempty(Relation indexRel) +{ + vci_inner_buildempty(indexRel); + + return; +} + +/* for COPY command */ +#define EXTENT_LIST_UNIT_EXTENSION (1024) + +typedef struct CopyCommandInfo +{ + TransactionId xid; + CommandId cid; + uint64 numAppendedRows; + uint32 *extentList; + uint32 numFilledExtent; + uint32 numAllocatedExtent; +} CopyCommandInfo; + +static CopyCommandInfo copyInfo = { + SFINIT(xid, InvalidTransactionId), + SFINIT(cid, InvalidCommandId), + 
SFINIT(numAppendedRows, 0), + SFINIT(extentList, NULL), + SFINIT(numFilledExtent, 0), + SFINIT(numAllocatedExtent, 0) +}; +static vci_RosCommandContext copyConvContext; + +bool +vci_insert(Relation indexRel, Datum *values, bool *isnull, + ItemPointer heap_tid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo) +{ + bool result; + TransactionId xid = GetCurrentTransactionId(); + CommandId cid = GetCurrentCommandId(false); + + Assert(TransactionIdIsValid(xid)); + Assert(InvalidCommandId != cid); + + if (!fullPageWrites) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING), + errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel)))); + + if (ItemPointerGetOffsetNumber(heap_tid) == FirstOffsetNumber) + { + vci_id_t vciid; + + vciid.oid = RelationGetRelid(indexRel); + vciid.dbid = MyDatabaseId; + + vci_TouchMemoryEntry(&vciid, + get_rel_tablespace(indexRel->rd_id)); + } + + if (TransactionIdEquals(xid, copyInfo.xid) && (cid == copyInfo.cid)) + result = vci_inner_insert_in_copy(indexRel, heap_tid); /* LCOV_EXCL_LINE */ + else + result = vci_inner_insert(indexRel, heap_tid); + + return result; +} + +/** + * vci_bulkdelete + */ +IndexBulkDeleteResult * +vci_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + return stats; +} + +/** + * vci_vacuumcleanup + */ +IndexBulkDeleteResult * +vci_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + if (info->analyze_only) + return stats; + + vci_inner_vacuumcleanup(info, stats); + + return stats; +} + +/** + * vci_costestimate + */ +void +vci_costestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + /* + * PlannerInfo *root = (PlannerInfo *) 
PG_GETARG_POINTER(0); IndexPath + * *path = (IndexPath *) PG_GETARG_POINTER(1); double loop_count = + * PG_GETARG_FLOAT8(2); + */ + + /* always return worst cost value */ + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 1.0; + *indexCorrelation = 0.0; + *indexPages = ((BlockNumber) 0xFFFFFFFE); /* MaxBlockNumber */ + + /** + * Disabled nodes are also a cost metric (see Commit e222534), so set a + * high value to ensure an Index Scan will not be chosen. + */ + path->path.disabled_nodes = INT_MAX; + + return; +} + +int +vci_gettreeheight(Relation rel) +{ + int result; + + result = 0; + return result; +} + +bytea * +vci_options(Datum reloptions, bool validate) +{ + return NULL; +} + +bool +vci_validate(Oid opclassoid) +{ + /* pass */ + return true; +} + +/* LCOV_EXCL_START */ +IndexScanDesc +vci_beginscan(Relation rel, int nkeys, int norderbys) +{ + IndexScanDesc result; + + /* + * Relation indexRel = (Relation) PG_GETARG_POINTER(0); int nkeys = + * PG_GETARG_INT32(1); int norderbys = PG_GETARG_INT32(2); + */ + + result = NULL; + + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", __func__))); + + return result; +} + +void +vci_rescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys) +{ + /* + * IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey keys + * = (ScanKey) PG_GETARG_POINTER(1); int nkeys = PG_GETARG_INT32(2); + * ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3); int norderbys = + * PG_GETARG_INT32(4); + */ + + /* pass */ + return; +} + +void +vci_endscan(IndexScanDesc scan) +{ + /* + * IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + */ + + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", __func__))); + + /* pass */ + return; +} + +void +vci_markpos(IndexScanDesc scan) +{ + /* + * IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + */ 
+ ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", __func__))); + + /* pass */ + return; +} + +void +vci_restrpos(IndexScanDesc scan) +{ + /* + * IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected index access method call : \"%s\" ", __func__))); + + /* pass */ + return; +} + +/* LCOV_EXCL_STOP */ + +/* --body-- */ + +static Oid +vci_create_relation(const char *rel_identifier, Relation indexRel, IndexInfo *indexInfo, char vci_reltype) +{ + int natts; + + /* system catalog relation id */ + Relation pg_class; + Relation pg_attr; + + /* new rel, oid, tupdesc */ + Relation new_rel; + Oid new_oid; + TupleDesc new_tupdesc; + + /* attributes */ + Oid ownerid = GetUserId(); + + char relkind = RELKIND_MATVIEW; + + Oid new_type_oid = InvalidOid; + Oid reloftypeid = InvalidOid; + CatalogIndexState indstate; + + char relname[NAMEDATALEN]; /* max 64 characters */ + Oid reltablespace; + Oid relnamespace; + char relpersistence; + Oid accessmtd; + + /* variables for pg_class */ + Form_pg_class new_rel_reltup; + + RangeVar *relrv; + + /* Insert pg_depend table */ + ObjectAddress oaIndex; + ObjectAddress oaNewRel; + + relnamespace = indexRel->rd_rel->relnamespace; + reltablespace = indexRel->rd_rel->reltablespace; + relpersistence = indexRel->rd_rel->relpersistence; + accessmtd = HEAP_TABLE_AM_OID; + + /* function start */ + memset(relname, 0, sizeof(relname)); + strncpy(relname, rel_identifier, sizeof(relname)); + + relrv = makeRangeVar(get_namespace_name(relnamespace), relname, -1); + new_oid = RangeVarGetRelid(relrv, AccessShareLock, true); + + if (OidIsValid(new_oid)) + { + new_rel = relation_open(new_oid, AccessExclusiveLock); + RelationSetNewRelfilenumber(new_rel, new_rel->rd_rel->relpersistence); + + /* + * if (new_rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + * heap_create_init_fork(new_rel); + */ + + 
relation_close(new_rel, NoLock); /* do not unlock till end of xact */ + + return new_oid; + } + + /* Generate Data WOS */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + /* 4.6.1 get new Oid for new relation */ + + new_oid = GetNewRelFileNumber(reltablespace, pg_class, relpersistence); + + /* TODO */ + + /* + * The following line is meaningful? Or shoud we remove it? + */ + get_user_default_acl(OBJECT_TABLE, ownerid, relnamespace); + + /* 4.6.1.2 create new relation cache entry */ + + /* new tuple descriptor has TID column */ + + switch (vci_reltype) + { + /* WOS */ + case VCI_RELTYPE_WOS: + natts = 2; + new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "original_tid", TIDOID, -1, 0); + TupleDescInitEntry(new_tupdesc, (AttrNumber) 2, "xid", INT8OID, -1, 0); + break; + + /* ROS */ + case VCI_RELTYPE_ROS: + natts = 1; + new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0); /* */ + break; + + /* TID-CRID */ + case VCI_RELTYPE_TIDCRID: + natts = 1; + new_tupdesc = CreateTemplateTupleDesc(natts); /* no Oid */ + TupleDescInitEntry(new_tupdesc, (AttrNumber) 1, "bindata", BYTEAOID, -1, 0); /* */ + break; + + /* LCOV_EXCL_START */ + default: + elog(ERROR, "unexpected vci_reltype"); + break; + /* LCOV_EXCL_STOP */ + } + + /* + * Create the relcache entry (mostly dummy at this point) and the physical + * disk file. (If we fail further down, it's the smgr's responsibility to + * remove the disk file again.) 
+ */ + new_rel = RelationBuildLocalRelation(relname, + relnamespace, + new_tupdesc, + new_oid, + accessmtd, + new_oid, /* relfilenumber */ + reltablespace, + false, /* shared_relation */ + false, /* mapped_relation */ + relpersistence, + relkind); + + /* 4.6.1.3 create new starge for new relation */ + RelationCreateStorage(new_rel->rd_locator, relpersistence, true); + + Assert(new_oid == RelationGetRelid(new_rel)); + + /* 4.6.1.4 add new entry into pg_class */ + new_rel_reltup = new_rel->rd_rel; + new_rel_reltup->relpages = 0; + new_rel_reltup->reltuples = -1; + new_rel_reltup->relallvisible = 0; + new_rel_reltup->relfrozenxid = RecentXmin; + new_rel_reltup->relminmxid = GetOldestMultiXactId(); + new_rel_reltup->relowner = ownerid; + new_rel_reltup->reltype = new_type_oid; + new_rel_reltup->reloftype = reloftypeid; + + /* + * Flag the VCI internal relation MATVIEW as already populated. + * + * Users are not supposed to be querying these internal relations, but + * just in case they do, setting 'relispopulated' prevents an error saying + * the view has not been populated, hinting a "REFRESH MATERIALIZED VIEW" + * is needed. That hint only causes confusion, since the REFRESH is + * disallowed for VCI internal relations. + */ + new_rel_reltup->relispopulated = true; + + /* + * @see + * https://www.postgresql.jp/document/9.4/html/catalog-pg-rewrite.html + */ + new_rel_reltup->relhasrules = true; + + new_rel->rd_att->tdtypeid = new_type_oid; + + InsertPgClassTuple(pg_class, new_rel, new_oid, (Datum) 0, (Datum) 0); + + /* + * 4.6.1.5 -now add tuples to pg_attribute for the attributes in our new + * relation. + */ + + /* + * open pg_attribute and its indexes. + */ + pg_attr = table_open(AttributeRelationId, RowExclusiveLock); + indstate = CatalogOpenIndexes(pg_attr); + + /* + * First we add the user attributes. This is also a convenient place to + * add dependencies on their datatypes and collations. 
+ */ + for (int i = 0; i < natts; i++) + { + Form_pg_attribute attrs; + + /* [TODO] Make sure these are OK? */ + new_tupdesc->compact_attrs[i].attcacheoff = -1; + attrs = TupleDescAttr(new_tupdesc, i); + attrs->attstorage = TYPSTORAGE_PLAIN; + attrs->attcompression = InvalidCompressionMethod; + } + InsertPgAttributeTuples(pg_attr, new_tupdesc, new_oid, NULL, indstate); + + /* + * clean up pg_attribute + */ + CatalogCloseIndexes(indstate); + table_close(pg_attr, RowExclusiveLock); + + /* + * VCI internal relations are dependent on the parent index. + */ + ObjectAddressSet(oaIndex, RelationRelationId, indexRel->rd_id); + ObjectAddressSet(oaNewRel, RelationRelationId, new_oid); + recordDependencyOn(&oaNewRel, &oaIndex, DEPENDENCY_INTERNAL); + + table_close(new_rel, NoLock); /* do not unlock till end of xact */ + table_close(pg_class, RowExclusiveLock); + + return new_oid; +} + +static char * +GenRelName(Relation rel, int16 columnId, char suffix) +{ + snprintf(relNameBuf, NAMEDATALEN, VCI_INTERNAL_RELATION_TEMPLATE, RelationGetRelid(rel), + (0xFFFF & columnId), suffix); + + return relNameBuf; +} + +static void +CheckIndexedRelationKind(Relation rel) +{ + if (rel->rd_rel->relkind == RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support index on materialized view", VCI_STRING))); + + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support index on temporary table", VCI_STRING))); +} + +static void +CheckIndexInfo(IndexInfo *indexInfo, Relation indexRel) +{ + /* check Concurrent option first. 
*/ + if (indexInfo->ii_Concurrent) + /* LCOV_EXCL_START */ + elog(PANIC, "should not reach here"); + /* LCOV_EXCL_STOP */ + + if (indexInfo->ii_Predicate != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support partial-index", VCI_STRING))); + + if (indexInfo->ii_Expressions != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support to CREATE INDEX on the expression", VCI_STRING))); + + if (indexInfo->ii_ExclusionOps != NULL || + indexInfo->ii_ExclusionProcs != NULL || + indexInfo->ii_ExclusionStrats != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support EXCLUDE clause", VCI_STRING))); + + for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + AttrNumber an = indexInfo->ii_IndexAttrNumbers[i]; + + for (int j = i + 1; j < indexInfo->ii_NumIndexAttrs; j++) + { + TupleDesc tupdesc = RelationGetDescr(indexRel); + + if (an == indexInfo->ii_IndexAttrNumbers[j]) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("duplicated columns in vci index creation: %s", + NameStr(TupleDescAttr(tupdesc, an - 1)->attname)), + errhint("duplicated columns are specified"))); + } + } +} + +static void +CheckIndexColumnTypes(TupleDesc tupdesc, bool *isctid) +{ + *isctid = false; + + for (int i = 0; i < tupdesc->natts; i++) + { + Oid typeoid = TupleDescAttr(tupdesc, i)->atttypid; + + /* + * In general, the type 'tid' is not supported. However, 'ctid' column + * (that is exist in all tables) is accepted as a dummy column. In + * this case, the real columns should be registered in the + * 'vci_column_ids' option. 
+ */ + if (!vci_is_supported_type(typeoid)) + { + if (strcmp(NameStr(TupleDescAttr(tupdesc, i)->attname), "ctid") != 0) + { + HeapTuple tuple; + Form_pg_type typetuple; + + tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for type %u", typeoid); + + typetuple = (Form_pg_type) GETSTRUCT(tuple); + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("data type %s is not supported for access method \"%s\"", + NameStr(typetuple->typname), VCI_STRING))); + + ReleaseSysCache(tuple); + } + else if (tupdesc->natts != 1) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use \"ctid\" column with other columns"))); + } + *isctid = true; + } + } +} + +/* TODO - is this function needed? */ +static void +CheckColumnReloptions(Relation indexRel, bool isctid) +{ + char *ids = NULL; + bool hasoption = false; + + if (hasoption) + ereport(DEBUG2, + (errmsg_internal("vci_column_ids: %s", ids))); + + if (isctid == hasoption) + return; + else if (isctid && !hasoption) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("The \"vci_column_ids\" option is required when \"ctid\" column is specified"))); + else if (!isctid && hasoption) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("The \"vci_column_ids\" option cannot be used without \"ctid\" column"))); + else + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unrecognized state in vci_inner_build: isctid = %d, ids = %p", isctid, ids))); +} + +/* + * get_tuple_desc_for_build -- create TupleDesc for build. + * + * The VCI can be created by 2 interfaces. The first interface is the same to + * an ordinal index (Ex. CREATE INDEX idx ON table USING vci(c1, c2)). The + * second interface is by the original function 'vci_create' (SELECT vci_create + * ('idx', 'table', ARRAY['c1', 'c2'])). 
It generates such SQL as 'CREATE + * INDEX idx ON table USING vci(ctid) WITH (vci_column_ids = '1,2')'. The + * following codes distinguish this 2 cases. + * + * XXX - function vci_create is not implemented by this OSS patch, so this + * code may be able to be further simplified. + */ +static TupleDesc +get_tuple_desc_for_build(Relation heapRel, Relation indexRel, bool isctid) +{ + if (isctid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"ctid\" column is specified"))); + + return RelationGetDescr(indexRel); +} + +static void +vci_modify_column_information(bool isctid, Relation indexRel, Relation heapRel) +{ + if (!isctid) + return; + + /* + * XXX. The code which previously existed below here is now removed. It + * relied on vci_MakeDroppedColumnBitmap which asserted + * vci_IsExtendedToMoreThan32Columns, and that is no longer possible since + * "vci_create() function is not supported by this OSS patch. + */ +} + +static IndexBuildResult * +vci_inner_build(Relation heapRel, Relation indexRel, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + Oid oid; + + vci_MainRelHeaderInfo *vmr_info; + + TupleDesc tupdesc; + bool isctid; + + /* for checking type after getting 'real' TupleDesc. 
*/ + bool dummy_isctid; + + uint32 offsetToExtentInfo; + + double reltuples = -1; + + CheckIndexedRelationKind(heapRel); + CheckIndexInfo(indexInfo, indexRel); + CheckIndexColumnTypes(RelationGetDescr(indexRel), &isctid); + CheckColumnReloptions(indexRel, isctid); + + vci_modify_column_information(isctid, indexRel, heapRel); + + /* create VCI main relation */ + vmr_info = palloc0_object(vci_MainRelHeaderInfo); + vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_ros_conv_build); + + if (RelationGetNumberOfBlocks(indexRel) != 0) + elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(indexRel)); + + /* create blank page * VCI_NUM_MAIN_REL_HEADER_PAGES */ + vci_PreparePagesWithOneItemIfNecessary(indexRel, + lengthof(vmr_info->buffer) - 1); + + vci_KeepMainRelHeaderWithoutVersionCheck(vmr_info); + + /* write ROS format version */ + vci_SetMainRelVar(vmr_info, vcimrv_ros_version_major, 0, + VCI_ROS_VERSION_MAJOR); + vci_SetMainRelVar(vmr_info, vcimrv_ros_version_minor, 0, + VCI_ROS_VERSION_MINOR); + + /* create WOS relations */ + /* register WOS relation's OID to VCI Main relation */ + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DATA_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS); + vci_SetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_WHITEOUT_WOS, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_WOS); + vci_SetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0, oid); + + /* create ROS relations */ + + /* TID */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_tid_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_tid_meta_oid, 0, oid); + + /* NUll */ + oid = vci_create_relation(GenRelName(indexRel, 
VCI_COLUMN_ID_NULL, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_null_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_NULL, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_null_meta_oid, 0, oid); + + /* Delete Vector */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_delete_data_oid, 0, oid); + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_DELETE, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + vci_SetMainRelVar(vmr_info, vcimrv_delete_meta_oid, 0, oid); + + /* Column Stores */ + tupdesc = get_tuple_desc_for_build(heapRel, indexRel, isctid); + CheckIndexColumnTypes(tupdesc, &dummy_isctid); + + /* + * When using 'vci_create', PostgreSQL registers only a 'ctid' column as + * as a dependency object. So self registration is required in such case. + * + * Note: A tupdesc->attrs[i]->attnum doesn't point an attribute number of + * the heap but is a sequential number in the index. 
+ */ + if (isctid) + { + ObjectAddress myself, + referenced; + TupleDesc heapTupleDesc; + ObjectAddresses *addrs; + + heapTupleDesc = RelationGetDescr(heapRel); + + addrs = new_object_addresses(); + + ObjectAddressSet(myself, RelationRelationId, RelationGetRelid(indexRel)); + ObjectAddressSet(referenced, RelationRelationId, RelationGetRelid(heapRel)); + + for (int i = 0; i < tupdesc->natts; i++) + { + referenced.objectSubId = vci_GetAttNum(heapTupleDesc, + NameStr(TupleDescAttr(tupdesc, i)->attname)); + + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_AUTO); + free_object_addresses(addrs); + } + + vci_SetMainRelVar(vmr_info, vcimrv_num_columns, 0, tupdesc->natts); + for (int i = 0; i < tupdesc->natts; i++) + { + Oid column_store_oid; + Oid column_meta_oid; + vcis_m_column_t *columnPointer; + + column_store_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_ROS); + column_meta_oid = vci_create_relation(GenRelName(indexRel, i, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_ROS); + + /* set ROS column pointer, */ + columnPointer = vci_GetMColumn(vmr_info, i); + + columnPointer->meta_oid = column_meta_oid; + columnPointer->data_oid = column_store_oid; + columnPointer->max_columns_size = vci_GetColumnWorstSize(TupleDescAttr(tupdesc, i)); + if (TupleDescAttr(tupdesc, i)->attlen == -1) + { + columnPointer->comp_type = vcis_compression_type_variable_raw; + } + else if (TupleDescAttr(tupdesc, i)->attlen > 0) + { + columnPointer->comp_type = vcis_compression_type_fixed_raw; + } + else + { + Assert(false); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected attribute length"))); + } + /* put default extent(free_page) to each columns */ + } + vci_SetMainRelVar(vmr_info, vcimrv_num_nullable_columns, 0, + vci_GetNumberOfNullableColumn(tupdesc)); + vci_SetMainRelVar(vmr_info, vcimrv_null_width_in_byte, 0, + 
(vci_GetNumberOfNullableColumn(tupdesc) + BITS_PER_BYTE - 1) / + BITS_PER_BYTE); + + /* create TID-CRID relations */ + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_META), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_meta_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID, VCI_RELTYPE_DATA), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_data_oid, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '0'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_0, 0, oid); + + oid = vci_create_relation(GenRelName(indexRel, VCI_COLUMN_ID_TID_CRID_UPDATE, '1'), indexRel, indexInfo, VCI_RELTYPE_TIDCRID); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_1, 0, oid); + + /* other variables */ + vci_SetMainRelVar(vmr_info, vcimrv_column_info_offset, 0, vcimrv_column_info - VCI_MIN_PAGE_HEADER); + + offsetToExtentInfo = (vci_MRVGetBlockNumber(vcimrv_extent_info) * VCI_MAX_PAGE_SPACE) + + vci_MRVGetOffset(vcimrv_extent_info) - VCI_MIN_PAGE_HEADER; + + vci_SetMainRelVar(vmr_info, vcimrv_extent_info_offset, 0, offsetToExtentInfo); + vci_SetMainRelVar(vmr_info, vcimrv_size_mr, 0, offsetToExtentInfo); + vci_SetMainRelVar(vmr_info, vcimrv_size_mr_old, 0, offsetToExtentInfo); + + vci_SetMainRelVar(vmr_info, vcimrv_current_ros_version, 0, FrozenTransactionId); + vci_SetMainRelVar(vmr_info, vcimrv_last_ros_version, 0, FrozenTransactionId); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel, 0, 0); + vci_SetMainRelVar(vmr_info, vcimrv_tid_crid_diff_sel_old, 0, 0); + + vci_SetMainRelVar(vmr_info, vcimrv_xid_generation, 0, 1); /* xid generation starts + * from 1 */ + vci_SetMainRelVar(vmr_info, vcimrv_xid_gen_update_xid, 0, GetCurrentTransactionId()); + + vci_SetMainRelVar(vmr_info, vcimrv_ros_command, 0, vci_rc_invalid); + 
vci_SetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0, 0); + + vci_SetMainRelVar(vmr_info, vcimrv_num_extents, 0, 0); + vci_SetMainRelVar(vmr_info, vcimrv_num_extents_old, 0, 0); + + /* flush */ + vci_WriteMainRelVar(vmr_info, vci_wmrv_all); + + /* initialize meta data relations and data relations */ + vci_InitializeColumnRelations(vmr_info, tupdesc, heapRel); + + /* initialize meta data relations and data relations */ + vci_InitializeTidCridUpdateLists(vmr_info); + vci_InitializeTidCridTree(vmr_info); + + /* unlock */ + vci_ReleaseMainRelHeader(vmr_info); + pfree(vmr_info); + + /* convert data in the relations */ + if (vcirc_truncate != vci_rebuild_command && + indexRel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED) + reltuples = vci_ConvertWos2RosForBuild(indexRel, + VciGuc.maintenance_work_mem * (Size) 1024, indexInfo); + + /* + * create statistics for return to caller + */ + result = palloc0_object(IndexBuildResult); + result->heap_tuples = reltuples; + result->index_tuples = -1; + + return result; +} + +/* + * Put or Copy page into INIT_FORK. + * If valid page is given, that page will be put into INIT_FORK. + * If invalid page (NULL pointer) is given, MAIN_FORK page will be copied. + */ +static void +vci_putInitPage(Oid oid, Page page, BlockNumber blkno) +{ + Relation rel; + Page pageCopyFrom; + Buffer buffer = InvalidBuffer; + + rel = relation_open(oid, AccessExclusiveLock); + + /* + * If there is no INIT_FORK, create it. VCI Main Relation may have, but + * others may not have. 
+ */ + + if (!smgrexists(RelationGetSmgr(rel), INIT_FORKNUM)) + smgrcreate(RelationGetSmgr(rel), INIT_FORKNUM, false); + + pageCopyFrom = page; + + if (pageCopyFrom == NULL) + { + buffer = ReadBuffer(rel, blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + pageCopyFrom = BufferGetPage(buffer); + } + + PageSetChecksumInplace(pageCopyFrom, blkno); + smgrwrite(RelationGetSmgr(rel), INIT_FORKNUM, blkno, + (char *) pageCopyFrom, true); + + if (XLogIsNeeded()) + log_newpage(&rel->rd_smgr->smgr_rlocator.locator, INIT_FORKNUM, + blkno, pageCopyFrom, false); + + smgrimmedsync(RelationGetSmgr(rel), INIT_FORKNUM); + + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); + relation_close(rel, AccessExclusiveLock); +} + +static void +vci_inner_buildempty(Relation indexRel) +{ + Oid oid; + Page tmpPage; + TupleDesc itupDesc; + + IndexInfo *indexInfo; + + vci_MainRelHeaderInfo vmr_infoData; + vci_MainRelHeaderInfo *vmr_info = &vmr_infoData; + + Relation heapRel; + bool isctid; + + /* for checking type after getting 'real' TupleDesc. */ + bool dummy_isctid; + + CheckIndexColumnTypes(RelationGetDescr(indexRel), &isctid); + + /* create VCI main relation */ + vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_ros_conv_build); + vci_KeepMainRelHeader(vmr_info); + + /* + * WOS relation : a blank page is put again, because the ambuild data + * might been inserted in WOS. (it may be OK, WOS can be assumed heap + * relation.) 
+	 */
+
+	/* one blank, initialized page is reused for both WOS relations */
+	tmpPage = (Page) palloc(BLCKSZ);
+	PageInit(tmpPage, BLCKSZ, 0);
+
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0);
+	vci_putInitPage(oid, tmpPage, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0);
+	vci_putInitPage(oid, tmpPage, 0);
+
+	pfree(tmpPage);
+
+	/* for the remaining relations, block 0 is copied into the INIT_FORK */
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_meta_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_null_meta_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_delete_meta_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_data_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_null_data_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_delete_data_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+
+	/* column store */
+	heapRel = table_open(indexRel->rd_index->indrelid, AccessShareLock);
+	itupDesc = get_tuple_desc_for_build(heapRel, indexRel, isctid);
+	table_close(heapRel, AccessShareLock);
+
+	CheckIndexColumnTypes(itupDesc, &dummy_isctid);
+
+	for (int attn = 0; attn < itupDesc->natts; attn++)
+	{
+		/* get ROS column pointer, */
+		vcis_m_column_t *columnPointer;
+
+		columnPointer = vci_GetMColumn(vmr_info, attn);
+
+		vci_putInitPage(columnPointer->meta_oid, NULL, 0);
+		vci_putInitPage(columnPointer->data_oid, NULL, 0);
+	}
+
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_meta_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_data_oid, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_0, 0);
+	vci_putInitPage(oid, NULL, 0);
+	oid = vci_GetMainRelVar(vmr_info, vcimrv_tid_crid_update_oid_1, 0);
+	vci_putInitPage(oid, NULL, 0);
+	/* Copy default content into VCI Main rel INIT_FORK */
+	oid = indexRel->rd_id;
+	for (BlockNumber blkno = 0; blkno < lengthof(vmr_info->buffer); blkno++)
+	{
+		vci_putInitPage(oid, NULL, blkno);
+	}
+
+	vci_ReleaseMainRelHeader(vmr_info);
+
+	/* the WOS->ROS conversion is skipped while a TRUNCATE is in progress */
+	if (vcirc_truncate != vci_rebuild_command)
+	{
+		/* extract index key information from the index's pg_index info */
+		indexInfo = BuildIndexInfo(indexRel);
+		vci_ConvertWos2RosForBuild(indexRel,
+								   VciGuc.maintenance_work_mem * (Size) 1024, indexInfo);
+	}
+}
+
+/* LCOV_EXCL_START */
+/*
+ * Reset the file-level copyInfo state for a new COPY command, recording the
+ * given transaction id and command id.  The extent list must already be
+ * empty (asserted).
+ */
+void
+vci_set_copy_transaction_and_command_id(TransactionId xid, CommandId cid)
+{
+	Assert(NULL == copyInfo.extentList);
+	Assert(0 == copyInfo.numAllocatedExtent);
+	copyInfo.xid = xid;
+	copyInfo.cid = cid;
+	copyInfo.numAppendedRows = 0;
+	copyInfo.extentList = NULL;
+	copyInfo.numFilledExtent = 0;
+	copyInfo.numAllocatedExtent = 0;
+}
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Insert one (heap TID, 64-bit xid) tuple into the Data WOS relation of the
+ * given VCI index.
+ */
+static bool
+vci_inner_insert(Relation indexRel, ItemPointer heap_tid)
+{
+	TransactionId xid = GetCurrentTransactionId();
+	TupleDesc	tdesc;
+	HeapTuple	htup;
+	int			options = 0;
+
+	Oid			data_wos_oid;
+	Relation	data_wos_rel;
+
+	Datum		new_values[2];
+	bool		new_isnull[2];
+
+	vci_MainRelHeaderInfo *vmr_info;
+
+	/* get Data WOS relation from vci main rel */
+	vmr_info = palloc0_object(vci_MainRelHeaderInfo);
+	vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_insert);
+	vci_KeepMainRelHeader(vmr_info);
+	data_wos_oid = (Oid) vci_GetMainRelVar(vmr_info, vcimrv_data_wos_oid, 0);
+
+	data_wos_rel = table_open(data_wos_oid, RowExclusiveLock);
+
+	/* get tuple desc */
+	tdesc = RelationGetDescr(data_wos_rel); /* */
+
+	/* create new tuple for insert */
+	new_values[0] = ItemPointerGetDatum(heap_tid);
+	new_values[1] = Int64GetDatum(vci_GenerateXid64(xid, vmr_info));
+	new_isnull[0] = false;
+	new_isnull[1] = false;
+	htup = heap_form_tuple(tdesc, new_values, new_isnull);
+
+	/* insert (+ WAL) */
+
+	/* COPY ... FREEZE: insert the WOS tuple frozen as well */
+	if (copy_with_freeze_option)
+		options |= HEAP_INSERT_FROZEN;
+
+	heap_insert(data_wos_rel, htup, GetCurrentCommandId(true), options, NULL);
+
+	heap_freetuple(htup);
+	table_close(data_wos_rel, RowExclusiveLock);
+
+	/* unlock */
+	
vci_ReleaseMainRelHeader(vmr_info);
+	/* vmr_info was palloc'd above; free it instead of leaking one per insert */
+	pfree(vmr_info);
+
+	return false;
+}
+
+/* LCOV_EXCL_START */
+/*
+ * WriteOneExtentForCopy
+ *		Write the rows accumulated in copyConvContext.storage as one ROS
+ *		extent and remember the extent id in copyInfo.extentList.
+ *
+ * The index relation is locked with ShareUpdateExclusiveLock while the
+ * extent and the main relation header are written.  The chunk storage is
+ * reset afterwards.
+ */
+static void
+WriteOneExtentForCopy(Relation indexRel)
+{
+	const LOCKMODE lockmode = ShareUpdateExclusiveLock;
+
+	LockRelation(indexRel, lockmode);
+	vci_InitMainRelHeaderInfo(&(copyConvContext.info),
+							  indexRel, vci_rc_copy_command);
+	vci_KeepMainRelHeader(&(copyConvContext.info));
+	/* obtain target extent ID */
+	copyConvContext.extentId = vci_GetFreeExtentId(&(copyConvContext.info));
+
+	/*
+	 * Grow the list when it is full.
+	 *
+	 * NOTE(review): vci_set_copy_transaction_and_command_id() leaves
+	 * extentList NULL, and repalloc does not accept NULL -- confirm the
+	 * list is allocated somewhere before the first call.
+	 */
+	if (copyInfo.numAllocatedExtent <= copyInfo.numFilledExtent)
+	{
+		copyInfo.numAllocatedExtent += EXTENT_LIST_UNIT_EXTENSION;
+		copyInfo.extentList = repalloc_array(copyInfo.extentList, uint32, copyInfo.numAllocatedExtent);
+	}
+
+	/*
+	 * Store at the first free slot and then bump the count.  Pre-increment
+	 * would skip slot 0 and write one element past the allocation whenever
+	 * numFilledExtent + 1 == numAllocatedExtent.
+	 */
+	copyInfo.extentList[(copyInfo.numFilledExtent)++] =
+		copyConvContext.extentId;
+
+	/* write one extent into ROS */
+	vci_AddTidCridUpdateList(&(copyConvContext.info),
+							 &(copyConvContext.storage),
+							 copyConvContext.extentId);
+	vci_WriteOneExtent(&(copyConvContext.info),
+					   &(copyConvContext.storage),
+					   copyConvContext.extentId,
+					   InvalidTransactionId,
+					   copyConvContext.xid,
+					   copyConvContext.xid);
+	/* write header of the main relation */
+	vci_WriteMainRelVar(&(copyConvContext.info),
+						vci_wmrv_update);
+	UnlockRelation(indexRel, lockmode);
+	vci_ReleaseMainRelInCommandContext(&copyConvContext);
+
+	vci_ResetRosChunkStorage(&(copyConvContext.storage));
+}
+
+/*
+ * vci_inner_insert_in_copy
+ *		Insert path for COPY: append the heap tuple at heap_tid to the
+ *		in-memory ROS chunk buffer and write an extent once enough rows
+ *		have been collected.
+ *
+ * When copyInfo.numAppendedRows is 0 the copy command context is set up
+ * first and the main relation header is updated to record an unterminated
+ * copy command.  Always returns false.
+ */
+static bool
+vci_inner_insert_in_copy(Relation indexRel, ItemPointer heap_tid)
+{
+	vci_MainRelHeaderInfo *vmr_info = &(copyConvContext.info);
+
+	if (0 == copyInfo.numAppendedRows)
+	{
+		uint32		val;
+
+		vci_InitRosCommandContext0(&copyConvContext, indexRel,
+								   vci_rc_copy_command);
+		vci_RecoverOneVCIIfNecessary(vmr_info);
+
+		vci_InitRosCommandContext1(&copyConvContext,
+								   VciGuc.maintenance_work_mem * INT64CONST(1024),
+								   VCI_NUM_ROWS_IN_EXTENT, 0,
+								   false);
+		vci_ResetRosChunkStorage(&(copyConvContext.storage));
+
+		vci_WriteExtentInfoInMainRosForCopyInit(vmr_info,
+												copyConvContext.extentId,
+												copyConvContext.xid);
+
+		/* increment number of copy commands */
+		val = vci_GetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0);
+		++val;
+		vci_SetMainRelVar(vmr_info, vcimrv_num_unterminated_copy_cmd, 0, val);
+
+		vci_SetMainRelVar(vmr_info, vcimrv_ros_command, 0, vci_rc_copy_command);
+
+		/* flush */
+		vci_WriteMainRelVar(vmr_info, vci_wmrv_update);
+
+		/* unlock */
+		vci_ReleaseMainRelInCommandContext(&copyConvContext);
+
+		/* close heap relation */
+		vci_CloseHeapRelInCommandContext(&copyConvContext);
+	}
+
+	{
+		Relation	rel = table_open(copyConvContext.heapOid, AccessShareLock);
+		Buffer		buffer = ReadBuffer(rel, ItemPointerGetBlockNumber(heap_tid));
+		Page		page = BufferGetPage(buffer);
+		ItemId		lp = PageGetItemId(page, ItemPointerGetOffsetNumber(heap_tid));
+		HeapTupleData tupleData;
+		HeapTuple	tuple = &tupleData;
+
+		Assert(ItemIdIsNormal(lp));
+
+		/* build a HeapTuple header that points straight into the page */
+		tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+		tuple->t_len = ItemIdGetLength(lp);
+		tuple->t_tableOid = RelationGetRelid(rel);
+		tuple->t_self = *heap_tid;
+
+		vci_FillOneRowInRosChunkBuffer(&(copyConvContext.buffer),
+									   &(copyConvContext.info),
+									   &tuple->t_self,
+									   tuple,
+									   copyConvContext.indxColumnIdList,
+									   copyConvContext.heapAttrNumList,
+									   vci_GetTupleDescr(vmr_info));
+
+		if (copyConvContext.buffer.numRowsAtOnce <= copyConvContext.buffer.numFilled)
+			vci_RegisterChunkBuffer(&(copyConvContext.storage),
+									&(copyConvContext.buffer));
+
+		if (copyConvContext.numRowsToConvert <= copyConvContext.storage.numTotalRows)
+		{
+			Assert(copyConvContext.numRowsToConvert == copyConvContext.storage.numTotalRows);
+			WriteOneExtentForCopy(indexRel);
+		}
+
+		/*
+		 * Drop the pin taken by ReadBuffer(); it was never released before.
+		 *
+		 * NOTE(review): the tuple is read without a buffer content lock --
+		 * confirm that is safe for rows being inserted by this COPY.
+		 */
+		ReleaseBuffer(buffer);
+		table_close(rel, AccessShareLock);
+	}
+
+	return false;
+}
+
+/*
+ * vci_FinalizeCopyCommand
+ *		Flush any rows still held in the copy chunk storage as a final
+ *		extent, then tear down the copy command context.
+ */
+void
+vci_FinalizeCopyCommand(void)
+{
+	if (0 < copyConvContext.storage.numTotalRows)
+	{
+		/*
+		 * indexOid names the VCI index, so it must be opened with
+		 * index_open(); table_open() rejects index relations.
+		 */
+		Relation	rel = index_open(copyConvContext.indexOid, RowExclusiveLock);
+
+		WriteOneExtentForCopy(rel);
+		index_close(rel, RowExclusiveLock);
+	}
+
+	vci_FinRosCommandContext(&copyConvContext, false);
+}
+
+static IndexBulkDeleteResult *
+vci_inner_vacuumcleanup(IndexVacuumInfo *info,
+						IndexBulkDeleteResult *stats)
+{
+	elog(DEBUG2, "%s is called.", __func__);
+
+	/* run the ROS vacuum while holding ShareUpdateExclusiveLock on the index */
+	LockRelation(info->index, ShareUpdateExclusiveLock);
+
+	vci_VacuumRos(info->index, info);
+
+	UnlockRelation(info->index, ShareUpdateExclusiveLock);
+
+	/* 'stats' is not used; no statistics are returned */
+	return NULL;
+}
+
+/* LCOV_EXCL_STOP */
+
+/**
+ * vci_add_index_delete
+ *
+ * For every VCI index on heapRel, insert a (heap_tid, 64-bit xid) tuple
+ * into that index's Whiteout WOS relation.  Indexes that are not VCI
+ * indexes are skipped.  The xid is xmin unless xmin is FrozenTransactionId,
+ * in which case the current transaction id is used.
+ *
+ * Raises an error when a VCI index exists and full_page_writes is off.
+ */
+void
+vci_add_index_delete(Relation heapRel, const ItemPointerData *heap_tid, TransactionId xmin)
+{
+	List	   *indexoidlist;
+	ListCell   *l;
+
+	/* Fast path if definitely no indexes */
+	if (!RelationGetForm(heapRel)->relhasindex)
+		return;
+
+	/*
+	 * Get cached list of index OIDs
+	 */
+	indexoidlist = RelationGetIndexList(heapRel);
+
+	/* Iterate for indexes */
+	foreach(l, indexoidlist)
+	{
+		Oid			indexOid = lfirst_oid(l);
+		Relation	indexRel;
+
+		Oid			whiteoutWosOid;
+		Relation	whiteoutWOSRel;
+		Datum		new_values[2];
+		bool		new_isnull[2];
+		HeapTuple	htup;
+		TupleDesc	tdesc;
+
+		vci_MainRelHeaderInfo vmr_info_data;
+		vci_MainRelHeaderInfo *vmr_info = &vmr_info_data;
+
+		TransactionId xid;
+
+		/* Skip if Index is NOT VCI index */
+		indexRel = index_open(indexOid, RowExclusiveLock);
+		if (!isVciIndexRelation(indexRel))
+		{
+			index_close(indexRel, RowExclusiveLock);
+			continue;
+		}
+
+		if (!fullPageWrites)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("access method \"%s\" does not work under full_page_writes=off", VCI_STRING),
+					 errhint("Use DROP INDEX \"%s\"", RelationGetRelationName(indexRel))));
+
+		vci_InitMainRelHeaderInfo(vmr_info, indexRel, vci_rc_wos_delete);
+		vci_KeepMainRelHeader(vmr_info);
+
+		/* Open Whiteout WOS */
+		whiteoutWosOid = (Oid) vci_GetMainRelVar(vmr_info, vcimrv_whiteout_wos_oid, 0);
+		whiteoutWOSRel = table_open(whiteoutWosOid, RowExclusiveLock);
+
+		tdesc = RelationGetDescr(whiteoutWOSRel);
+
+		/* @see generateXidDiff() in vci_ros_command.c */
+		if (!TransactionIdEquals(xmin, FrozenTransactionId))
+			xid = xmin;
+		else
+			xid = GetCurrentTransactionId();
+
+		/* create new tuple for insert */
+		new_values[0] = ItemPointerGetDatum(heap_tid);
+		new_values[1] = Int64GetDatum(vci_GenerateXid64(xid, vmr_info));
+		new_isnull[0] = false;
+		new_isnull[1] = false;
+
+		htup = heap_form_tuple(tdesc, new_values, new_isnull);
+
+		/* insert TID into Whiteout WOS */
+		simple_heap_insert(whiteoutWOSRel, htup);
+		heap_freetuple(htup);
+		table_close(whiteoutWOSRel, RowExclusiveLock);
+
+		/* flush & unlock */
+		vci_ReleaseMainRelHeader(vmr_info);
+
+		index_close(indexRel, RowExclusiveLock);
+	}
+
+	list_free(indexoidlist);
+}
+
+/*
+ * vci_add_should_index_insert
+ *
+ * Call index_insert() for each VCI index of the result relation, using the
+ * tuple in 'slot' with TID 'tupleid'.  Non-VCI indexes, indexes not ready
+ * for inserts, and partial indexes whose predicate is not satisfied are
+ * skipped.  Always returns NIL.
+ */
+List *
+vci_add_should_index_insert(ResultRelInfo *resultRelInfo,
+							TupleTableSlot *slot,
+							ItemPointer tupleid,
+							EState *estate)
+{
+	int			numIndices;
+	RelationPtr relationDescs;
+	Relation	heapRelation;
+	IndexInfo **indexInfoArray;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+
+	/*
+	 * Get information from the result relation info structure.
+	 */
+	numIndices = resultRelInfo->ri_NumIndices;
+	relationDescs = resultRelInfo->ri_IndexRelationDescs;
+	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	heapRelation = resultRelInfo->ri_RelationDesc;
+
+	/*
+	 * We will use the EState's per-tuple context for evaluating predicates
+	 * and index expressions (creating it if it's not already there).
+ */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* + * for each index, form and insert the index tuple + */ + for (int i = 0; i < numIndices; i++) + { + Relation indexRelation = relationDescs[i]; + IndexInfo *indexInfo; + + if (indexRelation == NULL) + continue; + + /* Skip if Index is NOT VCI index */ + if (!isVciIndexRelation(indexRelation)) + continue; + + indexInfo = indexInfoArray[i]; + + /* If the index is marked as read-only, ignore it */ + if (!indexInfo->ii_ReadyForInserts) + continue; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + ExprState *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NULL) + { + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext)) + continue; + } + + /* + * FormIndexDatum fills in its values and isnull parameters with the + * appropriate values for the column(s) of the index. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + index_insert(indexRelation, /* index relation */ + values, /* array of index Datums */ + isnull, /* null flags */ + tupleid, /* tid of heap tuple */ + heapRelation, /* heap relation */ + UNIQUE_CHECK_NO, /* it is ignored in VCI */ + false, /* 'logically unchanged index' hint */ + indexInfo); /* index AM may need this */ + } + + return NIL; +} + +static bool +vci_add_drop_column(const ObjectAddress *object, int flags) +{ + Relation tableRel; + + if (vci_rebuild_command != vcirc_alter_table) + return false; + + Assert(object->objectSubId != 0); + + /* + * If object->objectSubId < 0, it means that the column is a system + * column. 
Such case occurs only when OID column is modified, but this is + * checked in other places. So simply skip in this place. + */ + if (object->objectSubId < 0) + return false; + + tableRel = relation_open(object->objectId, AccessExclusiveLock); + + if (tableRel->rd_rel->relkind != RELKIND_RELATION) + { + relation_close(tableRel, AccessExclusiveLock); + return false; + } + + relation_close(tableRel, AccessExclusiveLock); + + return false; +} + +bool +vci_add_drop_relation(const ObjectAddress *object, int flags) +{ + Relation rel; + Oid ruleId; + Oid oid = object->objectId; + char relKind = get_rel_relkind(oid); + bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) + == PERFORM_DELETION_CONCURRENTLY); + bool concurrent_lock_mode = ((flags & PERFORM_DELETION_CONCURRENT_LOCK) != 0); + vci_id_t vciid; + + if (object->objectSubId != 0) + return vci_add_drop_column(object, flags); + + if (relKind == RELKIND_INDEX) + { + rel = relation_open(oid, AccessExclusiveLock); + + if (!isVciIndexRelation(rel)) + { + relation_close(rel, NoLock); + return false; + } + relation_close(rel, NoLock); + + /* + * Deletion of VCI index by ALTER TABLE command is not supported + * + * Ereport only if the relation is vci main relation so that it does + * not give unnecessary messages. + * + * Return true when so that the post-processing does not continue. 
+ */ + if (vci_rebuild_command == vcirc_alter_table) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter table because the table is indexed by VCI"), + errhint("You must drop index \"%s\" before using this command.", RelationGetRelationName(rel)))); + } + + if (concurrent) + elog(PANIC, "should not reach here"); + + index_drop(oid, concurrent, concurrent_lock_mode); + + vciid.oid = oid; + vciid.dbid = MyDatabaseId; + vci_freeMemoryEntry(&vciid); + } + else + { + rel = relation_open(oid, AccessExclusiveLock); + + if (!vci_isVciAdditionalRelation(rel)) + { + relation_close(rel, NoLock); + return false; + } + + /* + * Deletion of VCI index by ALTER TABLE command is not supported + * + * Ereport only if the relation is vci main relation so that it does + * not give unneccesary messages. + * + * Return true when so that the post-processing does not continue. + */ + if (vci_rebuild_command == vcirc_alter_table) + { + relation_close(rel, NoLock); + return true; + } + + if (concurrent) + elog(PANIC, "should not reach here"); + + /* 2.1 Is relation used? 
*/ + CheckTableNotInUse(rel, "DROP TABLE"); + CheckTableForSerializableConflictIn(rel); + + ruleId = get_rewrite_oid(oid, rel->rd_rel->relname.data, true); + + /* 2.2 Drop relation storage */ + RelationDropStorage(rel); + + relation_close(rel, NoLock); + remove_on_commit_action(oid); + + /* 2.3 release relation cache */ + RelationForgetRelation(oid); + + /* 2.4 remove statistic info */ + RemoveStatistics(oid, 0); + + /* 2.5 remove pg_rewrite entry */ + if (OidIsValid(ruleId)) + RemoveRewriteRuleById(ruleId); + + /* 2.6 remove pg_attributes entry */ + DeleteAttributeTuples(oid); + + /* 2.7 remove pg_system entry */ + DeleteRelationTuple(oid); + + } + + return true; +} + +bool +vci_add_reindex_index(Relation indexRel) +{ + bool continue_after_return; + + /* if it is not VCI relation */ + if (!isVciIndexRelation(indexRel)) + continue_after_return = true; + + /* it is the VCI indexed relation */ + else + { + switch (vci_rebuild_command) + { + case vcirc_reindex: + /* called by the command REINDEX except REINDEX INDEX */ + continue_after_return = false; + break; + + case vcirc_alter_table: + + /* + * alter table for columns indexed by vci index, it is not + * work + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter table because the table is indexed by VCI"), + errhint("You must drop index \"%s\" before using this command.", RelationGetRelationName(indexRel)))); + /* remaining work of reindex_index() must be cancelled */ + continue_after_return = false; + break; + + case vcirc_truncate: + + /* + * This is reindex_index called in truncation Command. In this + * case, before RelationSetNewRelfilenumber(indexRel,...) we + * must drop other relations for VCI. 
+				 */
+				/* vci_add_drop_index(indexRel->rd_id); */
+				continue_after_return = true;
+				break;
+
+			case vcirc_cluster:
+			case vcirc_vacuum_full:
+				/* called by the command CLUSTER or VACUUM FULL */
+				continue_after_return = true;
+				break;
+
+			default:
+				elog(ERROR, "unexpected vci_RebuildCommand");
+				break;
+		}
+	}
+
+	return continue_after_return;
+}
+
+/* Returns true when indexRel is a VCI index. */
+bool
+vci_add_skip_vci_index(Relation indexRel)
+{
+	return isVciIndexRelation(indexRel);
+}
+
+/*
+ * Raises an error when indexRel is a VCI index (ALTER INDEX SET TABLESPACE
+ * is not supported); returns false for any other index.
+ */
+bool
+vci_add_alter_tablespace(Relation indexRel)
+{
+	if (isVciIndexRelation(indexRel))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ALTER INDEX SET TABLESPACE is not supported for VCI"),
+				 errhint("DROP INDEX and CREATE INDEX instead")));
+		/* not reached: ereport(ERROR) does not return */
+		return true;
+	}
+	else
+		return false;
+}
+
+/* Returns the number of blocks of the relation with the given OID. */
+static uint32
+GetNumberOfBlocksFromOid(Oid oid)
+{
+	uint32		result;
+	Relation	rel = relation_open(oid, AccessShareLock);
+
+	result = RelationGetNumberOfBlocks(rel);
+	relation_close(rel, AccessShareLock);
+
+	return result;
+}
+
+/*
+ * Returns the number of blocks used by one VCI element: the data relation
+ * plus, where one exists, the meta relation selected by attrType and index.
+ * For vcis_attribute_type_main the size of the main relation is returned.
+ */
+static int64
+GetNumDBPagesOfVCIElement(vcis_attribute_type_t attrType,
+						  int index,
+						  vci_MainRelHeaderInfo *info)
+{
+#ifdef USE_ASSERT_CHECKING
+	int			numColumns = vci_GetMainRelVar(info, vcimrv_num_columns, 0);
+#endif							/* #ifdef USE_ASSERT_CHECKING */
+	Oid			dataOid = InvalidOid;
+	Oid			metaOid = InvalidOid;
+	int64		result = 0;
+
+	Assert((0 <= attrType) && (attrType < num_vcis_attribute_type));
+	Assert((0 <= index) && (index < vci_GetNumIndexForAttributeType(attrType, numColumns)));
+	switch (attrType)
+	{
+		case vcis_attribute_type_main:
+			return RelationGetNumberOfBlocks(info->rel);
+		case vcis_attribute_type_data_wos:
+			dataOid = vci_GetMainRelVar(info, vcimrv_data_wos_oid, 0);
+			break;
+		case vcis_attribute_type_whiteout_wos:
+			dataOid = vci_GetMainRelVar(info, vcimrv_whiteout_wos_oid, 0);
+			break;
+		case vcis_attribute_type_tid_crid:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_crid_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_tid_crid_meta_oid, 0);
+			break;
+		
case vcis_attribute_type_tid_crid_update:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, index);
+			break;
+		case vcis_attribute_type_delete_vec:
+			dataOid = vci_GetMainRelVar(info, vcimrv_delete_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_delete_meta_oid, 0);
+			break;
+		case vcis_attribute_type_null_vec:
+			dataOid = vci_GetMainRelVar(info, vcimrv_null_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_null_meta_oid, 0);
+			break;
+		case vcis_attribute_type_tid:
+			dataOid = vci_GetMainRelVar(info, vcimrv_tid_data_oid, 0);
+			metaOid = vci_GetMainRelVar(info, vcimrv_tid_meta_oid, 0);
+			break;
+		case vcis_attribute_type_pgsql:
+			{
+				vcis_m_column_t *mColumn;
+
+				mColumn = vci_GetMColumn(info, index);
+				dataOid = mColumn->data_oid;
+				metaOid = mColumn->meta_oid;
+				break;
+			}
+		default:
+			elog(ERROR, "internal error. invalid attribute type");
+	}
+
+	/* add up whichever of the data/meta relations this element has */
+	if (OidIsValid(dataOid))
+		result += GetNumberOfBlocksFromOid(dataOid);
+	if (OidIsValid(metaOid))
+		result += GetNumberOfBlocksFromOid(metaOid);
+
+	return result;
+}
+
+/*
+ * vci_index_size
+ *
+ * SQL-callable: takes the (possibly qualified) name of a VCI index and
+ * returns the total size in bytes of all its element relations, i.e. the
+ * summed block counts multiplied by BLCKSZ.  Raises an error when the named
+ * relation is not a VCI index.
+ */
+PG_FUNCTION_INFO_V1(vci_index_size);
+Datum
+vci_index_size(PG_FUNCTION_ARGS)
+{
+	Relation	rel;
+	uint32		numColumns;
+	uint32		numEntries;
+	int64		result = 0;
+	vci_MainRelHeaderInfo infoData;
+	vci_MainRelHeaderInfo *info = &infoData;
+	LOCKMODE	lockmode = AccessShareLock;
+
+	text	   *relname = PG_GETARG_TEXT_P(0);
+
+	if (PG_NARGS() != 1)
+		ereport(ERROR,
+				(errmsg("vci_index_size requires 1 argument")));
+
+	{
+		RangeVar   *relrv;
+
+		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+		rel = relation_openrv(relrv, lockmode);
+		if (!isVciIndexRelation(rel))
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("only VCI index is supported")));
+	}
+
+	vci_InitMainRelHeaderInfo(info, rel, vci_rc_probe);
+	vci_KeepMainRelHeader(info);
+	numColumns = vci_GetMainRelVar(info, vcimrv_num_columns, 0);
+	numEntries = vci_GetSumOfAttributeIndices(numColumns);
+
+	/* walk every (attribute type, index) pair and sum its block count */
+	for (uint32 aId = 0; aId < numEntries; ++aId)
+	{
+		vcis_attribute_type_t attrType;
+		int			index;
+
+		vci_GetAttrTypeAndIndexFromSumOfIndices(&attrType,
+												&index,
+												numColumns,
+												aId);
+		result += GetNumDBPagesOfVCIElement(attrType, index, info);
+	}
+
+	vci_ReleaseMainRelHeader(info);
+	relation_close(rel, lockmode);
+
+	PG_RETURN_INT64(result * BLCKSZ);
+}
+
+/*
+ * Process Utility Hook
+ *
+ * Checks the statement for operations prohibited on VCI objects, records
+ * which rebuild-type command is running in vci_rebuild_command (and whether
+ * COPY uses FREEZE in copy_with_freeze_option), then hands the statement to
+ * the previous hook or to standard_ProcessUtility().  vci_rebuild_command
+ * and vci_is_in_vci_create_extension are restored afterwards.
+ */
+
+void
+vci_process_utility(PlannedStmt *pstmt,
+					const char *queryString,
+					bool readOnlyTree,
+					ProcessUtilityContext context,
+					ParamListInfo params,
+					QueryEnvironment *queryEnv,
+					DestReceiver *dest,
+					QueryCompletion *qc)
+{
+	bool		creating_vci_extension = false;
+	volatile bool saved_vci_is_in_vci_create_extension;
+
+	Node	   *parseTree = pstmt->utilityStmt;
+
+	vci_check_prohibited_operation(parseTree, &creating_vci_extension);
+
+	saved_vci_is_in_vci_create_extension = vci_is_in_vci_create_extension;
+
+	if (creating_vci_extension)
+		vci_is_in_vci_create_extension = true;
+
+	vci_rebuild_command = vcirc_invalid;
+	copy_with_freeze_option = false;
+
+/* defining UNUSE_COPY_INSERT compiles out the COPY-specific insert path below */
+#define UNUSE_COPY_INSERT
+
+	switch (nodeTag(parseTree))
+	{
+			/* check if the statement is a "COPY table FROM ..." statement */
+		case T_CopyStmt:
+			{
+				CopyStmt   *stmt;
+				ListCell   *lc;
+
+#ifndef UNUSE_COPY_INSERT
+				TransactionId xid = GetCurrentTransactionId();
+				CommandId	cid = GetCurrentCommandId(false);
+
+				Assert(TransactionIdIsValid(xid));
+				Assert(InvalidCommandId != cid);
+				vci_set_copy_transaction_and_command_id(xid, cid);
+#endif							/* #ifndef UNUSE_COPY_INSERT */
+
+				stmt = (CopyStmt *) parseTree;
+
+				/* remember whether the FREEZE option is enabled */
+				foreach(lc, stmt->options)
+				{
+					DefElem    *defel = (DefElem *) lfirst(lc);
+
+					if (strcmp(defel->defname, "freeze") == 0)
+					{
+						if (defGetBoolean(defel))
+						{
+							copy_with_freeze_option = true;
+							break;
+						}
+					}
+				}
+			}
+			break;
+
+			/* check if the statement is a TRUNCATE for VCI Indexed table */
+		case T_TruncateStmt:
+			vci_rebuild_command = vcirc_truncate;
+			break;
+
+			/* check if the statement is a REINDEX for VCI Indexed table */
+		case T_ReindexStmt:
+			vci_rebuild_command = vcirc_reindex;
+			break;
+
+			/* check if the statement is an ALTER TABLE for VCI Indexed table */
+		case T_AlterTableStmt:
+			vci_rebuild_command = vcirc_alter_table;
+			break;
+
+			/* check if the statement is a VACUUM for VCI Indexed table */
+		case T_VacuumStmt:
+			vci_rebuild_command = vcirc_vacuum_full;
+			break;
+
+			/* check if the statement is a CLUSTER for VCI Indexed table */
+		case T_ClusterStmt:
+			vci_rebuild_command = vcirc_cluster;
+			break;
+
+		default:
+			break;
+	}
+
+	if (creating_vci_extension)
+	{
+		/* restore the create-extension flag even if the statement fails */
+		PG_TRY();
+		{
+			if (process_utility_prev != NULL)
+				process_utility_prev(pstmt, queryString, readOnlyTree,
+									 context, params, queryEnv,
+									 dest, qc);
+			else
+				standard_ProcessUtility(pstmt, queryString, readOnlyTree,
+										context, params, queryEnv,
+										dest, qc);
+		}
+		PG_CATCH();
+		{
+			vci_is_in_vci_create_extension = saved_vci_is_in_vci_create_extension;
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+	}
+	else
+	{
+		if (process_utility_prev != NULL)
+			process_utility_prev(pstmt, queryString, readOnlyTree,
+								 context, params, queryEnv,
+								 dest, qc);
+		else
+			standard_ProcessUtility(pstmt, queryString, readOnlyTree,
+									context, params, queryEnv,
+									dest, qc);
+	}
+
+	vci_rebuild_command = vcirc_invalid;
+
+	vci_is_in_vci_create_extension = saved_vci_is_in_vci_create_extension;
+
+#ifndef UNUSE_COPY_INSERT
+	/* check if the statement is a "COPY table FROM ..." statement */
+	if (nodeTag(parseTree) == T_CopyStmt)
+		vci_FinalizeCopyCommand();
+#endif							/* #ifndef UNUSE_COPY_INSERT */
+}
+
+/*
+ * VCI handler function: return IndexAmRoutine with access method parameters
+ * and callbacks.
+ */
+PG_FUNCTION_INFO_V1(vci_handler);
+
+Datum
+vci_handler(PG_FUNCTION_ARGS)
+{
+	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
+
+	amroutine->amstrategies = 1;
+	amroutine->amsupport = 0;
+	amroutine->amoptsprocnum = 0;
+	amroutine->amcanorder = false;
+	amroutine->amcanorderbyop = false;
+	amroutine->amcanhash = false;
+	amroutine->amconsistentequality = false;
+	amroutine->amconsistentordering = false;
+	amroutine->amcanbackward = false;
+	amroutine->amcanunique = false;
+	amroutine->amcanmulticol = true;
+	amroutine->amoptionalkey = false;
+	amroutine->amsearcharray = false;
+	amroutine->amsearchnulls = false;
+	amroutine->amstorage = false;
+	amroutine->amclusterable = false;
+	amroutine->ampredlocks = false;
+	amroutine->amcanparallel = false;
+	amroutine->amcanbuildparallel = false;
+	amroutine->amcaninclude = false;
+	amroutine->amusemaintenanceworkmem = false;
+	amroutine->amsummarizing = false;
+	amroutine->amparallelvacuumoptions = VACUUM_OPTION_NO_PARALLEL;
+	amroutine->amkeytype = InvalidOid;
+
+	amroutine->ambuild = vci_build;
+	amroutine->ambuildempty = vci_buildempty;
+	amroutine->aminsert = vci_insert;
+	amroutine->aminsertcleanup = NULL;
+	amroutine->ambulkdelete = vci_bulkdelete;
+	amroutine->amvacuumcleanup = vci_vacuumcleanup;
+	amroutine->amcanreturn = NULL;
+	amroutine->amcostestimate = vci_costestimate;
+	amroutine->amgettreeheight = vci_gettreeheight;
+	amroutine->amoptions = vci_options;
+	amroutine->amproperty = NULL;
+	amroutine->ambuildphasename = NULL;
+	
amroutine->amvalidate = vci_validate;
+	amroutine->amadjustmembers = NULL;
+	amroutine->ambeginscan = vci_beginscan;
+	amroutine->amrescan = vci_rescan;
+	/* no tuple-at-a-time or bitmap scan callbacks are provided */
+	amroutine->amgettuple = NULL;
+	amroutine->amgetbitmap = NULL;
+	amroutine->amendscan = vci_endscan;
+	amroutine->ammarkpos = vci_markpos;
+	amroutine->amrestrpos = vci_restrpos;
+
+	/* no parallel scan callbacks (amcanparallel is false above) */
+	amroutine->amestimateparallelscan = NULL;
+	amroutine->aminitparallelscan = NULL;
+	amroutine->amparallelrescan = NULL;
+
+	amroutine->amtranslatestrategy = NULL;
+	amroutine->amtranslatecmptype = NULL;
+
+	PG_RETURN_POINTER(amroutine);
+}
diff --git a/contrib/vci/storage/vci_internal_view.c b/contrib/vci/storage/vci_internal_view.c
new file mode 100644
index 0000000..d5422d7
--- /dev/null
+++ b/contrib/vci/storage/vci_internal_view.c
@@ -0,0 +1,663 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_internal_view.c
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		contrib/vci/storage/vci_internal_view.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/skey.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"	/* for RangeVarGetRelid */
+#include "catalog/pg_am.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_namespace.h"
+#include "commands/tablecmds.h"
+#include "commands/defrem.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "nodes/primnodes.h"
+#include "storage/lock.h"
+#include "utils/acl.h"
+#include "utils/fmgroids.h"		/* for F_OIDEQ */
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/syscache.h"
+
+#include "vci.h"
+
+#include "vci_ros.h"
+
+bool		vci_is_in_vci_create_extension;
+
+static List *make_dependent_view_list(Oid relOid);
+static void change_owner_or_schema_of_internal_view_list(List *internal_view_oid_list, Oid newOid, bool is_owner);
+static void check_prohibited_operation_for_extension(const char *extname);
+static void check_prohibited_operation_for_access_method(const char *amname);
+static void check_prohibited_operation_for_range_var(RangeVar *rel);
+static void check_prohibited_operation_for_object(ObjectType objtype, Node *object);
+static void check_prohibited_operation_for_relation(Relation rel);
+static bool is_vci_access_method(Oid accessMethodObjectId);
+
+/*
+ * When the owner of index relOid changes, apply the new owner to the VCI
+ * additional relations that depend on it.  Does nothing unless relKind is
+ * RELKIND_INDEX.
+ */
+void
+vci_alter_table_change_owner(Oid relOid, char relKind, Oid newOwnerId)
+{
+	List	   *view_oid_list = NIL;
+
+	if (relKind != RELKIND_INDEX)
+		return;
+
+	view_oid_list = make_dependent_view_list(relOid);
+
+	if (view_oid_list == NIL)
+		return;
+
+	change_owner_or_schema_of_internal_view_list(view_oid_list, newOwnerId, true);
+
+	list_free(view_oid_list);
+}
+
+/*
+ * When the schema of index relOid changes, apply the new namespace to the
+ * VCI additional relations that depend on it.  Does nothing unless relKind
+ * is RELKIND_INDEX.
+ */
+void
+vci_alter_table_change_schema(Oid relOid, char relKind, Oid newNspOid)
+{
+	List	   *view_oid_list = NIL;
+
+	if (relKind != RELKIND_INDEX)
+		return;
+
+	view_oid_list = make_dependent_view_list(relOid);
+
+	if (view_oid_list == NIL)
+		return;
+
+	change_owner_or_schema_of_internal_view_list(view_oid_list, newNspOid, false);
+
+	list_free(view_oid_list);
+}
+
+/*
+ * Scan pg_depend for entries that reference relation relOid and return the
+ * OIDs of the dependent objects that are themselves relations and depend on
+ * the relation as a whole (refobjsubid == 0).  Returns NIL when there are
+ * none.
+ */
+static List *
+make_dependent_view_list(Oid relOid)
+{
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+	List	   *view_oid_list = NIL;
+
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relOid));
+
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+
+		Assert(pg_depend->refclassid == RelationRelationId);
+		Assert(pg_depend->refobjid == relOid);
+
+		/* Ignore dependents that are not relations (pg_class objects), */
+		/* and entries that reference a column rather than the whole relation */
+		if (pg_depend->classid != RelationRelationId ||
+			pg_depend->refobjsubid != 0)
+			continue;
+
+		view_oid_list = lappend_oid(view_oid_list, pg_depend->objid);
+	}
+
+	systable_endscan(depScan);
+
+	relation_close(depRel, AccessShareLock);
+
+	return view_oid_list;
+}
+
+/*
+ * For each relation OID in view_oid_list that is a VCI additional relation,
+ * update its pg_class row: set relowner (and rewrite relacl for the new
+ * owner) when is_owner is true, otherwise set relnamespace.  Other
+ * relations in the list are left untouched.
+ */
+static void
+change_owner_or_schema_of_internal_view_list(List *view_oid_list, Oid newOid, bool is_owner)
+{
+	ListCell   *lc;
+
+	foreach(lc, view_oid_list)
+	{
+		Oid			childRelOid = lfirst_oid(lc);
+		Relation	class_rel;
+		HeapTuple	tuple;
+		Form_pg_class tuple_class;
+
+		class_rel = table_open(RelationRelationId, RowExclusiveLock);
+
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(childRelOid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for relation %u", childRelOid);
+
+		tuple_class = (Form_pg_class) GETSTRUCT(tuple);
+
+		if (vci_isVciAdditionalRelationTuple(childRelOid, tuple_class))
+		{
+			Datum		repl_val[Natts_pg_class];
+			bool		repl_null[Natts_pg_class];
+			bool		repl_repl[Natts_pg_class];
+			Acl		   *newAcl;
+			Datum		aclDatum;
+			bool		isNull;
+			HeapTuple	newtuple;
+
+			/* only the columns flagged in repl_repl are replaced */
+			memset(repl_null, false, sizeof(repl_null));
+			memset(repl_repl, false, sizeof(repl_repl));
+
+			if (is_owner)
+			{
+				repl_repl[Anum_pg_class_relowner - 1] = true;
+				repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOid);
+
+				/* if the relation has an ACL, rewrite it for the new owner */
+				aclDatum = SysCacheGetAttr(RELOID, tuple,
+										   Anum_pg_class_relacl,
+										   &isNull);
+				if (!isNull)
+				{
+					newAcl = aclnewowner(DatumGetAclP(aclDatum),
+										 tuple_class->relowner, newOid);
+					repl_repl[Anum_pg_class_relacl - 1] = true;
+					repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
+				}
+			}
+			else
+			{
+				repl_repl[Anum_pg_class_relnamespace - 1] = true;
+				repl_val[Anum_pg_class_relnamespace - 1] = ObjectIdGetDatum(newOid);
+			}
+
+			
newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple); + + heap_freetuple(newtuple); + } + + ReleaseSysCache(tuple); + table_close(class_rel, RowExclusiveLock); + } +} + +void +vci_check_prohibited_operation(Node *parseTree, bool *creating_vci_extension) +{ + switch (nodeTag(parseTree)) + { + case T_CreateExtensionStmt: + { + CreateExtensionStmt *stmt = (CreateExtensionStmt *) parseTree; + + if (strcmp(stmt->extname, VCI_STRING) == 0) + { + ListCell *lc; + + foreach(lc, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "schema") == 0 + && get_namespace_oid(defGetString(defel), false) != PG_PUBLIC_NAMESPACE) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" cannot specify a schema name", VCI_STRING))); + } + } + + *creating_vci_extension = true; + } + } + break; + + case T_AlterExtensionStmt: + check_prohibited_operation_for_extension(((AlterExtensionStmt *) parseTree)->extname); + break; + + case T_AlterExtensionContentsStmt: + check_prohibited_operation_for_extension(((AlterExtensionContentsStmt *) parseTree)->extname); + break; + + case T_ViewStmt: /* CREATE (OR REPLACE) VIEW */ + check_prohibited_operation_for_range_var(((ViewStmt *) parseTree)->view); + break; + + case T_AlterTableStmt: /* ALTER VIEW */ + check_prohibited_operation_for_range_var(((AlterTableStmt *) parseTree)->relation); + break; + + case T_RuleStmt: /* CREATE RULE */ + check_prohibited_operation_for_range_var(((RuleStmt *) parseTree)->relation); + break; + + case T_CreateTrigStmt: /* CREATE TRIGGER */ + check_prohibited_operation_for_range_var(((CreateTrigStmt *) parseTree)->relation); + break; + + case T_GrantStmt: + { + GrantStmt *stmt = (GrantStmt *) parseTree; + + if ((stmt->targtype == ACL_TARGET_OBJECT) && (stmt->objtype == OBJECT_TABLE)) + { + ListCell *lc; + + foreach(lc, stmt->objects) + 
check_prohibited_operation_for_range_var((RangeVar *) lfirst(lc)); + } + } + break; + + case T_GrantRoleStmt: + break; + + case T_CreateOpClassStmt: + if (!vci_is_in_vci_create_extension) + check_prohibited_operation_for_access_method(((CreateOpClassStmt *) parseTree)->amname); + break; + + case T_CreateOpFamilyStmt: + if (!vci_is_in_vci_create_extension) + check_prohibited_operation_for_access_method(((CreateOpFamilyStmt *) parseTree)->amname); + break; + + case T_AlterOpFamilyStmt: + if (!vci_is_in_vci_create_extension) + check_prohibited_operation_for_access_method(((AlterOpFamilyStmt *) parseTree)->amname); + break; + + case T_ReindexStmt: + { + ReindexStmt *stmt = (ReindexStmt *) parseTree; + Relation rel; + + if (stmt->kind != REINDEX_OBJECT_INDEX) + break; + + rel = relation_openrv_extended(stmt->relation, AccessShareLock, true); + + if (rel == NULL) + break; + + if (isVciIndexRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("REINDEX is not supported for VCI"), + errhint("DROP INDEX and CREATE INDEX instead"))); + + relation_close(rel, AccessShareLock); + } + break; + + case T_ClusterStmt: + { + ClusterStmt *stmt = (ClusterStmt *) parseTree; + Relation rel; + + /* + * Do nothing, if CLUSTER command issued without relation + * name. 
As this command will only cluster previously + * clustered tables, VCI indexed tables will not be clustered + * anyways + */ + if (stmt->relation == NULL) + break; + + rel = relation_openrv_extended(stmt->relation, AccessShareLock, true); + + if (rel == NULL) + break; + + if (RelationGetForm(rel)->relhasindex) + { + List *indexoidlist; + ListCell *lc; + + indexoidlist = RelationGetIndexList(rel); + + foreach(lc, indexoidlist) + { + Oid indexOid = lfirst_oid(lc); + Relation indexRel; + + indexRel = index_open(indexOid, AccessShareLock); + + if (isVciIndexRelation(indexRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot cluster tables including %s index(es)", VCI_STRING), + errhint("Use DROP INDEX %s first", RelationGetRelationName(indexRel)))); + + index_close(indexRel, AccessShareLock); + } + } + + relation_close(rel, AccessShareLock); + } + break; + + case T_CommentStmt: /* COMMENT */ + { + CommentStmt *stmt = (CommentStmt *) parseTree; + + if (stmt->objtype == OBJECT_MATVIEW) + check_prohibited_operation_for_object(stmt->objtype, stmt->object); + } + break; + + case T_SecLabelStmt: /* SECURITY LABEL */ + { + SecLabelStmt *stmt = (SecLabelStmt *) parseTree; + + if (stmt->objtype == OBJECT_MATVIEW) + check_prohibited_operation_for_object(stmt->objtype, stmt->object); + } + break; + + case T_RenameStmt: + { + RenameStmt *stmt = (RenameStmt *) parseTree; + + switch (stmt->renameType) + { + case OBJECT_MATVIEW: + check_prohibited_operation_for_range_var(stmt->relation); + break; + + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + check_prohibited_operation_for_object(stmt->renameType, stmt->object); + break; + default: + break; + } + } + break; + + case T_AlterObjectSchemaStmt: + { + AlterObjectSchemaStmt *stmt = (AlterObjectSchemaStmt *) parseTree; + + switch (stmt->objectType) + { + case OBJECT_MATVIEW: + check_prohibited_operation_for_range_var(stmt->relation); + break; + + case OBJECT_EXTENSION: + case OBJECT_OPCLASS: + case 
OBJECT_OPFAMILY: + check_prohibited_operation_for_object(stmt->objectType, stmt->object); + break; + + default: + break; + } + } + break; + + case T_AlterOwnerStmt: + { + AlterOwnerStmt *stmt = (AlterOwnerStmt *) parseTree; + + switch (stmt->objectType) + { + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + check_prohibited_operation_for_object(stmt->objectType, stmt->object); + break; + + default: + break; + } + } + break; + + case T_IndexStmt: + { + IndexStmt *stmt = (IndexStmt *) parseTree; + + if (strcmp(stmt->accessMethod, VCI_STRING) == 0) + { + if (stmt->concurrent) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support concurrent index build", VCI_STRING), + errhint("Use DROP INDEX to remove an vci index and try again without CONCURRENTLY option"))); + } + } + } + break; + + case T_DropStmt: + { + DropStmt *stmt = (DropStmt *) parseTree; + + if (stmt->removeType == OBJECT_INDEX) + { + ListCell *lc; + + if (stmt->concurrent) + { + foreach(lc, stmt->objects) + { + RangeVar *range_var = makeRangeVarFromNameList((List *) lfirst(lc)); + Relation relation; + + relation = relation_openrv_extended(range_var, AccessShareLock, true); + + if (relation == NULL) + break; + + if (isVciIndexRelation(relation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support concurrent index drop", VCI_STRING), + errhint("Try again without CONCURRENTLY option"))); + + relation_close(relation, AccessShareLock); + } + } + } + } + break; + + /* + * REFRESH MATERIALIZED VIEW on a VCI internal materialized view + * is prohibited. 
+ */ + case T_RefreshMatViewStmt: + check_prohibited_operation_for_range_var(((RefreshMatViewStmt *) parseTree)->relation); + break; + + default: + break; + } +} + +static void +check_prohibited_operation_for_extension(const char *extname) +{ + if (strcmp(extname, VCI_STRING) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" prohibits this operation", VCI_STRING))); +} + +static void +check_prohibited_operation_for_access_method(const char *amname) +{ + if (strcmp(amname, VCI_STRING) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" prohibits this operation on access method \"%s\"", + VCI_STRING, VCI_STRING))); +} + +static void +check_prohibited_operation_for_range_var(RangeVar *range_var) +{ + Relation rel; + + rel = relation_openrv_extended(range_var, AccessShareLock, true); + + if (rel == NULL) + return; + + check_prohibited_operation_for_relation(rel); + + relation_close(rel, AccessShareLock); +} + +static void +check_prohibited_operation_for_object(ObjectType objtype, Node *object) +{ + switch (objtype) + { + case OBJECT_EXTENSION: + check_prohibited_operation_for_extension(strVal(object)); + break; + + case OBJECT_MATVIEW: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + { + ObjectAddress address; + Relation relation = NULL; + + address = get_object_address(objtype, object, &relation, AccessShareLock, true); + + if (!OidIsValid(address.objectId)) + goto done; + + switch (objtype) + { + case OBJECT_MATVIEW: + check_prohibited_operation_for_relation(relation); + break; + + case OBJECT_OPCLASS: + { + Relation opclass_rel; + HeapTuple opclass_tuple; + Form_pg_opclass opclass_form; + + opclass_rel = table_open(OperatorClassRelationId, AccessShareLock); + + opclass_tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(address.objectId)); + if (!HeapTupleIsValid(opclass_tuple)) /* should not happen */ + elog(ERROR, "cache lookup failed for opclass %u", address.objectId); + + 
opclass_form = (Form_pg_opclass) GETSTRUCT(opclass_tuple); + + if (is_vci_access_method(opclass_form->opcmethod)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" prohibits this operation on operation class \"%s\"", + VCI_STRING, NameStr(opclass_form->opcname)))); + + ReleaseSysCache(opclass_tuple); + table_close(opclass_rel, AccessShareLock); + } + break; + + case OBJECT_OPFAMILY: + { + Relation opfamily_rel; + HeapTuple opfamily_tuple; + Form_pg_opfamily opfamily_form; + + opfamily_rel = table_open(OperatorFamilyRelationId, AccessShareLock); + + opfamily_tuple = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(address.objectId)); + if (!HeapTupleIsValid(opfamily_tuple)) /* should not happen */ + elog(ERROR, "cache lookup failed for opfamily %u", address.objectId); + + opfamily_form = (Form_pg_opfamily) GETSTRUCT(opfamily_tuple); + + if (is_vci_access_method(opfamily_form->opfmethod)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" prohibits this operation on operation family \"%s\"", + VCI_STRING, NameStr(opfamily_form->opfname)))); + + ReleaseSysCache(opfamily_tuple); + table_close(opfamily_rel, AccessShareLock); + } + break; + + default: + break; + } + + done: + if (relation != NULL) + relation_close(relation, AccessShareLock); + } + break; + + default: + break; + } +} + +static void +check_prohibited_operation_for_relation(Relation rel) +{ + if (vci_isVciAdditionalRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" prohibits this operation on view \"%s\"", + VCI_STRING, NameStr(rel->rd_rel->relname)))); +} + +static bool +is_vci_access_method(Oid accessMethodObjectId) +{ + HeapTuple amtuple; + bool result = false; + Form_pg_am amform; + + amtuple = SearchSysCache1(AMOID, + ObjectIdGetDatum(accessMethodObjectId)); + + if (!HeapTupleIsValid(amtuple)) + { + elog(WARNING, + "cache lookup failed for access method %u", accessMethodObjectId); + 
+ return false; + } + + amform = (Form_pg_am) GETSTRUCT(amtuple); + + if (strcmp(NameStr(amform->amname), VCI_STRING) == 0) + result = true; + + ReleaseSysCache(amtuple); + + return result; +} diff --git a/contrib/vci/storage/vci_tidcrid.c b/contrib/vci/storage/vci_tidcrid.c new file mode 100644 index 0000000..8174b47 --- /dev/null +++ b/contrib/vci/storage/vci_tidcrid.c @@ -0,0 +1,1774 @@ +/*------------------------------------------------------------------------- + * + * vci_tidcrid.c + * TIDCRID update list and TIDCRID Tree relation handlings + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/vci/storage/vci_tidcrid.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "catalog/storage.h" +#include "utils/tuplesort.h" + +#include "vci.h" +#include "vci_freelist.h" +#include "vci_ros.h" +#include "vci_tidcrid.h" + +/* + * Add TID-CRID tree page to the free list if number of free items exceeds + * VCI_TID_CRID_FREESPACE_THRESHOLD + */ +#define VCI_TID_CRID_FREESPACE_THRESHOLD (4) + +/* + * Dummy column id for the main relation + */ +#define VCI_TID_CRID_COLID_DUMMY ((int16) 1) + +#define VCI_TID_CRID_RECOVERY_CURRENT_VAL (InvalidOffsetNumber) + +static void InitializeTidCridUpdateList(Oid relOid); + +static void WriteTidCridUpdateList(vci_MainRelHeaderInfo *info, int sel, bool (*callback) (vcis_tidcrid_pair_item_t *item, void *data), void *data); +static void SampleTidCridUpdateList(Relation rel, uint64 count, vcis_tidcrid_pair_list_t *dest); + +static vcis_tidcrid_meta_t *vci_GetTidCridMeta(vci_TidCridRelations *relPair); +static vcis_tidcrid_pagetag_t *vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk); +static void GetTidCridMetaItemPosition(BlockNumber *blockNumber, uint32 *offset, BlockNumber blkNum); +static vcis_tidcrid_meta_item_t *vci_GetTidCridMetaItem(vci_TidCridRelations *relPair, BlockNumber 
blkNum); +static char *vci_GetTidCridTreeNode(vci_TidCridRelations *relPair, ItemPointer trunkPtr, int64 leafNo, ItemPointer retPtr); + +static void RemoveLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo); +static void AddNewLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo); + +static uint64 SearchFromTidCridTree(vci_MainRelHeaderInfo *info, ItemPointer tId); + +static uint64 SearchCridFromTidCridUpdateListContext(vci_TidCridUpdateListContext *context, ItemPointer tId); +static uint64 SearchCridInBlockRange(vci_TidCridUpdateListContext *context, ItemPointer tId, BlockNumber start, BlockNumber end); +static uint64 SearchCridInBlock(vci_TidCridUpdateListContext *context, ItemPointer tId, vcis_tidcrid_pair_item_t *array, int first, int last); + +static OffsetNumber FindFreeItem(vci_TidCridRelations *relPair, BlockNumber freeBlk); + +static void SetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber bit); +static void UnsetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber bit); + +static void WriteRecoveryRecordForTidCridTrunk(vci_TidCridRelations *relPair, BlockNumber origBlkno, BlockNumber trunkBlkno, OffsetNumber trunkOffset); +static void WriteRecoveryRecordForTidCridLeaf(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo, BlockNumber leafBlkno, OffsetNumber leafOffset); +static void WriteRecoveryRecordForTidCridCommon(vci_TidCridRelations *relPair, vcis_tid_crid_op_type_t operation, BlockNumber targetBlkno, uint32 targetInfo, BlockNumber freeBlkno, OffsetNumber freeOffset); + +/** + * function to cast from Page to (vcis_tidcrid_pair_list_t *). 
+ */ +#define vci_GetTidCridPairListT(page) \ + ((vcis_tidcrid_pair_list_t *) &((page)[VCI_MIN_PAGE_HEADER])) + +#define vci_GetTidCridPairItemT(page) \ + ((vcis_tidcrid_pair_item_t *) &((page)[VCI_MIN_PAGE_HEADER])) + +#define ROUND_UP(value, size) ((((value) + (size) - 1) / (size)) * (size)) + +/* + * Initialize TID-CRID update list and create on the storage + */ +static void +InitializeTidCridUpdateList(Oid relOid) +{ + Relation rel = table_open(relOid, ShareLock); + Buffer buffer; + Page page; + vcis_tidcrid_pair_list_t *pairList; + BlockNumber blockNumber = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID; + + Assert(offsetof(vcis_tidcrid_pair_list_t, body) == VCI_TID_CRID_UPDATE_PAGE_SPACE); + + vci_PreparePagesWithOneItemIfNecessary(rel, blockNumber); + buffer = ReadBuffer(rel, blockNumber); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buffer); + pairList = vci_GetTidCridPairListT(page); + pairList->num = 0; + + vci_WriteOneItemPage(rel, buffer); + UnlockReleaseBuffer(buffer); + table_close(rel, ShareLock); +} + +/* + * Same as above, but the argument is the main relation info + */ +void +vci_InitializeTidCridUpdateLists(vci_MainRelHeaderInfo *info) +{ + Oid oid; + + oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, 0); + InitializeTidCridUpdateList(oid); + oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_1, 0); + InitializeTidCridUpdateList(oid); +} + +/* + * Initialize TID-CRID tree relation and create on the storage + */ +void +vci_InitializeTidCridTree(vci_MainRelHeaderInfo *info) +{ + LOCKMODE lockmode = ShareLock; + + vci_TidCridRelations relPairData = {0}; + vci_TidCridRelations *relPair = &relPairData; + vcis_tidcrid_meta_t *tidcridMeta; + vcis_tidcrid_pagetag_t *tidcridTag; + + vci_OpenTidCridRelations(relPair, info, lockmode); + + /* --- Meta --- */ + + vci_FormatPageWithOneItem(relPair->meta, + VCI_TID_CRID_DATA_FIRST_PAGE_ID); + + tidcridMeta = vci_GetTidCridMeta(relPair); + LockBuffer(relPair->bufMeta, 
BUFFER_LOCK_EXCLUSIVE); + + tidcridMeta->free_page_begin_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID; + tidcridMeta->free_page_begin_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID; + tidcridMeta->free_page_end_id = VCI_TID_CRID_DATA_FIRST_PAGE_ID; + tidcridMeta->free_page_end_id_old = VCI_TID_CRID_DATA_FIRST_PAGE_ID; + tidcridMeta->free_page_prev_id = InvalidBlockNumber; + tidcridMeta->free_page_next_id = InvalidBlockNumber; + tidcridMeta->num_free_pages = 1; + tidcridMeta->num_free_pages_old = 1; + tidcridMeta->num_free_page_blocks = 1; + tidcridMeta->num_free_page_blocks_old = 1; + + tidcridMeta->num = 0; + tidcridMeta->num_old = 0; + tidcridMeta->free_block_number = 1; + tidcridMeta->offset = offsetof(vcis_tidcrid_meta_t, body); + + /* need to set invalid to first item ? */ + + vci_WriteOneItemPage(relPair->meta, relPair->bufMeta); + UnlockReleaseBuffer(relPair->bufMeta); + + /* --- Data --- */ + + vci_FormatPageWithItems(relPair->data, + VCI_TID_CRID_DATA_FIRST_PAGE_ID, + VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE); + + tidcridTag = vci_GetTidCridTag(relPair, VCI_TID_CRID_DATA_FIRST_PAGE_ID); + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + + tidcridTag->size = MaxBlockNumber; + tidcridTag->type = vcis_tidcrid_type_pagetag; + tidcridTag->prev_pos = InvalidBlockNumber; + tidcridTag->next_pos = InvalidBlockNumber; + + tidcridTag->num = 0; + + /* Meta data has already been added, so subtract from the free_size */ + tidcridTag->free_size = VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - 1; + tidcridTag->bitmap = 0x1; + + vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID); + UnlockReleaseBuffer(relPair->bufData); + + vci_CloseTidCridRelations(relPair, lockmode); +} + +/* ************************************** + * TID CRID Update List Functions + * ************************************* + */ + +/* + * Open TID-CRID Update List + * + * Returns the alloced vci_TidCridUpdateListContext + */ +vci_TidCridUpdateListContext * 
+vci_OpenTidCridUpdateList(vci_MainRelHeaderInfo *info, int sel) +{ + Oid oid; + Buffer buffer; + Page page; + BlockNumber blkno; + vcis_tidcrid_pair_list_t *src; + vci_TidCridUpdateListContext *context; + + context = palloc0_object(vci_TidCridUpdateListContext); + + Assert((0 <= sel) && (sel < 2)); + oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, sel); + + context->info = info; + context->rel = table_open(oid, AccessShareLock); + + blkno = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID; + + buffer = vci_ReadBufferWithPageInit(context->rel, blkno); + + page = BufferGetPage(buffer); + src = vci_GetTidCridPairListT(page); + + /* Copy header parts */ + MemCpy(&context->header, src, offsetof(vcis_tidcrid_pair_list_t, body)); + + ReleaseBuffer(buffer); + + context->count = src->num; + + /* Calculate number of blocks in CRID-TID Update List */ + context->nblocks = + VCI_TID_CRID_UPDATE_BODY_PAGE_ID + ROUND_UP(context->count, VCI_TID_CRID_UPDATE_PAGE_ITEMS) / VCI_TID_CRID_UPDATE_PAGE_ITEMS; + + return context; +} + +/* + * Close TID-CRID Update List + */ +void +vci_CloseTidCridUpdateList(vci_TidCridUpdateListContext *context) +{ + table_close(context->rel, AccessShareLock); + + pfree(context); +} + +/* + * Read one TID-CRID pair from TID-CRID update list + */ +void +vci_ReadOneBlockFromTidCridUpdateList(vci_TidCridUpdateListContext *context, BlockNumber blkno, vcis_tidcrid_pair_item_t *array) +{ + Buffer buffer; + Page page; + + buffer = vci_ReadBufferWithPageInit(context->rel, blkno); + page = BufferGetPage(buffer); + MemCpy(array, &page[VCI_MIN_PAGE_HEADER], VCI_TID_CRID_UPDATE_PAGE_SPACE); + ReleaseBuffer(buffer); +} + +/* + * Get the length of TID-CRID update list + */ +int32 +vci_GetTidCridUpdateListLength(vci_MainRelHeaderInfo *info, int sel) +{ + Oid oid; + Relation rel; + Buffer buffer; + Page page; + vcis_tidcrid_pair_list_t *src; + int32 length; + BlockNumber blockNumber; + + Assert((0 <= sel) && (sel < 2)); + oid = vci_GetMainRelVar(info, 
vcimrv_tid_crid_update_oid_0, sel); + rel = table_open(oid, AccessShareLock); + + blockNumber = VCI_TID_CRID_UPDATE_HEADER_PAGE_ID; + buffer = vci_ReadBufferWithPageInit(rel, blockNumber); + page = BufferGetPage(buffer); + + src = vci_GetTidCridPairListT(page); + length = src->num; + ReleaseBuffer(buffer); + + table_close(rel, AccessShareLock); + + return length; +} + +/* + * Serialize TID-CRID update list + */ +static void +WriteTidCridUpdateList(vci_MainRelHeaderInfo *info, + int sel, + bool (*callback) (vcis_tidcrid_pair_item_t *item, void *data), + void *data) +{ + Oid oid; + Relation rel; + BlockNumber blockNumber; + vcis_tidcrid_pair_item_t *array; + Page page; + Buffer buffer; + bool is_terminated = false; + vcis_tidcrid_pair_list_t tidcrid_pair_list = {0}; + uint64 count = 0; + + array = palloc_array(vcis_tidcrid_pair_item_t, VCI_TID_CRID_UPDATE_PAGE_ITEMS); + + Assert((0 <= sel) && (sel < 2)); + oid = vci_GetMainRelVar(info, vcimrv_tid_crid_update_oid_0, sel); + rel = table_open(oid, AccessExclusiveLock); + + RelationTruncate(rel, 0); + + vci_PreparePagesWithOneItemIfNecessary(rel, VCI_TID_CRID_UPDATE_HEADER_PAGE_ID); + + blockNumber = VCI_TID_CRID_UPDATE_BODY_PAGE_ID; + + while (!is_terminated) + { + int count_in_page = 0; + + for (int i = 0; i < VCI_TID_CRID_UPDATE_PAGE_ITEMS; i++) + { + if (!callback(&array[i], data)) + { + is_terminated = true; + break; + } + + count_in_page++; + } + + if (count_in_page > 0) + { + vci_PreparePagesWithOneItemIfNecessary(rel, blockNumber); + buffer = ReadBuffer(rel, blockNumber); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buffer); + MemCpy(&page[VCI_MIN_PAGE_HEADER], array, VCI_TID_CRID_UPDATE_PAGE_SPACE); + vci_WriteOneItemPage(rel, buffer); + UnlockReleaseBuffer(buffer); + + blockNumber++; + count += count_in_page; + } + } + + /* Write the initial block */ + tidcrid_pair_list.num = count; + + if (count > 0) + SampleTidCridUpdateList(rel, count, &tidcrid_pair_list); + + buffer = 
vci_ReadBufferWithPageInit(rel, VCI_TID_CRID_UPDATE_HEADER_PAGE_ID); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buffer); + MemCpy(&page[VCI_MIN_PAGE_HEADER], &tidcrid_pair_list, offsetof(vcis_tidcrid_pair_list_t, body)); + vci_WriteOneItemPage(rel, buffer); + UnlockReleaseBuffer(buffer); + + table_close(rel, AccessExclusiveLock); + + vci_SetMainRelVar(info, vcimrv_tid_crid_diff_sel, 0, sel); + + pfree(array); +} + +static void +SampleTidCridUpdateList(Relation rel, uint64 count, vcis_tidcrid_pair_list_t *dest) +{ + BlockNumber nblocks; + BlockNumber blkno; + Buffer buffer; + Page page; + + nblocks = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + ROUND_UP(count, VCI_TID_CRID_UPDATE_PAGE_ITEMS) / VCI_TID_CRID_UPDATE_PAGE_ITEMS; + + dest->blocks_per_samp = + ROUND_UP(nblocks - 1 /* Except the header */ , VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES) / VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES; + + blkno = VCI_TID_CRID_UPDATE_BODY_PAGE_ID; + + while (blkno < nblocks) + { + buffer = vci_ReadBufferWithPageInit(rel, blkno); + page = BufferGetPage(buffer); + + Assert(dest->num_samples < VCI_TID_CRID_UPDATE_CONTEXT_SAMPLES); + + dest->sample_tids[dest->num_samples++] = vci_GetTidCridPairItemT(page)[0].page_item_id; + + ReleaseBuffer(buffer); + + blkno += dest->blocks_per_samp; + } + + /* Put final entry */ + buffer = vci_ReadBufferWithPageInit(rel, nblocks - 1); + page = BufferGetPage(buffer); + + dest->sample_tids[dest->num_samples++] = vci_GetTidCridPairItemT(page)[(count - 1) % VCI_TID_CRID_UPDATE_PAGE_ITEMS].page_item_id; + + ReleaseBuffer(buffer); + + /* Discard if the final entry is duplicated */ + if (ItemPointerEquals(&dest->sample_tids[dest->num_samples - 1], + &dest->sample_tids[dest->num_samples - 2])) + dest->num_samples--; +} + +/* ************************************** + * TID CRID Tree Functions + * ************************************* + */ + +/* + * Open the meta and data relation for TID-CRID tree relation + * + * Caller must release via 
vci_CloseTidCridRelations() + */ +void +vci_OpenTidCridRelations(vci_TidCridRelations *rel, + vci_MainRelHeaderInfo *info, + LOCKMODE lockmode) +{ + rel->meta = table_open(vci_GetMainRelVar(info, vcimrv_tid_crid_meta_oid, 0), lockmode); + rel->data = table_open(vci_GetMainRelVar(info, vcimrv_tid_crid_data_oid, 0), lockmode); + + rel->info = info; +} + +/* + * Close TID-CRID tree relation + */ +void +vci_CloseTidCridRelations(vci_TidCridRelations *rel, LOCKMODE lockmode) +{ + if (rel) + { + if (RelationIsValid(rel->data)) + table_close(rel->data, lockmode); + if (RelationIsValid(rel->meta)) + table_close(rel->meta, lockmode); + } +} + +#define vci_GetTidCridMetaT(page) \ + ((vcis_tidcrid_meta_t *)& ((page)[VCI_MIN_PAGE_HEADER])) + +/* + * Read metadata from the relation + */ +static vcis_tidcrid_meta_t * +vci_GetTidCridMeta(vci_TidCridRelations *relPair) +{ + Page page; + + relPair->bufMeta = vci_ReadBufferWithPageInit(relPair->meta, VCI_COLUMN_META_HEADER_PAGE_ID); + page = BufferGetPage(relPair->bufMeta); + + return vci_GetTidCridMetaT(page); +} + +/* + * Read the metadata in the initial tuple of pages + */ +static vcis_tidcrid_pagetag_t * +vci_GetTidCridTag(vci_TidCridRelations *relPair, BlockNumber blk) +{ + Page page; + HeapTupleHeader htup; + + relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, blk); + page = BufferGetPage(relPair->bufData); + + htup = (HeapTupleHeader) PageGetItem(page, + PageGetItemId(page, VCI_TID_CRID_PAGETAG_ITEM_ID)); + + return (vcis_tidcrid_pagetag_t *) ((char *) htup + htup->t_hoff); +} + +/* + * Calculate offset (page number and the position in the page) to access the + * flexible array in meta relation + */ +static void +GetTidCridMetaItemPosition(BlockNumber *blockNumber, + uint32 *offset, + BlockNumber blkNum) +{ + const int maxTidCridMetaItemInFirstPage = + (VCI_MAX_PAGE_SPACE - offsetof(vcis_tidcrid_meta_t, body)) / sizeof(vcis_tidcrid_meta_item_t); + const int maxTidCridMetaItem = VCI_MAX_PAGE_SPACE / 
sizeof(vcis_tidcrid_meta_item_t); + + Assert(blockNumber); + Assert(offset); + + if (blkNum < maxTidCridMetaItemInFirstPage) + { + *blockNumber = 0; + *offset = VCI_MIN_PAGE_HEADER + offsetof(vcis_tidcrid_meta_t, body) + + (blkNum * sizeof(vcis_tidcrid_meta_item_t)); + } + else + { + int32 blkNumRem = blkNum - maxTidCridMetaItemInFirstPage; + + *blockNumber = blkNumRem / maxTidCridMetaItem; + blkNumRem -= *blockNumber * maxTidCridMetaItem; + *blockNumber += 1; + *offset = VCI_MIN_PAGE_HEADER + + (blkNumRem * sizeof(vcis_tidcrid_meta_item_t)); + } +} + +/* + * read an entry from vcis_tidcrid_meta + */ +static vcis_tidcrid_meta_item_t * +vci_GetTidCridMetaItem(vci_TidCridRelations *relPair, BlockNumber blkNum) +{ + BlockNumber blockNumber; + uint32 offset; + Page page; + BlockNumber currentBlocks = RelationGetNumberOfBlocks(relPair->meta); + + GetTidCridMetaItemPosition(&blockNumber, &offset, blkNum); + + if (blockNumber >= currentBlocks) + vci_FormatPageWithOneItem(relPair->meta, blockNumber); + else + vci_PreparePagesWithOneItemIfNecessary(relPair->meta, blockNumber); + + relPair->bufMeta = ReadBuffer(relPair->meta, blockNumber); + page = BufferGetPage(relPair->bufMeta); + + return (vcis_tidcrid_meta_item_t *) &(((char *) page)[offset]); +} + +/* + * Returns the pointer to nodes (trunk or leaf) + */ +static char * +vci_GetTidCridTreeNode(vci_TidCridRelations *relPair, ItemPointer trunkPtr, int64 leafNo, + ItemPointer retPtr) +{ + Page page; + HeapTupleHeader htup; + vcis_tidcrid_trunk_t *trunk; + ItemPointerData leafPtrData; + ItemPointer leafPtr = &leafPtrData; + + Assert(ItemPointerIsValid(trunkPtr)); + + relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(trunkPtr)); + page = BufferGetPage(relPair->bufData); + htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, ItemPointerGetOffsetNumber(trunkPtr))); + trunk = (vcis_tidcrid_trunk_t *) ((char *) htup + htup->t_hoff); + + if (leafNo == VCI_TID_CRID_TRUNKNODE) + { + 
Assert(retPtr == NULL); + + return (char *) trunk; + } + + Assert(leafNo >= 0 && leafNo < VCI_TID_CRID_LEAF_CAPACITY); + leafPtrData = trunk->leaf_item[leafNo]; /* copy */ + + ReleaseBuffer(relPair->bufData); + + if (ItemPointerIsValid(leafPtr)) + { + vcis_tidcrid_pagetag_t *tag PG_USED_FOR_ASSERTS_ONLY; + + tag = vci_GetTidCridTag(relPair, ItemPointerGetBlockNumber(leafPtr)); + + Assert(tag->bitmap & (1U << (ItemPointerGetOffsetNumber(leafPtr) - 1))); + + ReleaseBuffer(relPair->bufData); + + relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(leafPtr)); + page = BufferGetPage(relPair->bufData); + htup = (HeapTupleHeader) PageGetItem(page, + PageGetItemId(page, ItemPointerGetOffsetNumber(leafPtr))); + + if (retPtr) + *retPtr = leafPtrData; + + return (char *) htup + htup->t_hoff; + } + + return NULL; +} + +/* + * Removes LeafNode + */ +static void +RemoveLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo) +{ + vcis_tidcrid_leaf_t *leaf PG_USED_FOR_ASSERTS_ONLY; + vcis_tidcrid_trunk_t *trunk; + + ItemPointerData leafPtrData; + ItemPointer leafPtr = &leafPtrData; + + /* leaf */ + leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr, + leafNo, leafPtr); + ReleaseBuffer(relPair->bufData); + Assert(leaf); + + /* Write recovery record */ + WriteRecoveryRecordForTidCridLeaf(relPair, trunkPtr, leafNo, + ItemPointerGetBlockNumber(leafPtr), + ItemPointerGetOffsetNumber(leafPtr)); + + UnsetFreeSpaceBitmap(relPair, + ItemPointerGetBlockNumber(leafPtr), + ItemPointerGetOffsetNumber(leafPtr)); + + /* Remove forom the trunk node */ + trunk = (vcis_tidcrid_trunk_t *) + vci_GetTidCridTreeNode(relPair, trunkPtr, VCI_TID_CRID_TRUNKNODE, NULL); + Assert(trunk->type == vcis_tidcrid_type_trunk); + Assert((trunk->bitmap & (UINT64CONST(1) << leafNo)) != 0); + + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + trunk->bitmap &= ~(UINT64CONST(1) << leafNo); + MemSet(&trunk->leaf_item[leafNo], 0, 
sizeof(ItemPointerData)); + + vci_WriteItem(relPair->data, relPair->bufData, + ItemPointerGetOffsetNumber(trunkPtr)); + UnlockReleaseBuffer(relPair->bufData); +} + +/* + * Add new leaf node + */ +static void +AddNewLeafTidCridTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo) +{ + Page page; + HeapTupleHeader htup; + BlockNumber freeBlk; + OffsetNumber newOffset; + vcis_tidcrid_leaf_t *leaf; + vcis_tidcrid_trunk_t *trunk; + vcis_tidcrid_pagetag_t *tag; + + ItemPointerData leafPtrData; + ItemPointer leafPtr = &leafPtrData; + + /* Firstly search from the same page as trunk */ + tag = vci_GetTidCridTag(relPair, ItemPointerGetBlockNumber(trunkPtr)); + Assert(tag->type == vcis_tidcrid_type_pagetag); + newOffset = vci_GetLowestBit(~tag->bitmap) + 1; + ReleaseBuffer(relPair->bufData); + + if (newOffset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE) + { + /* Free space is found */ + freeBlk = ItemPointerGetBlockNumber(trunkPtr); + } + else + { + freeBlk = vci_FindFreeSpaceForExtent((vci_ColumnRelations *) relPair, 1); + newOffset = FindFreeItem(relPair, freeBlk); + } + + WriteRecoveryRecordForTidCridLeaf(relPair, trunkPtr, leafNo, freeBlk, VCI_TID_CRID_RECOVERY_CURRENT_VAL); + + ItemPointerSet(leafPtr, freeBlk, newOffset); + + /* Connect to the leaf from the trunk */ + trunk = (vcis_tidcrid_trunk_t *) + vci_GetTidCridTreeNode(relPair, trunkPtr, VCI_TID_CRID_TRUNKNODE, NULL); + Assert(trunk->type == vcis_tidcrid_type_trunk); + Assert((trunk->bitmap & (UINT64CONST(1) << leafNo)) == 0); + + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + trunk->bitmap |= (UINT64CONST(1) << leafNo); + trunk->leaf_item[leafNo] = *leafPtr; + + vci_WriteItem(relPair->data, relPair->bufData, + ItemPointerGetOffsetNumber(trunkPtr)); + UnlockReleaseBuffer(relPair->bufData); + + /* Write a tag to the page */ + SetFreeSpaceBitmap(relPair, freeBlk, newOffset); + + relPair->bufData = vci_ReadBufferWithPageInit(relPair->data, ItemPointerGetBlockNumber(leafPtr)); + page = 
BufferGetPage(relPair->bufData); + htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, ItemPointerGetOffsetNumber(leafPtr))); + + leaf = (vcis_tidcrid_leaf_t *) ((char *) htup + htup->t_hoff); + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + + leaf->type = vcis_tidcrid_type_leaf; + leaf->bitmap = UINT64CONST(0); + + for (int i = 0; i < VCI_TID_CRID_LEAF_CAPACITY; i++) + { + leaf->crid[i] = vci_GetCridFromUint64(VCI_INVALID_CRID); + } + + vci_WriteItem(relPair->data, relPair->bufData, newOffset); + UnlockReleaseBuffer(relPair->bufData); +} + +/* + * Returns the item pointer to the subtree related with original TID + */ +void +vci_GetTidCridSubTree(vci_TidCridRelations *relPair, BlockNumber blkOrig, + ItemPointer retPtr) +{ + vcis_tidcrid_meta_item_t *metaItem; + vcis_tidcrid_pagetag_t *tag PG_USED_FOR_ASSERTS_ONLY; + + metaItem = vci_GetTidCridMetaItem(relPair, blkOrig); + ItemPointerSet(retPtr, metaItem->block_number, metaItem->item_id); + + if (ItemPointerIsValid(retPtr)) + { + tag = vci_GetTidCridTag(relPair, metaItem->block_number); + + Assert((tag->bitmap & (UINT64CONST(1) << (metaItem->item_id - 1))) != 0); + + ReleaseBuffer(relPair->bufData); + } + + ReleaseBuffer(relPair->bufMeta); +} + +/* + * Create a new trunk in the subtree + */ +void +vci_CreateTidCridSubTree(vci_TidCridRelations *relPair, BlockNumber blkOrig, + ItemPointer retPtr) +{ + BlockNumber freeBlk; + OffsetNumber newOffset; + + vcis_tidcrid_trunk_t *trunk; + vcis_tidcrid_meta_item_t *metaItem; + + Assert(retPtr); + + /* Find the free page from the list */ + freeBlk = vci_FindFreeSpaceForExtent((vci_ColumnRelations *) relPair, 1); + + /* Find the free item from the free page */ + newOffset = FindFreeItem(relPair, freeBlk); + + WriteRecoveryRecordForTidCridTrunk(relPair, blkOrig, freeBlk, VCI_TID_CRID_RECOVERY_CURRENT_VAL); + + /* Set ItemPointer to the meta relation item */ + metaItem = vci_GetTidCridMetaItem(relPair, blkOrig); + LockBuffer(relPair->bufMeta, 
BUFFER_LOCK_EXCLUSIVE);
+	metaItem->block_number = freeBlk;
+	metaItem->item_id = newOffset;
+
+	vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+	UnlockReleaseBuffer(relPair->bufMeta);
+
+	/* Write a tag in the page */
+	SetFreeSpaceBitmap(relPair, freeBlk, newOffset);
+
+	ItemPointerSet(retPtr, freeBlk, newOffset);
+	trunk = (vcis_tidcrid_trunk_t *)
+		vci_GetTidCridTreeNode(relPair, retPtr, VCI_TID_CRID_TRUNKNODE, NULL);
+	LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+	Assert(trunk);
+
+	trunk->type = vcis_tidcrid_type_trunk;
+	trunk->bitmap = UINT64CONST(0);
+
+	MemSet((trunk->leaf_item), 0, sizeof(trunk->leaf_item));
+
+	vci_WriteItem(relPair->data, relPair->bufData, newOffset);
+	UnlockReleaseBuffer(relPair->bufData);
+}
+
+/*
+ * Apply a list of TID->CRID pairs to the subtree rooted at trunkPtr.
+ *
+ * Consecutive entries of newItems whose offset numbers share the same upper
+ * bits (i.e. address the same leaf) are written into that leaf in one pass
+ * while its buffer is exclusively locked.  A leaf that does not exist yet is
+ * created first.  A leaf whose bitmap goes from non-empty to empty is
+ * unlinked from the trunk afterwards.
+ *
+ * relPair   TID-CRID relations; relPair->bufData is used as the working buffer
+ * trunkPtr  item pointer of the trunk node of the subtree
+ * newItems  pairs to store; a pair carrying VCI_INVALID_CRID clears its slot
+ *
+ * NOTE(review): only *consecutive* entries are batched per leaf, so the list
+ * is presumably sorted by TID -- confirm against the callers.
+ */
+void
+vci_UpdateTidCridSubTree(vci_TidCridRelations *relPair, ItemPointer trunkPtr,
+						 vcis_tidcrid_pair_list_t *newItems)
+{
+	for (int i = 0; i < newItems->num; i++)
+	{
+		vcis_tidcrid_leaf_t *leaf;
+		ItemPointerData leafPtrData;
+		ItemPointer leafPtr = &leafPtrData;
+		int			prevBitCount = 0;
+		bool		leafEmptied;
+		uint32		offset = ItemPointerGetOffsetNumber(&newItems->body[i].page_item_id) - 1;
+		int8		itemIdUpperBits;
+
+		/* Extract upper bits from item_id */
+		itemIdUpperBits = (offset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+			((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+		Assert(itemIdUpperBits < VCI_TID_CRID_LEAF_CAPACITY);
+
+		leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr,
+															  itemIdUpperBits, leafPtr);
+		if (leaf == NULL)
+		{
+			/* No leaf for this range yet: create it, then fetch it again */
+			AddNewLeafTidCridTree(relPair, trunkPtr, itemIdUpperBits);
+			leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkPtr,
+																  itemIdUpperBits, leafPtr);
+		}
+
+		LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+		prevBitCount = vci_GetBitCount(leaf->bitmap);
+
+		for (; i < newItems->num; i++)
+		{
+			uint32		innerOffset = ItemPointerGetOffsetNumber(&newItems->body[i].page_item_id) - 1;
+			int8		innerItemIdUpperBits;
+			int8		itemIdLowerBits;
+
+			/* Extract upper bits from item_id */
+			innerItemIdUpperBits = (innerOffset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+				((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+			if (itemIdUpperBits != innerItemIdUpperBits)
+			{
+				/*
+				 * This entry belongs to another leaf.  Step back so that the
+				 * outer loop's i++ lands on it again.
+				 */
+				i--;
+				break;
+			}
+
+			/* Extract lower bits from item_id */
+			itemIdLowerBits = innerOffset & ((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+			leaf->crid[itemIdLowerBits] = newItems->body[i].crid;
+
+			if (vci_GetUint64FromCrid(leaf->crid[itemIdLowerBits]) == VCI_INVALID_CRID)
+				leaf->bitmap &= ~(UINT64CONST(1) << itemIdLowerBits);
+			else
+				leaf->bitmap |= UINT64CONST(1) << itemIdLowerBits;
+		}
+
+		/*
+		 * Decide whether the leaf became empty while the buffer is still
+		 * pinned and locked.  "leaf" points into the buffer page, so it must
+		 * not be dereferenced after UnlockReleaseBuffer().
+		 */
+		leafEmptied = (prevBitCount != 0 && leaf->bitmap == 0);
+
+		vci_WriteItem(relPair->data, relPair->bufData,
+					  ItemPointerGetOffsetNumber(leafPtr));
+		UnlockReleaseBuffer(relPair->bufData);
+
+		if (leafEmptied)
+			RemoveLeafTidCridTree(relPair, trunkPtr, itemIdUpperBits);
+	}
+}
+
+/*
+ * Convert TID to CRID using the TID-CRID tree
+ *
+ * Returns the CRID corresponding to the given tid, otherwise VCI_INVALID_CRID
+ */
+static uint64
+SearchFromTidCridTree(vci_MainRelHeaderInfo *info, ItemPointer tId)
+{
+	const LOCKMODE lockmode = AccessShareLock;
+
+	uint64		retVal = VCI_INVALID_CRID;
+	ItemPointerData trunkNodeData;
+	ItemPointer trunkNode = &trunkNodeData;
+
+	vcis_tidcrid_leaf_t *leaf;
+
+	BlockNumber blk = ItemPointerGetBlockNumber(tId);
+	uint32		offset = ItemPointerGetOffsetNumber(tId) - 1;
+	int8		itemIdLowerBits;
+	int8		itemIdUpperBits;
+	vci_TidCridRelations relPairData;
+	vci_TidCridRelations *relPair = &relPairData;
+
+	/* Separate item id into upper/lower parts */
+	itemIdLowerBits = offset & ((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+	itemIdUpperBits = (offset >> VCI_TID_CRID_LEAF_CAPACITY_BITS) &
+		((1 << VCI_TID_CRID_LEAF_CAPACITY_BITS) - 1);
+
+	vci_OpenTidCridRelations(relPair, info, lockmode);
+	vci_GetTidCridSubTree(relPair, blk, trunkNode);
+
+	if (ItemPointerIsValid(trunkNode))
+	{
+		leaf = (vcis_tidcrid_leaf_t *) vci_GetTidCridTreeNode(relPair, trunkNode, itemIdUpperBits, NULL);
+		if (leaf)
+		{
+			retVal =
vci_GetUint64FromCrid(leaf->crid[itemIdLowerBits]); + ReleaseBuffer(relPair->bufData); + } + } + + vci_CloseTidCridRelations(relPair, lockmode); + + return retVal; +} + +/* + * Covert TID to CRID + * + * Firstly checks the TID-CRID update list, then search TID-CRID tree + * + * @param[in] context context for the TID-CRID update list + * @param[in] tId target tid + * @param[out] fromTree true if the CRID is found from the tree + * + * Returns found CID, otherwise VCI_INVALID_CRID + */ +uint64 +vci_GetCridFromTid(vci_TidCridUpdateListContext *context, ItemPointer tId, bool *fromTree) +{ + bool viaTree = false; + uint64 result = VCI_MOVED_CRID; + + if (context->count > 0) + result = SearchCridFromTidCridUpdateListContext(context, tId); + + if (result == VCI_MOVED_CRID) + { + result = SearchFromTidCridTree(context->info, tId); + viaTree = true; + } + + if (fromTree) + *fromTree = viaTree; + + return result; +} + +/* + * Search tid from TID-CRID update list + */ +static uint64 +SearchCridFromTidCridUpdateListContext(vci_TidCridUpdateListContext *context, ItemPointer tId) +{ + int ret; + int min, + max, + pivot; + BlockNumber blk_start, + blk_end; + + /* Compare with the first sample */ + ret = ItemPointerCompare(tId, &context->header.sample_tids[0]); + if (ret < 0) /* tId < context->samp_tids[0] */ + return VCI_MOVED_CRID; + + /* Compare with the last sample */ + ret = ItemPointerCompare(&context->header.sample_tids[context->header.num_samples - 1], tId); + if (ret < 0) /* context->samp_tids[context->num_samples - + * 1] < tId */ + return VCI_MOVED_CRID; + + min = 0; + max = context->header.num_samples - 1; + + while (max - min > 1) + { + pivot = (min + max) / 2; + + ret = ItemPointerCompare(tId, &context->header.sample_tids[pivot]); + + if (ret < 0) /* tId < pivot */ + max = pivot; + else if (0 < ret) /* pivot < tId */ + min = pivot; + else + min = max = pivot; + } + + blk_start = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + min * context->header.blocks_per_samp; + blk_end = 
VCI_TID_CRID_UPDATE_BODY_PAGE_ID + max * context->header.blocks_per_samp + context->header.blocks_per_samp - 1; + + if (context->nblocks <= blk_start) + blk_start = context->nblocks - 1; + + if (context->nblocks <= blk_end) + blk_end = context->nblocks - 1; + + return SearchCridInBlockRange(context, tId, blk_start, blk_end); +} + +static uint64 +SearchCridInBlockRange(vci_TidCridUpdateListContext *context, + ItemPointer tId, + BlockNumber start, BlockNumber end /* inclusive */ ) +{ + bool found = false; + uint64 ret = VCI_MOVED_CRID; + + do + { + BlockNumber pivot; + int first, + last; + Buffer buffer; + Page page; + vcis_tidcrid_pair_item_t *array; + bool less_lower_bound; + bool more_upper_bound; + + pivot = (start + end) / 2; + + if (pivot < context->nblocks - 1) + { + first = 0; + last = VCI_TID_CRID_UPDATE_PAGE_ITEMS - 1; + } + else + { + first = 0; + last = (context->count - 1) % VCI_TID_CRID_UPDATE_PAGE_ITEMS; + } + + buffer = vci_ReadBufferWithPageInit(context->rel, pivot); + page = BufferGetPage(buffer); + + array = vci_GetTidCridPairItemT(page); + + less_lower_bound = (ItemPointerCompare(tId, &array[first].page_item_id) < 0); + more_upper_bound = (ItemPointerCompare(&array[last].page_item_id, tId) < 0); + + if ((start == end) && (less_lower_bound || more_upper_bound)) + { + found = true; + ret = VCI_MOVED_CRID; + } + else if (less_lower_bound) + { + end = pivot; + } + else if (more_upper_bound) + { + start = pivot + 1; + } + else + { + found = true; + ret = SearchCridInBlock(context, tId, array, first, last); + } + + ReleaseBuffer(buffer); + } while (!found); + + return ret; +} + +/* + * Search CRID from the one block in TID-CRID update list + */ +static uint64 +SearchCridInBlock(vci_TidCridUpdateListContext *context, + ItemPointer tId, + vcis_tidcrid_pair_item_t *array, + int first, int last /* inclusive */ ) +{ + int pivot; + + while (last - first > 1) + { + int ret; + + pivot = (first + last) / 2; + + ret = 
ItemPointerCompare(&array[pivot].page_item_id, tId); + + if (ret < 0) /* array[pivot].page_item_id < tId */ + first = pivot; + else if (ret > 0) /* array[pivot].page_item_id > tId */ + last = pivot; + else + return vci_GetUint64FromCrid(array[pivot].crid); + } + + if (ItemPointerEquals(&array[first].page_item_id, tId)) + return vci_GetUint64FromCrid(array[first].crid); + else if (ItemPointerEquals(&array[last].page_item_id, tId)) + return vci_GetUint64FromCrid(array[last].crid); + else + return VCI_MOVED_CRID; +} + +/* + * Find free item from pages in data relation of TID-CRID free + * + * Returns offset to the free item + */ +static OffsetNumber +FindFreeItem(vci_TidCridRelations *relPair, BlockNumber freeBlk) +{ + vcis_tidcrid_pagetag_t *tag; + OffsetNumber newOffset; + + tag = vci_GetTidCridTag(relPair, freeBlk); + Assert(tag->type == vcis_tidcrid_type_pagetag); + + /* Initialize if not done yet */ + if ((tag->bitmap & 1) == 0) + { + tag->num = 0; + tag->free_size = VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - 1; + tag->bitmap = 0x1; + } + + newOffset = vci_GetLowestBit(~tag->bitmap) + 1; /* LSB = 0 */ + + Assert((newOffset >= 1) && (newOffset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE)); + ReleaseBuffer(relPair->bufData); + + return newOffset; +} + +/* + * Set a bit to the page tag + */ +static void +SetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber offset) +{ + vcis_tidcrid_pagetag_t *tag = vci_GetTidCridTag(relPair, blk); + uint32 bit = offset - 1; /* one-origin -> zero-origin */ + uint32 nextBitmap; + + Assert((offset >= 1) && (offset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE)); + Assert((tag->bitmap & (uint32) (1U << bit)) == 0); + + LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE); + nextBitmap = tag->bitmap | (uint32) (1U << bit); + + /* + * Remove from the free space list if the number of free items is less + * than threshold + */ + if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - vci_GetBitCount(nextBitmap) == + 
VCI_TID_CRID_FREESPACE_THRESHOLD)
+	{
+		vcis_free_space_t *FS;
+
+		/* Release once to pass relPair to vci_RemoveFreeSpaceFromLinkList */
+		UnlockReleaseBuffer(relPair->bufData);
+
+		FS = vci_GetFreeSpace((vci_RelationPair *) relPair, blk);
+		vci_WriteRecoveryRecordForFreeSpace(relPair,
+											VCI_TID_CRID_COLID_DUMMY,
+											VCI_INVALID_DICTIONARY_ID,
+											blk,
+											FS);
+		ReleaseBuffer(relPair->bufData);
+
+		vci_RemoveFreeSpaceFromLinkList((vci_ColumnRelations *) relPair, blk, 1);
+
+		/* Adjust size and positions */
+		tag = vci_GetTidCridTag(relPair, blk);
+		LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+		tag->size = 1;
+		tag->prev_pos = blk;
+		tag->next_pos = blk;
+	}
+
+	tag->bitmap = nextBitmap;
+	vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+	UnlockReleaseBuffer(relPair->bufData);
+
+}
+
+/*
+ * Clear a bit in the page tag
+ *
+ * Marks item "offset" (one-origin) of block "blk" as free in the page tag
+ * bitmap and writes the tag.  If clearing the bit moves the page from exactly
+ * VCI_TID_CRID_FREESPACE_THRESHOLD - 1 free items to exactly
+ * VCI_TID_CRID_FREESPACE_THRESHOLD free items (the counterpart of the test in
+ * SetFreeSpaceBitmap), the page is appended to the free space link list
+ * again, after a free-space recovery record has been written.
+ */
+static void
+UnsetFreeSpaceBitmap(vci_TidCridRelations *relPair, BlockNumber blk, OffsetNumber offset)
+{
+	vcis_tidcrid_pagetag_t *tag = vci_GetTidCridTag(relPair, blk);
+	int			bit = offset - 1;	/* one-origin -> zero-origin */
+	uint32		nextBitmap;
+
+	Assert((offset >= 1) && (offset <= VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE));
+	Assert((tag->bitmap & (uint32) (1U << bit)) != 0);
+
+	LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+	/*
+	 * Keep a private copy of the new bitmap: "tag" points into the buffer
+	 * page and must not be read once the buffer has been released below.
+	 */
+	nextBitmap = tag->bitmap & ~(uint32) (1U << bit);
+	tag->bitmap = nextBitmap;
+	vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+	UnlockReleaseBuffer(relPair->bufData);
+
+	/* "+ 1" re-adds the bit just cleared, i.e. the count before this call */
+	if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - (vci_GetBitCount(nextBitmap) + 1) ==
+		VCI_TID_CRID_FREESPACE_THRESHOLD)
+	{
+		vcis_free_space_t newFS;
+		BlockNumber newFSBlockNumber;
+
+		vci_MakeFreeSpace((vci_ColumnRelations *) relPair, blk, &newFSBlockNumber, &newFS, false);
+		Assert(newFSBlockNumber == blk);
+
+		vci_WriteRecoveryRecordForFreeSpace(relPair,
+											VCI_TID_CRID_COLID_DUMMY,
+											VCI_INVALID_DICTIONARY_ID,
+											newFSBlockNumber,
+											&newFS);
+
+		vci_AppendFreeSpaceToLinkList((vci_ColumnRelations *) relPair,
+									  newFSBlockNumber,
+									  newFS.prev_pos,
+									  newFS.next_pos,
+									  newFS.size);
+	}
+}
+
+/*
+ * Write a recovery record while creating trunk node in the subtree
+ */
+static void
+WriteRecoveryRecordForTidCridTrunk(vci_TidCridRelations *relPair, BlockNumber origBlkno, BlockNumber trunkBlkno, OffsetNumber trunkOffset)
+{
+	WriteRecoveryRecordForTidCridCommon(relPair, vcis_tid_crid_op_trunk, origBlkno, 0, trunkBlkno, trunkOffset);
+}
+
+/*
+ * Write a recovery record while creating leaf node
+ */
+static void
+WriteRecoveryRecordForTidCridLeaf(vci_TidCridRelations *relPair, ItemPointer trunkPtr, uint32 leafNo, BlockNumber leafBlkno, OffsetNumber leafOffset)
+{
+	vcis_tid_crid_op_type_t operation;
+	OffsetNumber trunkOffset;
+	uint32		targetInfo;
+
+	if (leafOffset == VCI_TID_CRID_RECOVERY_CURRENT_VAL)
+		operation = vcis_tid_crid_op_leaf_add;
+	else
+		operation = vcis_tid_crid_op_leaf_remove;
+
+	trunkOffset = ItemPointerGetOffsetNumber(trunkPtr);
+	Assert((trunkOffset <= 0xFFFF) && (leafNo <= 0xFFFF));
+	targetInfo = (trunkOffset & 0xFFFF) | ((leafNo & 0xFFFF) << 16);
+
+	WriteRecoveryRecordForTidCridCommon(relPair, operation, ItemPointerGetBlockNumber(trunkPtr), targetInfo, leafBlkno, leafOffset);
+}
+
+/*
+ * Write a recovery record while updating TID-CRID tree
+ */
+static void
+WriteRecoveryRecordForTidCridCommon(vci_TidCridRelations *relPair, vcis_tid_crid_op_type_t operation, BlockNumber targetBlkno, uint32 targetInfo, BlockNumber freeBlkno, OffsetNumber freeOffset)
+{
+	vcis_tidcrid_pagetag_t *tag;
+	uint32		tag_bitmap;
+
+	/*
+	 * 1. Obtains the bitmap to write the meta relation
+	 */
+	tag = vci_GetTidCridTag(relPair, freeBlkno);
+	Assert(tag->type == vcis_tidcrid_type_pagetag);
+	LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+
+	if (freeOffset == VCI_TID_CRID_RECOVERY_CURRENT_VAL)
+		tag_bitmap = tag->bitmap;
+	else
+		tag_bitmap = tag->bitmap & ~(UINT64CONST(1) << (freeOffset - 1));
+
+	UnlockReleaseBuffer(relPair->bufData);
+
+	/* 2.
Write information to the meta relation */
+	Assert(relPair->info);
+
+	vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_operation, 0, operation);
+	vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_target_blocknumber, 0, targetBlkno);
+	vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_target_info, 0, targetInfo);
+	vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_free_blocknumber, 0, freeBlkno);
+	vci_SetMainRelVar(relPair->info, vcimrv_tid_crid_tag_bitmap, 0, tag_bitmap);
+	vci_SetMainRelVar(relPair->info, vcimrv_working_column_id, 0, VCI_INVALID_COLUMN_ID);
+	vci_WriteMainRelVar(relPair->info, vci_wmrv_update);
+}
+
+/*
+ * Initialize recovery record for the TID-CRID
+ */
+void
+vci_InitRecoveryRecordForTidCrid(vci_MainRelHeaderInfo *info)
+{
+	vci_SetMainRelVar(info, vcimrv_tid_crid_operation, 0, vcis_tid_crid_op_none);
+
+	vci_SetMainRelVar(info, vcimrv_working_column_id, 0, VCI_INVALID_COLUMN_ID);
+}
+
+/*
+ * Roll back the TID-CRID tree update described by the recovery record
+ *
+ * Reads the recovery record stored in the main relation header.  If an
+ * operation is recorded, the page tag bitmap of the recorded free block is
+ * overwritten with the recorded bitmap, and then:
+ *
+ * - vcis_tid_crid_op_trunk: the meta item of the recorded target block is
+ *   reset to an invalid item pointer;
+ * - vcis_tid_crid_op_leaf_add / vcis_tid_crid_op_leaf_remove: the recorded
+ *   leaf slot is cleared in the trunk (bitmap bit and leaf item pointer).
+ *
+ * Does nothing when the recorded operation is vcis_tid_crid_op_none.
+ *
+ * @param[in] info main relation
+ */
+void
+vci_RecoveryTidCrid(vci_MainRelHeaderInfo *info)
+{
+	LOCKMODE	lockmode = RowExclusiveLock;
+
+	vci_TidCridRelations relPairData = {0};
+	vci_TidCridRelations *relPair = &relPairData;
+
+	vcis_tid_crid_op_type_t operation;
+	BlockNumber targetBlkno;
+	uint32		targetInfo;
+	BlockNumber freeBlkno;
+	uint32		tag_bitmap;
+
+	operation = vci_GetMainRelVar(info, vcimrv_tid_crid_operation, 0);
+	targetBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_target_blocknumber, 0);
+	targetInfo = vci_GetMainRelVar(info, vcimrv_tid_crid_target_info, 0);
+	freeBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_free_blocknumber, 0);
+	tag_bitmap = vci_GetMainRelVar(info, vcimrv_tid_crid_tag_bitmap, 0);
+
+	if (operation == vcis_tid_crid_op_none)
+		return;
+
+	Assert(BlockNumberIsValid(freeBlkno));
+	vci_OpenTidCridRelations(relPair, info, lockmode);
+
+	/* Restore the page tag bitmap saved in the recovery record */
+	{
+		vcis_tidcrid_pagetag_t *tag;
+
+		tag = vci_GetTidCridTag(relPair, freeBlkno);
+		Assert(tag->type == vcis_tidcrid_type_pagetag);
+		LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+		tag->bitmap = tag_bitmap;
+		vci_WriteItem(relPair->data, relPair->bufData, VCI_TID_CRID_PAGETAG_ITEM_ID);
+		UnlockReleaseBuffer(relPair->bufData);
+	}
+
+	/*
+	 * The cases below still read and write relPair->meta / relPair->data, so
+	 * the relations stay open until the switch is done.
+	 */
+	switch (operation)
+	{
+		case vcis_tid_crid_op_trunk:
+			{
+				vcis_tidcrid_meta_item_t *metaItem;
+
+				metaItem = vci_GetTidCridMetaItem(relPair, targetBlkno);
+				LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+				metaItem->block_number = InvalidBlockNumber;
+				metaItem->item_id = InvalidOffsetNumber;
+
+				vci_WriteOneItemPage(relPair->meta, relPair->bufMeta);
+				UnlockReleaseBuffer(relPair->bufMeta);
+			}
+			break;
+
+		case vcis_tid_crid_op_leaf_add:
+		case vcis_tid_crid_op_leaf_remove:
+			{
+				vcis_tidcrid_trunk_t *trunk;
+				ItemPointerData trunkItem;
+				uint32		leafNo;
+
+				/*
+				 * For the leaf operations, targetBlkno is the block number
+				 * of the trunk, and the lower 16 bits of targetInfo are the
+				 * offset of the trunk item.
+				 */
+				ItemPointerSet(&trunkItem, targetBlkno, (targetInfo & 0xFFFF));
+
+				/*
+				 * The upper 16 bits of targetInfo are the leafNo in the
+				 * trunk.
+				 */
+				leafNo = targetInfo >> 16;
+
+				trunk = (vcis_tidcrid_trunk_t *)
+					vci_GetTidCridTreeNode(relPair, &trunkItem, VCI_TID_CRID_TRUNKNODE, NULL);
+
+				LockBuffer(relPair->bufData, BUFFER_LOCK_EXCLUSIVE);
+				trunk->bitmap &= ~(UINT64CONST(1) << leafNo);
+				MemSet(&trunk->leaf_item[leafNo], 0, sizeof(ItemPointerData));
+
+				vci_WriteItem(relPair->data, relPair->bufData,
+							  ItemPointerGetOffsetNumber(&trunkItem));
+				UnlockReleaseBuffer(relPair->bufData);
+			}
+			break;
+
+		default:
+			break;
+	}
+
+	vci_CloseTidCridRelations(relPair, lockmode);
+}
+
+/*
+ * Recovery the free list for TID-CRID tree relation
+ */
+void
+vci_RecoveryFreeSpaceForTidCrid(vci_MainRelHeaderInfo *info)
+{
+	LOCKMODE	lockmode = RowExclusiveLock;
+
+	int16		colId;
+	vci_ColumnRelations relPairData = {0};
+	vci_ColumnRelations *relPair = &relPairData;
+	vcis_column_meta_t *columnMeta;
+
+	BlockNumber startBlockNumber;
+	BlockNumber prevFreeBlockNumber;
+	BlockNumber nextFreeBlockNumber;
+	uint32		oldSize;
+
+	vci_OpenTidCridRelations(relPair, info, lockmode);
+
+	/* get last working column */
+	colId = vci_GetMainRelVar(info, vcimrv_working_column_id, 0);
+
+	if (colId != VCI_INVALID_COLUMN_ID)
+	{
+		/* get column rel set */
+		columnMeta = vci_GetColumnMeta(&relPair->bufMeta, relPair->meta);
+		LockBuffer(relPair->bufMeta, BUFFER_LOCK_EXCLUSIVE);
+
+		/* restore from old fields */
+		columnMeta->num_extents = columnMeta->num_extents_old;
+		columnMeta->num_free_pages = columnMeta->num_free_pages_old;
+		columnMeta->num_free_page_blocks = columnMeta->num_free_page_blocks_old;
+
+		/* read freelink list recovery information */
+		startBlockNumber = columnMeta->new_data_head;
+		prevFreeBlockNumber = columnMeta->free_page_prev_id;
+		nextFreeBlockNumber = columnMeta->free_page_next_id;
+		oldSize = columnMeta->free_page_old_size;
+
+		vci_WriteColumnMetaDataHeader(relPair->meta, relPair->bufMeta);
+		UnlockReleaseBuffer(relPair->bufMeta);
+
+		/* Recovery the free link list */
+
+		vci_AppendFreeSpaceToLinkList(relPair, startBlockNumber, prevFreeBlockNumber,
+									  
nextFreeBlockNumber, oldSize); + } + else + { + /* + * Connect to the free list if the previous crash was done before leaf + * was removed from the trunk. + */ + vcis_tid_crid_op_type_t operation; + BlockNumber freeBlkno; + uint32 tag_bitmap; + vcis_free_space_t newFS; + BlockNumber newFSBlockNumber; + + operation = vci_GetMainRelVar(info, vcimrv_tid_crid_operation, 0); + freeBlkno = vci_GetMainRelVar(info, vcimrv_tid_crid_free_blocknumber, 0); + tag_bitmap = vci_GetMainRelVar(info, vcimrv_tid_crid_tag_bitmap, 0); + + switch (operation) + { + case vcis_tid_crid_op_none: + case vcis_tid_crid_op_trunk: + case vcis_tid_crid_op_leaf_add: + break; + + case vcis_tid_crid_op_leaf_remove: + if (VCI_ITEMS_IN_PAGE_FOR_TID_CRID_TREE - (vci_GetBitCount(tag_bitmap) + 1) == + VCI_TID_CRID_FREESPACE_THRESHOLD) + { + vci_MakeFreeSpace((vci_ColumnRelations *) relPair, freeBlkno, &newFSBlockNumber, &newFS, false); + Assert(newFSBlockNumber == freeBlkno); + + vci_WriteRecoveryRecordForFreeSpace(relPair, + VCI_TID_CRID_COLID_DUMMY, + VCI_INVALID_DICTIONARY_ID, + newFSBlockNumber, + &newFS); + + vci_AppendFreeSpaceToLinkList((vci_ColumnRelations *) relPair, + newFSBlockNumber, + newFS.prev_pos, + newFS.next_pos, + newFS.size); + } + break; + + default: + break; + } + } + + vci_CloseTidCridRelations(relPair, lockmode); +} + +static int +CmpTidCridPairbyTID(const void *pa, const void *pb) +{ + vcis_tidcrid_pair_item_t *a = (vcis_tidcrid_pair_item_t *) pa; + vcis_tidcrid_pair_item_t *b = (vcis_tidcrid_pair_item_t *) pb; + + uint64 a_tid = vci_GetTid64FromItemPointer(&a->page_item_id); + uint64 b_tid = vci_GetTid64FromItemPointer(&b->page_item_id); + + return (a_tid < b_tid) ? -1 : ((b_tid < a_tid) ? 
1 : 0); +} + +static vcis_tidcrid_pair_item_t * +CreateTidCridUpdateListFromRosChunkStorage(RosChunkStorage *src, + int32 extentId) +{ + vcis_tidcrid_pair_item_t *dst; + int ptr = 0; + uint64 crid = vci_CalcCrid64(extentId, 0); + vcis_tidcrid_pair_item_t temp; + + Assert(VCI_FIRST_NORMAL_EXTENT_ID <= extentId); + dst = palloc_array(vcis_tidcrid_pair_item_t, src->numTotalRows); + for (int chunkId = 0; chunkId < src->numFilled; ++chunkId) + { + RosChunkBuffer *chunk = src->chunk[chunkId]; + + for (uint32 lId = 0; lId < chunk->numFilled; ++lId) + { + temp.page_item_id = *(ItemPointerData *) + &(chunk->tidData[lId * sizeof(ItemPointerData)]); + temp.crid = vci_GetCridFromUint64(crid); + dst[ptr++] = temp; + ++crid; + } + } + + qsort(dst, ptr, sizeof(vcis_tidcrid_pair_item_t), CmpTidCridPairbyTID); + + return dst; +} + +/* + * Callback structure passed to MergeTidCridUpdateListCallback + */ +typedef struct +{ + /* + * oldList: base list for the merge + */ + + /* + * Context for TID-CRID Update List + */ + vci_TidCridUpdateListContext *oldListContext; + + /* + * Current position in old list + */ + uint64 oldListContextIndex; + + /* + * Record one block from the oldListContext + */ + vcis_tidcrid_pair_item_t oldListInBlock[VCI_TID_CRID_UPDATE_PAGE_ITEMS]; + + /* + * Position of reading block + */ + BlockNumber prevOldListContextBlkno; + + /* + * addList1: add different entries to oldList (exclusively used with + * addList2) + */ + + /* + * Pair TID-CRID list + */ + vcis_tidcrid_pair_item_t *addList1; + + /* + * Maximum entries in addList1 + */ + int32 numAddList1; + + /* + * Current position in addList1 + */ + int32 addList1Index; + + /* + * addList2: add different entries to oldList (exclusively used with + * addList1) + */ + Tuplesortstate *addList2; + ItemPointerData addList2CurrentTid; + vcis_Crid addList2Crid; + bool addList2Terminated; + +} vci_MergeTidCridUpdateListContext; + +/* + * Callback function passed to WriteTidCridUpdateList() + * + * Merge oldList and 
{addList1, addList2} and outputs with TID ordering. + */ +static bool +MergeTidCridUpdateListCallback(vcis_tidcrid_pair_item_t *item, void *data) +{ + vci_MergeTidCridUpdateListContext *mergeContext = (vci_MergeTidCridUpdateListContext *) data; + bool old_entry_valid; + bool add_entry_valid; + vcis_tidcrid_pair_item_t old_item, + add_item; + +retry: + old_entry_valid = false; + add_entry_valid = false; + + if (mergeContext->addList1) + { + /* addList1 */ + if (mergeContext->addList1Index < mergeContext->numAddList1) + { + add_item = mergeContext->addList1[mergeContext->addList1Index]; + add_entry_valid = true; + } + } + else + { + /* addList2 */ + if (!mergeContext->addList2Terminated) + { + if (!ItemPointerIsValid(&mergeContext->addList2CurrentTid)) + { + Datum value; + bool isnull; + + if (tuplesort_getdatum(mergeContext->addList2, true, true, &value, &isnull, NULL)) + { + mergeContext->addList2CurrentTid = *DatumGetItemPointer(value); + } + else + { + mergeContext->addList2Terminated = true; + goto get_old_list; + } + } + + add_item.page_item_id = mergeContext->addList2CurrentTid; + add_item.crid = mergeContext->addList2Crid; + + add_entry_valid = true; + } + } + +get_old_list: + if (mergeContext->oldListContextIndex < mergeContext->oldListContext->count) + { + BlockNumber blkno; + + blkno = VCI_TID_CRID_UPDATE_BODY_PAGE_ID + mergeContext->oldListContextIndex / VCI_TID_CRID_UPDATE_PAGE_ITEMS; + + if (blkno != mergeContext->prevOldListContextBlkno) + { + vci_ReadOneBlockFromTidCridUpdateList(mergeContext->oldListContext, blkno, mergeContext->oldListInBlock); + mergeContext->prevOldListContextBlkno = blkno; + } + + old_item = mergeContext->oldListInBlock[mergeContext->oldListContextIndex % VCI_TID_CRID_UPDATE_PAGE_ITEMS]; + + old_entry_valid = true; + } + + if (old_entry_valid && add_entry_valid) + { + int32 res = ItemPointerCompare(&old_item.page_item_id, &add_item.page_item_id); + + if (res == 0) + { + /* + * Retain latter one if same TID item has come + */ + 
mergeContext->oldListContextIndex++;
+			mergeContext->addList1Index++;
+			ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+			if (vci_GetUint64FromCrid(add_item.crid) == VCI_MOVED_CRID)
+				goto retry;
+
+			*item = add_item;
+		}
+		else if (res < 0)
+		{
+			mergeContext->oldListContextIndex++;
+
+			*item = old_item;
+		}
+		else
+		{
+			mergeContext->addList1Index++;
+			ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+			Assert(vci_GetUint64FromCrid(add_item.crid) != VCI_MOVED_CRID);
+
+			*item = add_item;
+		}
+
+		return true;
+	}
+	else if (old_entry_valid)
+	{
+		mergeContext->oldListContextIndex++;
+
+		*item = old_item;
+
+		return true;
+	}
+	else if (add_entry_valid)
+	{
+		mergeContext->addList1Index++;
+		ItemPointerSetInvalid(&mergeContext->addList2CurrentTid);
+
+		Assert(vci_GetUint64FromCrid(add_item.crid) != VCI_MOVED_CRID);
+
+		*item = add_item;
+
+		return true;
+	}
+	else
+	{
+		return false;
+	}
+}
+
+/*
+ * Add the TID-CRID pairs of one extent to the TID-CRID Update List
+ *
+ * The list currently selected by vcimrv_tid_crid_diff_sel is opened as the
+ * old list, an additional list is built from src and extentId, and both are
+ * handed to WriteTidCridUpdateList() together with
+ * MergeTidCridUpdateListCallback and the other selector value (1 ^ oldSel).
+ * The additional list is freed and the old list is closed before returning.
+ *
+ * @param[in] info header info of the main relation
+ * @param[in] src chunk storage of the extent to be added; its numTotalRows
+ * is used as the number of entries in the additional list
+ * @param[in] extentId extent id to be added; asserted to be at least
+ * VCI_FIRST_NORMAL_EXTENT_ID
+ */
+void
+vci_AddTidCridUpdateList(vci_MainRelHeaderInfo *info,
+						 RosChunkStorage *src,
+						 int32 extentId)
+{
+	uint32		oldSel = vci_GetMainRelVar(info, vcimrv_tid_crid_diff_sel, 0);
+	uint32		newSel = 1 ^ oldSel;	/* flip bit 0: the other selector */
+	vci_MergeTidCridUpdateListContext mergeContext = {0};
+
+	Assert(VCI_FIRST_NORMAL_EXTENT_ID <= extentId);
+	mergeContext.oldListContext = vci_OpenTidCridUpdateList(info, oldSel);
+
+	mergeContext.addList1 = CreateTidCridUpdateListFromRosChunkStorage(src, extentId);
+	mergeContext.numAddList1 = src->numTotalRows;
+
+	mergeContext.prevOldListContextBlkno = InvalidBlockNumber;
+
+	WriteTidCridUpdateList(info, newSel, MergeTidCridUpdateListCallback, &mergeContext);
+
+	pfree(mergeContext.addList1);
+	vci_CloseTidCridUpdateList(mergeContext.oldListContext);
+}
+/*
+ * Merge the entries of a tuplesort into the TID-CRID Update List
+ *
+ * Same flow as vci_AddTidCridUpdateList(), except that the entries to add
+ * come from newList (stored in the merge context as addList2) and crid is
+ * stored as addList2Crid.  Both selectors are supplied by the caller.
+ * newList is not released in this function.
+ *
+ * @param[in] info header info of the main relation
+ * @param[in] newSel selector passed to WriteTidCridUpdateList()
+ * @param[in] oldSel selector of the existing list opened as merge input
+ * @param[in] newList tuplesort holding the entries to be added
+ * @param[in] crid CRID stored in the merge context for the added entries
+ */
+void
+vci_MergeAndWriteTidCridUpdateList(vci_MainRelHeaderInfo *info,
+								   int newSel, int oldSel, Tuplesortstate *newList, vcis_Crid crid)
+{
+	vci_MergeTidCridUpdateListContext mergeContext = {0};
+
+	mergeContext.oldListContext = vci_OpenTidCridUpdateList(info, oldSel);
+
+	mergeContext.addList2 = newList;
+	ItemPointerSetInvalid(&mergeContext.addList2CurrentTid);
+	mergeContext.addList2Crid = crid;
+
+	mergeContext.prevOldListContextBlkno = InvalidBlockNumber;
+
+	WriteTidCridUpdateList(info, newSel, MergeTidCridUpdateListCallback, &mergeContext);
+
+	vci_CloseTidCridUpdateList(mergeContext.oldListContext);
+}
diff --git a/contrib/vci/storage/vci_wos.c b/contrib/vci/storage/vci_wos.c
new file mode 100644
index 0000000..7739b4c
--- /dev/null
+++ b/contrib/vci/storage/vci_wos.c
@@ -0,0 +1,263 @@
+/*-------------------------------------------------------------------------
+ *
+ * vci_wos.c
+ *	  Manipulate WOS
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/vci/storage/vci_wos.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include
+
+#include "access/heapam_xlog.h"
+#include "access/relscan.h"
+#include "access/visibilitymap.h"
+#include "access/xact.h"
+#include "c.h"
+#include "miscadmin.h"
+#include "storage/ipc.h"
+#include "storage/procarray.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+#include "vci.h"
+
+#include "vci_mem.h"
+#include "vci_ros.h"
+#include "vci_wos.h"
+#include "vci_xact.h"
+
+bool		HeapTupleSatisfiesWos2Ros(HeapTuple htup, Snapshot snapshot, Buffer buffer);
+bool		HeapTupleSatisfiesLocalRos(HeapTuple htup, Snapshot snapshot, Buffer buffer);
+static bool IsXmaxHasCommitted(HeapTuple htup);
+
+/*
+ * Single-entry cache used by IsXmaxHasCommitted: the xmax examined last and
+ * the result computed for it.  It is invalidated (xid set to
+ * InvalidTransactionId) by vci_GetSnapshotForWos2Ros() and
+ * vci_GetSnapshotForLocalRos().
+ */
+static struct
+{
+	TransactionId xid;
+	bool		committed;
+}			cachedTransactionInfo;
+
+/*
+ * vci_GetSnapshotForWos2Ros
+ *
+ * Creates a snapshot which is used for WOS->ROS and WOS->Delete vector
+ * conversions.
+ *
+ * WOS entries are created when CRUD commands are executed, and the visibility
+ * check in WOS is done with the normal snapshot.
+ *
+ * ROS control commands can remove WOS entries, and the result can be seen by
+ * everyone as soon as the command is done.
+ *
+ * The snapshot is obtained from vci_GetCurrentSnapshot() and its
+ * snapshot_type is set to SNAPSHOT_VCI_WOS2ROS.  As a side effect the cache
+ * used by IsXmaxHasCommitted() is invalidated.
+ *
+ * Caller must call PopActiveSnapshot() afterward.
+ */
+Snapshot
+vci_GetSnapshotForWos2Ros(void)
+{
+	Snapshot	snapshot;
+
+	snapshot = vci_GetCurrentSnapshot();
+
+	snapshot->snapshot_type = SNAPSHOT_VCI_WOS2ROS;
+
+	/* Invalidate the IsXmaxHasCommitted() cache */
+	cachedTransactionInfo.xid = InvalidTransactionId;
+
+	return snapshot;
+}
+
+bool
+HeapTupleSatisfiesWos2Ros(HeapTuple htup, Snapshot snapshot, Buffer buffer)
+{
+	/*
+	 * Returns true when htup passes the plain MVCC visibility test and its
+	 * xmax is not judged committed by IsXmaxHasCommitted().
+	 *
+	 * The snapshot's type is switched to SNAPSHOT_MVCC only for the duration
+	 * of the HeapTupleSatisfiesVisibility() call; the original type is
+	 * restored on every path before returning.
+	 */
+	SnapshotType temp_snapshot_type;
+
+	temp_snapshot_type = snapshot->snapshot_type;
+	snapshot->snapshot_type = SNAPSHOT_MVCC;
+
+	if (HeapTupleSatisfiesVisibility(htup, snapshot, buffer))
+	{
+		snapshot->snapshot_type = temp_snapshot_type;
+
+		if (IsXmaxHasCommitted(htup))
+			return false;
+
+		return true;
+	}
+
+	snapshot->snapshot_type = temp_snapshot_type;
+	return false;
+}
+
+static TransactionId exclusiveTransactionId;	/* set by vci_GetSnapshotForLocalRos() */
+
+/*
+ * vci_GetSnapshotForLocalRos
+ *
+ * Creates a snapshot which is used for local ROS conversion
+ *
+ * @param[in] inclusive_xid Visible xid regardless of the MVCC snapshot
+ * @param[in] exclusive_xid Invisible xid regardless of the MVCC snapshot
+ *
+ * Mostly same as vci_GetSnapshotForWos2Ros(), but sometimes results by ROS
+ * control commands cannot be seen by MVCC. Because the transactions creating
+ * local ROS and ROS control commands are sometimes overlapped.
+ */
+Snapshot
+vci_GetSnapshotForLocalRos(TransactionId inclusive_xid, TransactionId exclusive_xid)
+{
+	Snapshot	snapshot;
+
+	snapshot = vci_GetCurrentSnapshot();
+
+	snapshot->snapshot_type = SNAPSHOT_VCI_LOCALROS;
+
+	/*
+	 * Removes transaction inclusive_xid from MVCC control: the first matching
+	 * entry of the snapshot's xip array is dropped by shifting the following
+	 * entries down one slot and decrementing xcnt.  Nothing is changed when
+	 * inclusive_xid is invalid or is not found in xip.
+	 */
+	if (TransactionIdIsValid(inclusive_xid))
+	{
+		for (int i = 0; i < snapshot->xcnt; i++)
+		{
+			if (TransactionIdEquals(snapshot->xip[i], inclusive_xid))
+			{
+				i++;
+				for (; i < snapshot->xcnt; i++)
+					snapshot->xip[i - 1] = snapshot->xip[i];
+				snapshot->xcnt--;
+				break;
+			}
+		}
+	}
+
+	exclusiveTransactionId = exclusive_xid; /* read by HeapTupleSatisfiesLocalRos() */
+
+	/* Invalidate the IsXmaxHasCommitted() cache */
+	cachedTransactionInfo.xid = InvalidTransactionId;
+
+	return snapshot;
+}
+
+bool
+HeapTupleSatisfiesLocalRos(HeapTuple htup, Snapshot snapshot, Buffer buffer)
+{
+	SnapshotType temp_snapshot_type;
+
+	/*
+	 * Store away the VCI specific type and check for MVCC visibility.  The
+	 * original type is restored on every path before returning.
+	 *
+	 * A tuple that passes the MVCC test is reported visible unless
+	 * IsXmaxHasCommitted() judges its xmax committed; even then it remains
+	 * visible when that xmax equals exclusiveTransactionId, i.e. the
+	 * exclusive_xid last given to vci_GetSnapshotForLocalRos().
+	 */
+	temp_snapshot_type = snapshot->snapshot_type;
+	snapshot->snapshot_type = SNAPSHOT_MVCC;
+
+	if (HeapTupleSatisfiesVisibility(htup, snapshot, buffer))
+	{
+		snapshot->snapshot_type = temp_snapshot_type;
+		if (IsXmaxHasCommitted(htup))
+		{
+			TransactionId xmax;
+
+			xmax = HeapTupleHeaderGetRawXmax(htup->t_data);
+
+			if (TransactionIdEquals(xmax, exclusiveTransactionId))
+				return true;
+
+			return false;
+		}
+
+		return true;
+	}
+
+	snapshot->snapshot_type = temp_snapshot_type;
+	return false;
+}
+
+/*
+ * Checks whether the htup has been removed, i.e. whether its xmax is
+ * regarded as committed.
+ *
+ * Returns true when the HEAP_XMAX_COMMITTED hint bit is set, or when
+ * vci_transaction_get_type() classifies the raw xmax as VCI_XACT_SELF or
+ * VCI_XACT_DID_COMMIT.  Returns false when the raw xmax is invalid, when
+ * HEAP_XMAX_INVALID is set, or for any other transaction type.
+ *
+ * The result for the xmax examined last is kept in cachedTransactionInfo, so
+ * that a following tuple with the same xmax is answered without calling
+ * vci_transaction_get_type() again.
+ *
+ * NOTE(review): HEAP_XMAX_IS_MULTI and lock-only xmax values are not
+ * examined here; confirm that the tuples passed in can never carry them.
+ */
+static bool
+IsXmaxHasCommitted(HeapTuple htup)
+{
+	TransactionId xmax;
+	bool		result = false;
+
+	if (htup->t_data->t_infomask & HEAP_XMAX_COMMITTED)
+		return true;
+
+	xmax = HeapTupleHeaderGetRawXmax(htup->t_data);
+
+	if (!TransactionIdIsValid(xmax))
+		return false;
+
+	if (htup->t_data->t_infomask & HEAP_XMAX_INVALID)
+		return false;
+
+	if (TransactionIdEquals(xmax, cachedTransactionInfo.xid))
+		return cachedTransactionInfo.committed;
+
+	switch (vci_transaction_get_type(xmax))
+	{
+		case VCI_XACT_SELF:
+		case VCI_XACT_DID_COMMIT:
+			result = true;
+			break;
+
+		default:
+			break;
+	}
+
+	cachedTransactionInfo.xid = xmax;
+	cachedTransactionInfo.committed = result;
+
+	return result;
+}
+
+/**
+ * @brief Estimates the number of items in all pages of a heap relation from
+ * the item size and the number of pages, assuming that all the entries have
+ * the same size and that there are no HOT chains.
+ *
+ * The length of the first tuple returned by a forward scan under the active
+ * snapshot is used as the size of every entry.
+ *
+ * @param[in] oid Oid of relation.
+ * @return estimated number of items in the relation; 0 when oid is invalid
+ * or when the scan returns no tuple.
+ */
+uint64
+vci_EstimateNumEntriesInHeapRelation(Oid oid)
+{
+	if (OidIsValid(oid))
+	{
+		Relation	rel;
+		TableScanDesc scan;
+		HeapTuple	tuple;
+		uint64		result = 0;
+
+		rel = table_open(oid, AccessShareLock);
+		scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+		scan->rs_flags &= ~SO_ALLOW_PAGEMODE;	/* clear the page-mode scan flag */
+		tuple = heap_getnext(scan, ForwardScanDirection);
+
+		if (NULL != tuple)
+		{
+			BlockNumber relallvisible;
+			uint64		numEntriesPerPage = (BLCKSZ - SizeOfPageHeaderData) /
+				(tuple->t_len + sizeof(ItemIdData));
+
+			/*
+			 * The estimated value is calculated by subtracting the number of
+			 * pages reported by visibilitymap_count() (treated here as the
+			 * free pages) from the total number of pages, and then
+			 * multiplying by the maximum number of entries per page.
+			 *
+			 * NOTE(review): the visibility map counts all-visible pages;
+			 * confirm that this is the intended proxy for free pages.
+			 */
+			visibilitymap_count(rel, &relallvisible, NULL);
+			result = (RelationGetNumberOfBlocks(rel) - relallvisible) * numEntriesPerPage;
+		}
+
+		table_endscan(scan);
+		table_close(rel, AccessShareLock);
+
+		return result;
+	}
+
+	return 0;
+}
--
1.8.3.1