From f395014170dad3d71756c80ff8343f54b9042410 Mon Sep 17 00:00:00 2001 From: ejrh Date: Wed, 30 Jan 2019 10:37:10 +1300 Subject: [PATCH 3/4] Support range quals in Tid Scan This means queries with expressions such as "ctid >= ? AND ctid < ?" can be answered by scanning over that part of a table, rather than falling back to a full SeqScan. --- src/backend/commands/explain.c | 23 ++ src/backend/executor/Makefile | 1 + src/backend/executor/execAmi.c | 6 + src/backend/executor/execProcnode.c | 10 + src/backend/executor/nodeTidrangescan.c | 599 +++++++++++++++++++++++++++++ src/backend/nodes/copyfuncs.c | 24 ++ src/backend/nodes/outfuncs.c | 13 + src/backend/optimizer/path/costsize.c | 96 +++++ src/backend/optimizer/path/tidpath.c | 106 ++++- src/backend/optimizer/plan/createplan.c | 98 +++++ src/backend/optimizer/plan/setrefs.c | 13 + src/backend/optimizer/plan/subselect.c | 6 + src/backend/optimizer/util/pathnode.c | 29 ++ src/include/catalog/pg_operator.dat | 6 +- src/include/executor/nodeTidrangescan.h | 23 ++ src/include/nodes/execnodes.h | 22 ++ src/include/nodes/nodes.h | 3 + src/include/nodes/pathnodes.h | 12 + src/include/nodes/plannodes.h | 13 + src/include/optimizer/cost.h | 2 + src/include/optimizer/pathnode.h | 2 + src/test/regress/expected/tidrangescan.out | 238 ++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/tidrangescan.sql | 74 ++++ src/tools/pgindent/typedefs.list | 5 + 25 files changed, 1412 insertions(+), 14 deletions(-) create mode 100644 src/backend/executor/nodeTidrangescan.c create mode 100644 src/include/executor/nodeTidrangescan.h create mode 100644 src/test/regress/expected/tidrangescan.out create mode 100644 src/test/regress/sql/tidrangescan.sql diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 400f3c9..6a63010 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -933,6 +933,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case 
T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -1079,6 +1080,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_TidScan: pname = sname = "Tid Scan"; break; + case T_TidRangeScan: + pname = sname = "Tid Range Scan"; + break; case T_SubqueryScan: pname = sname = "Subquery Scan"; break; @@ -1270,6 +1274,7 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_SampleScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -1691,6 +1696,23 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); } break; + case T_TidRangeScan: + { + /* + * The tidrangequals list has AND semantics, so be sure to + * show it as an AND condition. + */ + List *tidquals = ((TidRangeScan *) plan)->tidrangequals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_andclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; case T_ForeignScan: show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) @@ -2978,6 +3000,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_ForeignScan: case T_CustomScan: case T_ModifyTable: diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index cc09895..0152e31 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -28,6 +28,7 @@ OBJS = execAmi.o execCurrent.o execExpr.o execExprInterp.o \ nodeValuesscan.o \ nodeCtescan.o nodeNamedtuplestorescan.o nodeWorktablescan.o \ nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \ + nodeTidrangescan.o \ nodeForeignscan.o 
nodeWindowAgg.o tstoreReceiver.o tqueue.o spi.o \ nodeTableFuncscan.o diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 187f892..e85ed61 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -51,6 +51,7 @@ #include "executor/nodeSubplan.h" #include "executor/nodeSubqueryscan.h" #include "executor/nodeTableFuncscan.h" +#include "executor/nodeTidrangescan.h" #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" @@ -198,6 +199,10 @@ ExecReScan(PlanState *node) ExecReScanTidScan((TidScanState *) node); break; + case T_TidRangeScanState: + ExecReScanTidRangeScan((TidRangeScanState *) node); + break; + case T_SubqueryScanState: ExecReScanSubqueryScan((SubqueryScanState *) node); break; @@ -524,6 +529,7 @@ ExecSupportsBackwardScan(Plan *node) case T_SeqScan: case T_TidScan: + case T_TidRangeScan: case T_FunctionScan: case T_ValuesScan: case T_CteScan: diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 4ab2903..46b39d0 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -108,6 +108,7 @@ #include "executor/nodeSubplan.h" #include "executor/nodeSubqueryscan.h" #include "executor/nodeTableFuncscan.h" +#include "executor/nodeTidrangescan.h" #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" @@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_TidRangeScan: + result = (PlanState *) ExecInitTidRangeScan((TidRangeScan *) node, + estate, eflags); + break; + case T_SubqueryScan: result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node, estate, eflags); @@ -632,6 +638,10 @@ ExecEndNode(PlanState *node) ExecEndTidScan((TidScanState *) node); break; + case T_TidRangeScanState: + ExecEndTidRangeScan((TidRangeScanState *) node); + break; + case T_SubqueryScanState: 
ExecEndSubqueryScan((SubqueryScanState *) node); break; diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c new file mode 100644 index 0000000..a5065f9 --- /dev/null +++ b/src/backend/executor/nodeTidrangescan.c @@ -0,0 +1,599 @@ +/*------------------------------------------------------------------------- + * + * nodeTidrangescan.c + * Routines to support tid range scans of relations + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeTidrangescan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * + * ExecTidRangeScan scans a relation using a range of tids + * ExecInitTidRangeScan creates and initializes state info. + * ExecReScanTidRangeScan rescans the tid relation. + * ExecEndTidRangeScan releases all storage. + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/relscan.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "catalog/pg_operator.h" +#include "executor/execdebug.h" +#include "executor/nodeTidrangescan.h" +#include "nodes/nodeFuncs.h" +#include "storage/bufmgr.h" +#include "utils/rel.h" + + +#define IsCTIDVar(node) \ + ((node) != NULL && \ + IsA((node), Var) && \ + ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ + ((Var *) (node))->varlevelsup == 0) + +typedef enum +{ + TIDEXPR_UPPER_BOUND, + TIDEXPR_LOWER_BOUND +} TidExprType; + +/* one bound qual in TidRangeScanState's trss_tidexprs list */ +typedef struct TidOpExpr +{ + TidExprType exprtype; /* type of op */ + ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ + bool inclusive; /* whether op is inclusive */ +} TidOpExpr; + +/* + * For the given 'expr', build and return an appropriate TidOpExpr taking into + * account the expr's operator and operand order. 
+ */ +static TidOpExpr * +MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate) +{ + Node *arg1 = get_leftop((Expr *) expr); + Node *arg2 = get_rightop((Expr *) expr); + ExprState *exprstate = NULL; + bool invert = false; + TidOpExpr *tidopexpr; + + if (IsCTIDVar(arg1)) + exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps); + else if (IsCTIDVar(arg2)) + { + exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps); + invert = true; + } + else + elog(ERROR, "could not identify CTID variable"); + + tidopexpr = (TidOpExpr *) palloc0(sizeof(TidOpExpr)); + + switch (expr->opno) + { + case TIDLessEqOperator: + tidopexpr->inclusive = true; + /* fall through */ + case TIDLessOperator: + tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND; + break; + case TIDGreaterEqOperator: + tidopexpr->inclusive = true; + /* fall through */ + case TIDGreaterOperator: + tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND; + break; + default: + elog(ERROR, "could not identify CTID expression"); + } + + tidopexpr->exprstate = exprstate; + + return tidopexpr; +} + +/* + * Extract the qual subexpressions that yield TIDs to search for, + * and compile them into ExprStates if they're ordinary expressions. + */ +static void +TidExprListCreate(TidRangeScanState *tidrangestate) +{ + TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan; + List *tidexprs = NIL; + ListCell *l; + + foreach(l, node->tidrangequals) + { + OpExpr *opexpr = lfirst(l); + TidOpExpr *tidopexpr = MakeTidOpExpr(opexpr, tidrangestate); + + tidexprs = lappend(tidexprs, tidopexpr); + } + + tidrangestate->trss_tidexprs = tidexprs; +} + +/* + * Set a lower bound tid, taking into account the inclusivity of the bound. + * Return true if the bound is valid. 
+ */ +static bool +SetTidLowerBound(ItemPointer tid, bool inclusive, ItemPointer lowerBound) +{ + OffsetNumber offset; + + *lowerBound = *tid; + offset = ItemPointerGetOffsetNumberNoCheck(tid); + + if (!inclusive) + { + /* Check if the lower bound is actually in the next block. */ + if (offset >= MaxOffsetNumber) + { + BlockNumber block = ItemPointerGetBlockNumberNoCheck(lowerBound); + + /* + * If the lower bound was already at or above the maximum block + * number, then there is no valid range. + */ + if (block >= MaxBlockNumber) + return false; + + ItemPointerSetBlockNumber(lowerBound, block + 1); + ItemPointerSetOffsetNumber(lowerBound, 1); + } + else + ItemPointerSetOffsetNumber(lowerBound, OffsetNumberNext(offset)); + } + else if (offset == 0) + ItemPointerSetOffsetNumber(lowerBound, 1); + + return true; +} + +/* + * Set an upper bound tid, taking into account the inclusivity of the bound. + * Return true if the bound is valid. + */ +static bool +SetTidUpperBound(ItemPointer tid, bool inclusive, ItemPointer upperBound) +{ + OffsetNumber offset; + + *upperBound = *tid; + offset = ItemPointerGetOffsetNumberNoCheck(tid); + + /* + * Since TID offsets start at 1, an inclusive upper bound with offset 0 + * can be treated as an exclusive bound. This has the benefit of + * eliminating that block from the scan range. + */ + if (inclusive && offset == 0) + inclusive = false; + + if (!inclusive) + { + /* Check if the upper bound is actually in the previous block. */ + if (offset == 0) + { + BlockNumber block = ItemPointerGetBlockNumberNoCheck(upperBound); + + /* + * If the upper bound was already in block 0, then there is no + * valid range. 
+ */ + if (block == 0) + return false; + + ItemPointerSetBlockNumber(upperBound, block - 1); + ItemPointerSetOffsetNumber(upperBound, MaxOffsetNumber); + } + else + ItemPointerSetOffsetNumber(upperBound, OffsetNumberPrev(offset)); + } + + return true; +} + +/* ---------------------------------------------------------------- + * TidRangeEval + * + * Compute the range of TIDs to scan, by evaluating the + * expressions for them. + * ---------------------------------------------------------------- + */ +static void +TidRangeEval(TidRangeScanState *node) +{ + ExprContext *econtext = node->ss.ps.ps_ExprContext; + BlockNumber nblocks; + ItemPointerData lowerBound; + ItemPointerData upperBound; + ListCell *l; + + /* + * We silently discard any TIDs that are out of range at the time of scan + * start. (Since we hold at least AccessShareLock on the table, it won't + * be possible for someone to truncate away the blocks we intend to + * visit.) + */ + nblocks = RelationGetNumberOfBlocks(node->ss.ss_currentRelation); + + + /* The biggest range on an empty table is empty; just skip it. */ + if (nblocks == 0) + return; + + /* Set the lower and upper bound to scan the whole table. */ + ItemPointerSetBlockNumber(&lowerBound, 0); + ItemPointerSetOffsetNumber(&lowerBound, 1); + ItemPointerSetBlockNumber(&upperBound, nblocks - 1); + ItemPointerSetOffsetNumber(&upperBound, MaxOffsetNumber); + + foreach(l, node->trss_tidexprs) + { + TidOpExpr *tidopexpr = (TidOpExpr *) lfirst(l); + ItemPointer itemptr; + bool isNull; + + /* Evaluate this bound. */ + itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate, + econtext, + &isNull)); + + /* If the bound is NULL, *nothing* matches the qual. 
*/ + if (isNull) + return; + + if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND) + { + ItemPointerData lb; + + if (!SetTidLowerBound(itemptr, tidopexpr->inclusive, &lb)) + return; + + if (ItemPointerCompare(&lb, &lowerBound) > 0) + lowerBound = lb; + } + + if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND) + { + ItemPointerData ub; + + if (!SetTidUpperBound(itemptr, tidopexpr->inclusive, &ub)) + return; + + if (ItemPointerCompare(&ub, &upperBound) < 0) + upperBound = ub; + } + } + + /* If the resulting range is not empty, use it. */ + if (ItemPointerCompare(&lowerBound, &upperBound) <= 0) + { + node->trss_startBlock = ItemPointerGetBlockNumberNoCheck(&lowerBound); + node->trss_endBlock = ItemPointerGetBlockNumberNoCheck(&upperBound); + node->trss_startOffset = ItemPointerGetOffsetNumberNoCheck(&lowerBound); + node->trss_endOffset = ItemPointerGetOffsetNumberNoCheck(&upperBound); + } +} + +/* ---------------------------------------------------------------- + * NextInTidRange + * + * Fetch the next tuple when scanning a range of TIDs. + * + * Since the heap access method may return tuples that are in the scan + * limit, but not within the required TID range, this function will + * check for such tuples and skip over them. + * ---------------------------------------------------------------- + */ +static HeapTuple +NextInTidRange(TidRangeScanState *node, TableScanDesc scandesc, ScanDirection direction) +{ + HeapTuple tuple; + + for (;;) + { + BlockNumber block; + OffsetNumber offset; + + tuple = heap_getnext(scandesc, direction); + if (!tuple) + break; + + /* Check that the tuple is within the required range. */ + block = ItemPointerGetBlockNumber(&tuple->t_self); + offset = ItemPointerGetOffsetNumber(&tuple->t_self); + + /* The tuple should never come from outside the scan limits. 
*/ + Assert(block >= node->trss_startBlock && + block <= node->trss_endBlock); + + /* + * If the tuple is in the first block of the range and before the + * first requested offset, then we can either skip it (if scanning + * forward), or end the scan (if scanning backward). + */ + if (block == node->trss_startBlock && offset < node->trss_startOffset) + { + if (ScanDirectionIsForward(direction)) + continue; + else + tuple = NULL; + } + + /* Similarly for the last block, after the last requested offset. */ + if (block == node->trss_endBlock && offset > node->trss_endOffset) + { + if (ScanDirectionIsBackward(direction)) + continue; + else + tuple = NULL; + } + + break; + } + + return tuple; +} + +/* ---------------------------------------------------------------- + * TidRangeNext + * + * Retrieve a tuple from the TidRangeScan node's currentRelation + * using the tids in the TidRangeScanState information. + * + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +TidRangeNext(TidRangeScanState *node) +{ + TableScanDesc scandesc; + EState *estate; + ScanDirection direction; + HeapTuple tuple; + TupleTableSlot *slot; + + /* + * extract necessary information from tid scan node + */ + scandesc = node->ss.ss_currentScanDesc; + estate = node->ss.ps.state; + direction = estate->es_direction; + slot = node->ss.ss_ScanTupleSlot; + + if (!node->trss_inScan) + { + BlockNumber blocks_to_scan; + + /* First time through, compute the list of TID ranges to be visited */ + if (node->trss_startBlock == InvalidBlockNumber) + TidRangeEval(node); + + if (scandesc == NULL) + { + scandesc = table_beginscan_strat(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, + false, false); + node->ss.ss_currentScanDesc = scandesc; + } + + /* Compute the number of blocks to scan and set the scan limits. */ + if (node->trss_startBlock == InvalidBlockNumber) + { + /* If the range is empty, set the scan limits to zero blocks. 
*/ + node->trss_startBlock = 0; + blocks_to_scan = 0; + } + else + blocks_to_scan = node->trss_endBlock - node->trss_startBlock + 1; + + heap_setscanlimits(scandesc, node->trss_startBlock, blocks_to_scan); + node->trss_inScan = true; + } + + /* Fetch the next tuple. */ + tuple = NextInTidRange(node, scandesc, direction); + + /* + * If we've exhausted all the tuples in the range, reset the inScan flag. + * This will cause the heap to be rescanned for any subsequent fetches, + * which is important for some cursor operations: for instance, FETCH LAST + * fetches all the tuples in order and then fetches one tuple in reverse. + */ + if (!tuple) + node->trss_inScan = false; + + /* + * save the tuple and the buffer returned to us by the access methods in + * our scan tuple slot and return the slot. Note also that + * ExecStoreBufferHeapTuple will increment the refcount of the buffer; the + * refcount will not be dropped until the tuple table slot is cleared. + */ + if (tuple) + ExecStoreBufferHeapTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + ((HeapScanDesc) scandesc)->rs_cbuf); /* buffer associated + * with this tuple */ + else + ExecClearTuple(slot); + + return slot; +} + +/* + * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot) +{ + /* + * XXX shouldn't we check here to make sure tuple is in TID range? In + * runtime-key case this is not certain, is it? + */ + return true; +} + +/* ---------------------------------------------------------------- + * ExecTidRangeScan(node) + * + * Scans the relation using tids and returns the next qualifying tuple + * in the direction specified. + * We call the ExecScan() routine and pass it the appropriate + * access method functions. + * + * Conditions: + * -- the "cursor" maintained by the AMI is positioned at the tuple + * returned previously. 
+ * + * Initial States: + * -- the relation indicated is opened for scanning so that the + * "cursor" is positioned before the first qualifying tuple. + * -- trss_startBlock is InvalidBlockNumber + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +ExecTidRangeScan(PlanState *pstate) +{ + TidRangeScanState *node = castNode(TidRangeScanState, pstate); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) TidRangeNext, + (ExecScanRecheckMtd) TidRangeRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanTidRangeScan(node) + * ---------------------------------------------------------------- + */ +void +ExecReScanTidRangeScan(TidRangeScanState *node) +{ + TableScanDesc scan = node->ss.ss_currentScanDesc; + + if (scan != NULL) + table_rescan(scan, /* scan desc */ + NULL); /* new scan keys */ + + /* mark scan as not in progress, and tid range list as not computed yet */ + node->trss_inScan = false; + node->trss_startBlock = InvalidBlockNumber; + + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndTidRangeScan + * + * Releases any storage allocated through C routines. + * Returns nothing. + * ---------------------------------------------------------------- + */ +void +ExecEndTidRangeScan(TidRangeScanState *node) +{ + TableScanDesc scan = node->ss.ss_currentScanDesc; + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clear out tuple table slots + */ + if (node->ss.ps.ps_ResultTupleSlot) + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* close heap scan */ + if (scan != NULL) + table_endscan(scan); +} + +/* ---------------------------------------------------------------- + * ExecInitTidRangeScan + * + * Initializes the tid range scan's state information, creates + * scan keys, and opens the base and tid relations. 
+ * + * Parameters: + * node: TidRangeScan node produced by the planner. + * estate: the execution state initialized in InitPlan. + * ---------------------------------------------------------------- + */ +TidRangeScanState * +ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) +{ + TidRangeScanState *tidrangestate; + Relation currentRelation; + + /* + * create state structure + */ + tidrangestate = makeNode(TidRangeScanState); + tidrangestate->ss.ps.plan = (Plan *) node; + tidrangestate->ss.ps.state = estate; + tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &tidrangestate->ss.ps); + + /* + * mark scan as not in progress, and tid range list as not computed yet + */ + tidrangestate->trss_inScan = false; + tidrangestate->trss_startBlock = InvalidBlockNumber; + + /* + * open the scan relation + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + tidrangestate->ss.ss_currentRelation = currentRelation; + tidrangestate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * get the scan type from the relation descriptor. + */ + ExecInitScanTupleSlot(estate, &tidrangestate->ss, + RelationGetDescr(currentRelation), + table_slot_callbacks(currentRelation)); + + /* + * Initialize result type and projection. + */ + ExecInitResultTypeTL(&tidrangestate->ss.ps); + ExecAssignScanProjectionInfo(&tidrangestate->ss); + + /* + * initialize child expressions + */ + tidrangestate->ss.ps.qual = + ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate); + + TidExprListCreate(tidrangestate); + + /* + * all done. 
+ */ + return tidrangestate; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 74b23b7..a9ee89c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -585,6 +585,27 @@ _copyTidScan(const TidScan *from) } /* + * _copyTidRangeScan + */ +static TidRangeScan * +_copyTidRangeScan(const TidRangeScan *from) +{ + TidRangeScan *newnode = makeNode(TidRangeScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_NODE_FIELD(tidrangequals); + + return newnode; +} + +/* * _copySubqueryScan */ static SubqueryScan * @@ -4843,6 +4864,9 @@ copyObjectImpl(const void *from) case T_TidScan: retval = _copyTidScan(from); break; + case T_TidRangeScan: + retval = _copyTidRangeScan(from); + break; case T_SubqueryScan: retval = _copySubqueryScan(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 8fd5ad8..0aa21e2 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -608,6 +608,16 @@ _outTidScan(StringInfo str, const TidScan *node) } static void +_outTidRangeScan(StringInfo str, const TidRangeScan *node) +{ + WRITE_NODE_TYPE("TIDRANGESCAN"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_NODE_FIELD(tidrangequals); +} + +static void _outSubqueryScan(StringInfo str, const SubqueryScan *node) { WRITE_NODE_TYPE("SUBQUERYSCAN"); @@ -3683,6 +3693,9 @@ outNode(StringInfo str, const void *obj) case T_TidScan: _outTidScan(str, obj); break; + case T_TidRangeScan: + _outTidRangeScan(str, obj); + break; case T_SubqueryScan: _outSubqueryScan(str, obj); break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 4b9be13..2d9846d 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1272,6 +1272,102 @@ cost_tidscan(Path *path, PlannerInfo *root, } /* + * cost_tidrangescan + * Determines and 
returns the cost of scanning a relation using a range of + * TIDs. + * + * 'baserel' is the relation to be scanned + * 'tidrangequals' is the list of TID-checkable range quals + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tidrangescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidrangequals, ParamPathInfo *param_info) +{ + Selectivity selectivity; + double pages; + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + QualCost tid_qual_cost; + double ntuples; + double nrandompages; + double nseqpages; + double spc_random_page_cost; + double spc_seq_page_cost; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Count how many tuples and pages we expect to scan */ + selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid, + JOIN_INNER, NULL); + pages = ceil(selectivity * baserel->pages); + + if (pages <= 0.0) + pages = 1.0; + + /* + * The first page in a range requires a random seek, but each subsequent + * page is just a normal sequential page read. NOTE: it's desirable for + * Tid Range Scans to cost more than the equivalent Sequential Scans, + * because Seq Scans have some performance advantages such as scan + * synchronization and parallelizability, and we'd prefer one of them to + * be picked unless a Tid Range Scan really is better. + */ + ntuples = selectivity * baserel->tuples; + nseqpages = pages - 1.0; + nrandompages = 1.0; + + if (!enable_tidscan) + startup_cost += disable_cost; + + /* + * The TID qual expressions will be computed once, any other baserestrict + * quals once per retrieved tuple. 
+ */ + cost_qual_eval(&tid_qual_cost, tidrangequals, root); + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* disk costs */ + run_cost += spc_random_page_cost * nrandompages + spc_seq_page_cost * nseqpages; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + /* + * XXX currently we assume TID quals are a subset of qpquals at this + * point; they will be removed (if possible) when we create the plan, so + * we subtract their cost from the total qpqual cost. (If the TID quals + * can't be removed, this is a mistake and we're going to underestimate + * the CPU cost a bit.) + */ + startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - + tid_qual_cost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* * cost_subqueryscan * Determines and returns the cost of scanning a subquery RTE. * diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 466e996..533e936 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -2,9 +2,9 @@ * * tidpath.c * Routines to determine which TID conditions are usable for scanning - * a given relation, and create TidPaths accordingly. + * a given relation, and create TidPaths and TidRangePaths accordingly. * - * What we are looking for here is WHERE conditions of the form + * For TidPaths, we look for WHERE conditions of the form * "CTID = pseudoconstant", which can be implemented by just fetching * the tuple directly via heap_fetch(). 
We can also handle OR'd conditions * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr @@ -23,6 +23,9 @@ * a function, but in practice it works better to keep the special node * representation all the way through to execution. * + * Additionally, TidRangePaths may be created for conditions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and + * AND-clauses composed of such conditions. * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -63,14 +66,14 @@ IsCTIDVar(Var *var, RelOptInfo *rel) /* * Check to see if a RestrictInfo is of the form - * CTID = pseudoconstant + * CTID OP pseudoconstant * or - * pseudoconstant = CTID - * where the CTID Var belongs to relation "rel", and nothing on the - * other side of the clause does. + * pseudoconstant OP CTID + * where OP is a binary operation, the CTID Var belongs to relation "rel", + * and nothing on the other side of the clause does. */ static bool -IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) +IsTidBinaryClause(RestrictInfo *rinfo, RelOptInfo *rel) { OpExpr *node; Node *arg1, @@ -83,10 +86,9 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) return false; node = (OpExpr *) rinfo->clause; - /* Operator must be tideq */ - if (node->opno != TIDEqualOperator) + /* Operator must take two arguments */ + if (list_length(node->args) != 2) return false; - Assert(list_length(node->args) == 2); arg1 = linitial(node->args); arg2 = lsecond(node->args); @@ -118,6 +120,44 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) /* * Check to see if a RestrictInfo is of the form + * CTID = pseudoconstant + * or + * pseudoconstant = CTID + * where the CTID Var belongs to relation "rel", and nothing on the + * other side of the clause does. 
+ */ +static bool +IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + if (!IsTidBinaryClause(rinfo, rel)) + return false; + return ((OpExpr *) rinfo->clause)->opno == TIDEqualOperator; +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID OP pseudoconstant + * or + * pseudoconstant OP CTID + * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs + * to relation "rel", and nothing on the other side of the clause does. + */ +static bool +IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + Oid opno; + + if (!IsTidBinaryClause(rinfo, rel)) + return false; + opno = ((OpExpr *) rinfo->clause)->opno; + return opno == TIDLessOperator || + opno == TIDLessEqOperator || + opno == TIDGreaterOperator || + opno == TIDGreaterEqOperator; +} + +/* + * Check to see if a RestrictInfo is of the form * CTID = ANY (pseudoconstant_array) * where the CTID Var belongs to relation "rel", and nothing on the * other side of the clause does. @@ -302,6 +342,32 @@ TidQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) } /* + * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos + * + * Returns a List of CTID range qual RestrictInfos for the specified rel + * (with implicit AND semantics across the list), or NIL if there are no + * usable conditions. + */ +static List * +TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) +{ + List *rlst = NIL; + ListCell *l; + + foreach(l, rlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (IsTidRangeClause(rinfo, rel)) + { + rlst = lappend(rlst, rinfo); + } + } + + return rlst; +} + +/* * Given a list of join clauses involving our rel, create a parameterized * TidPath for each one that is a suitable TidEqual clause. 
* @@ -385,6 +451,7 @@ void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) { List *tidquals; + List *tidrangequals; /* * If any suitable quals exist in the rel's baserestrict list, generate a @@ -405,6 +472,25 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) } /* + * If there are range quals in the baserestrict list, generate a + * TidRangePath. + */ + tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo, rel); + + if (tidrangequals) + { + /* + * This path uses no join clauses, but it could still have required + * parameterization due to LATERAL refs in its tlist. + */ + Relids required_outer = rel->lateral_relids; + + add_path(rel, (Path *) create_tidrangescan_path(root, rel, + tidrangequals, + required_outer)); + } + + /* * Try to generate parameterized TidPaths using equality clauses extracted * from EquivalenceClasses. (This is important since simple "t1.ctid = * t2.ctid" clauses will turn into ECs.) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index bc0ed37..f96ff23 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -125,6 +125,8 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, static void bitmap_subplan_mark_shared(Plan *plan); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); +static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, + List *tlist, List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, List *tlist, List *scan_clauses); @@ -189,6 +191,8 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, Index scanrelid); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, List *tidquals); +static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual, Index scanrelid, + List *tidrangequals); static SubqueryScan 
*make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, @@ -373,6 +377,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -661,6 +666,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) scan_clauses); break; + case T_TidRangeScan: + plan = (Plan *) create_tidrangescan_plan(root, + (TidRangePath *) best_path, + tlist, + scan_clauses); + break; + case T_SubqueryScan: plan = (Plan *) create_subqueryscan_plan(root, (SubqueryScanPath *) best_path, @@ -3208,6 +3220,73 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, } /* + * create_tidrangescan_plan + * Returns a tidrangescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TidRangeScan * +create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, + List *tlist, List *scan_clauses) +{ + TidRangeScan *scan_plan; + Index scan_relid = best_path->path.parent->relid; + List *tidrangequals = best_path->tidrangequals; + + /* it should be a base rel... */ + Assert(scan_relid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* + * The qpqual list must contain all restrictions not enforced by the + * tidrangequals list. tidrangequals has AND semantics, so we can simply + * remove any qual that appears in it. 
+ */ + { + List *qpqual = NIL; + ListCell *l; + + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member_ptr(tidrangequals, rinfo)) + continue; /* simple duplicate */ + if (is_redundant_derived_clause(rinfo, tidrangequals)) + continue; /* derived from same EquivalenceClass */ + qpqual = lappend(qpqual, rinfo); + } + scan_clauses = qpqual; + } + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */ + tidrangequals = extract_actual_clauses(tidrangequals, false); + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + tidrangequals = (List *) + replace_nestloop_params(root, (Node *) tidrangequals); + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_tidrangescan(tlist, + scan_clauses, + scan_relid, + tidrangequals); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* * create_subqueryscan_plan * Returns a subqueryscan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. 
@@ -5109,6 +5188,25 @@ make_tidscan(List *qptlist, return node; } +static TidRangeScan * +make_tidrangescan(List *qptlist, + List *qpqual, + Index scanrelid, + List *tidrangequals) +{ + TidRangeScan *node = makeNode(TidRangeScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tidrangequals = tidrangequals; + + return node; +} + static SubqueryScan * make_subqueryscan(List *qptlist, List *qpqual, diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 0213a37..0d208e9 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -537,6 +537,19 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->tidquals, rtoffset); } break; + case T_TidRangeScan: + { + TidRangeScan *splan = (TidRangeScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, rtoffset); + splan->tidrangequals = + fix_scan_list(root, splan->tidrangequals, rtoffset); + } + break; case T_SubqueryScan: /* Needs special treatment, see comments below */ return set_subqueryscan_references(root, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 33e47cc..4a958a6 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2235,6 +2235,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_TidRangeScan: + finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_SubqueryScan: { SubqueryScan *sscan = (SubqueryScan *) plan; diff --git 
a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 169e51e..a87ccf8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1198,6 +1198,35 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, } /* + * create_tidrangescan_path + * Creates a path corresponding to a scan by a range of TIDs, returning + * the pathnode. + */ +TidRangePath * +create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, List *tidrangequals, + Relids required_outer) +{ + TidRangePath *pathnode = makeNode(TidRangePath); + + pathnode->path.pathtype = T_TidRangeScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->tidrangequals = tidrangequals; + + cost_tidrangescan(&pathnode->path, root, rel, tidrangequals, + pathnode->path.param_info); + + return pathnode; +} + +/* * create_append_path * Creates a path corresponding to an Append plan, returning the * pathnode. 
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 06aec07..fd642af 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -216,15 +216,15 @@ oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt', oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' }, -{ oid => '2800', descr => 'greater than', +{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than', oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt', oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' }, -{ oid => '2801', descr => 'less than or equal', +{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal', oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle', oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' }, -{ oid => '2802', descr => 'greater than or equal', +{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal', oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge', oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' }, diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h new file mode 100644 index 0000000..cff8790 --- /dev/null +++ b/src/include/executor/nodeTidrangescan.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * nodeTidrangescan.h + * + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeTidrangescan.h + * + 
*------------------------------------------------------------------------- + */ +#ifndef NODETIDRANGESCAN_H +#define NODETIDRANGESCAN_H + +#include "nodes/execnodes.h" + +extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags); +extern void ExecEndTidRangeScan(TidRangeScanState *node); +extern void ExecReScanTidRangeScan(TidRangeScanState *node); + +#endif /* NODETIDRANGESCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 770b56c..44b146e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1562,6 +1562,28 @@ typedef struct TidScanState } TidScanState; /* ---------------- + * TidRangeScanState information + * + * trss_tidexprs list of TidExpr structs (see nodeTidscan.c) + * trss_startBlock first block to scan + * trss_endBlock last block to scan (inclusive) + * trss_startOffset first offset in first block to scan + * trss_endOffset last offset in last block to scan (inclusive) + * trss_inScan is a scan currently in progress? + * ---------------- + */ +typedef struct TidRangeScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *trss_tidexprs; + BlockNumber trss_startBlock; + BlockNumber trss_endBlock; + OffsetNumber trss_startOffset; + OffsetNumber trss_endOffset; + bool trss_inScan; +} TidRangeScanState; + +/* ---------------- * SubqueryScanState information * * SubqueryScanState is used for scanning a sub-query in the range table. 
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ffb4cd4..8d7dfd3 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -59,6 +59,7 @@ typedef enum NodeTag T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, + T_TidRangeScan, T_SubqueryScan, T_FunctionScan, T_ValuesScan, @@ -115,6 +116,7 @@ typedef enum NodeTag T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, + T_TidRangeScanState, T_SubqueryScanState, T_FunctionScanState, T_TableFuncScanState, @@ -229,6 +231,7 @@ typedef enum NodeTag T_BitmapAndPath, T_BitmapOrPath, T_TidPath, + T_TidRangePath, T_SubqueryScanPath, T_ForeignPath, T_CustomPath, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 4b15d26..645cfc8 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1276,6 +1276,18 @@ typedef struct TidPath } TidPath; /* + * TidRangePath represents a scan by a contiguous range of TIDs + * + * tidrangequals is an implicitly AND'ed list of qual expressions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=. + */ +typedef struct TidRangePath +{ + Path path; + List *tidrangequals; +} TidRangePath; + +/* * SubqueryScanPath represents a scan of an unflattened subquery-in-FROM * * Note that the subpath comes from a different planning domain; for example diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 93d7f32..eaaa11b 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -489,6 +489,19 @@ typedef struct TidScan } TidScan; /* ---------------- + * tid range scan node + * + * tidrangequals is an implicitly AND'ed list of qual expressions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=. 
+ * ---------------- + */ +typedef struct TidRangeScan +{ + Scan scan; + List *tidrangequals; /* qual(s) involving CTID op something */ +} TidRangeScan; + +/* ---------------- * subquery scan node * * SubqueryScan is for scanning the output of a sub-query in the range table. diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index ac6de0f..e534fb8 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -82,6 +82,8 @@ extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root); extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec); extern void cost_tidscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info); +extern void cost_tidrangescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info); extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_functionscan(Path *path, PlannerInfo *root, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index a51a6dc..aec02f4 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -63,6 +63,8 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, List *bitmapquals); extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); +extern TidRangePath *create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, + List *tidrangequals, Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, Relids required_outer, diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out new file mode 100644 index 0000000..fbe961b --- /dev/null +++ b/src/test/regress/expected/tidrangescan.out @@ -0,0 +1,238 @@ +-- tests for tidrangescans +CREATE TABLE 
tidrangescan(id integer, data text); +INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,1000) AS s(i); +DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR substring(ctid::text from '\((\d+),')::integer >= 10;; +VACUUM tidrangescan; +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + ctid +-------- + (0,1) + (0,2) + (0,3) + (0,4) + (0,5) + (0,6) + (0,7) + (0,8) + (0,9) + (0,10) +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + QUERY PLAN +------------------------------------ + Tid Range Scan on tidrangescan + TID Cond: (ctid <= '(1,5)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + ctid +-------- + (0,1) + (0,2) + (0,3) + (0,4) + (0,5) + (0,6) + (0,7) + (0,8) + (0,9) + (0,10) + (1,1) + (1,2) + (1,3) + (1,4) + (1,5) +(15 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(0,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + ctid +------ +(0 rows) + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(9,8)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid > '(9,8)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(9,8)'; + ctid +-------- + (9,9) + (9,10) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(9,8)' < ctid; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: ('(9,8)'::tid < ctid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE '(9,8)' < ctid; + ctid +-------- + 
(9,9) + (9,10) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(9,8)'; + QUERY PLAN +------------------------------------ + Tid Range Scan on tidrangescan + TID Cond: (ctid >= '(9,8)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid >= '(9,8)'; + ctid +-------- + (9,8) + (9,9) + (9,10) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + QUERY PLAN +-------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid >= '(100,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + ctid +------ +(0 rows) + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + QUERY PLAN +---------------------------------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: ((ctid > '(4,4)'::tid) AND ('(4,7)'::tid >= ctid)) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + ctid +------- + (4,5) + (4,6) + (4,7) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + QUERY PLAN +---------------------------------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (('(4,7)'::tid >= ctid) AND (ctid > '(4,4)'::tid)) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + ctid +------- + (4,5) + (4,6) + (4,7) +(3 rows) + +-- extreme offsets +SELECT ctid FROM tidrangescan where ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1; + ctid +------ +(0 rows) + +SELECT ctid FROM tidrangescan where ctid < '(0,0)' LIMIT 1; + ctid +------ +(0 rows) + +-- empty table +CREATE TABLE tidrangescan_empty(id integer, data text); +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + QUERY PLAN +-------------------------------------- + Tid Range Scan on tidrangescan_empty + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT 
ctid FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + ctid +------ +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan_empty WHERE ctid > '(9, 0)'; + QUERY PLAN +-------------------------------------- + Tid Range Scan on tidrangescan_empty + TID Cond: (ctid > '(9,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan_empty WHERE ctid > '(9, 0)'; + ctid +------ +(0 rows) + +-- cursors +BEGIN; +DECLARE c CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +FETCH NEXT c; + ctid +------- + (0,1) +(1 row) + +FETCH NEXT c; + ctid +------- + (0,2) +(1 row) + +FETCH PRIOR c; + ctid +------- + (0,1) +(1 row) + +FETCH FIRST c; + ctid +------- + (0,1) +(1 row) + +FETCH LAST c; + ctid +-------- + (0,10) +(1 row) + +COMMIT; +DROP TABLE tidrangescan; +DROP TABLE tidrangescan_empty; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 030a71f..47070f7 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: create_table_like alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext +test: create_table_like alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan tidrangescan stats_ext # rules cannot run concurrently with any test that creates a view test: rules psql_crosstab amutils diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql new file mode 100644 index 0000000..042c743 --- /dev/null +++ b/src/test/regress/sql/tidrangescan.sql @@ -0,0 +1,74 @@ +-- tests for tidrangescans + +CREATE TABLE tidrangescan(id integer, data text); + +INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,1000) AS s(i); +DELETE FROM tidrangescan WHERE substring(ctid::text from ',(\d+)\)')::integer > 10 OR 
substring(ctid::text from '\((\d+),')::integer >= 10;; +VACUUM tidrangescan; + +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(9,8)'; +SELECT ctid FROM tidrangescan WHERE ctid > '(9,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(9,8)' < ctid; +SELECT ctid FROM tidrangescan WHERE '(9,8)' < ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(9,8)'; +SELECT ctid FROM tidrangescan WHERE ctid >= '(9,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; +SELECT ctid FROM tidrangescan WHERE ctid > '(4,4)' AND '(4,7)' >= ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; +SELECT ctid FROM tidrangescan WHERE '(4,7)' >= ctid AND ctid > '(4,4)'; + +-- extreme offsets +SELECT ctid FROM tidrangescan where ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1; +SELECT ctid FROM tidrangescan where ctid < '(0,0)' LIMIT 1; + +-- empty table +CREATE TABLE tidrangescan_empty(id integer, data text); + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan_empty WHERE ctid < '(1, 0)'; +SELECT ctid FROM tidrangescan_empty WHERE ctid < '(1, 0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan_empty WHERE ctid > '(9, 0)'; +SELECT ctid FROM tidrangescan_empty WHERE ctid > '(9, 0)'; + +-- cursors +BEGIN; +DECLARE c CURSOR FOR 
SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +FETCH NEXT c; +FETCH NEXT c; +FETCH PRIOR c; +FETCH FIRST c; +FETCH LAST c; +COMMIT; + +DROP TABLE tidrangescan; +DROP TABLE tidrangescan_empty; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b821df9..c0da577 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2347,8 +2347,13 @@ TextPositionState TheLexeme TheSubstitute TidExpr +TidExprType TidHashKey +TidOpExpr TidPath +TidRangePath +TidRangeScan +TidRangeScanState TidScan TidScanState TimeADT -- 2.7.4