From 6eead275645e6fae2ae75db1e5091cdc77fe1568 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Thu, 2 Jul 2020 19:29:32 +1200 Subject: [PATCH v2 3/3] Add Result Cache executor node Here we add a new executor node type named "Result Cache". The planner can use this node to sit above a parameterized path in order to cache the tuples for commonly used sets of parameters. The cache itself is just a hash table which limits itself to not exceeding work_mem in size. We maintain a dlist of keys for this cache and when we require more space in the table for new entries, we start removing entries starting with the least recently used ones. For parameterized nested loop joins we now consider using one of these Result Caches in between the Nested Loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Knowing this relies on having good ndistinct estimates on the nested loop parameters. Effectively, for parameterized nested loops, when a Result Cache is used, the join becomes a sort of hybrid of nested loop and hash joins. This is useful as we only need to fill the hash table (the cache) with the records that are required during the "probe" phase. We'll never end up hashing anything that we don't need, which is especially useful when some items in the table will never be looked up and hash join's hash table would have exceeded work_mem. We also support caching the results from correlated subqueries. However, currently, since subqueries are planned before their parent query, we are unable to obtain any estimations on the cache hit ratio. For now, we opt to just always put a Result Cache above a suitable correlated subquery. In the future, we may like to be smarter about that, but for now, the overhead of using the Result Cache, even in cases where we never get a cache hit is minimal. 
--- .../postgres_fdw/expected/postgres_fdw.out | 28 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 2 + doc/src/sgml/config.sgml | 18 + src/backend/commands/explain.c | 112 ++ src/backend/executor/Makefile | 1 + src/backend/executor/execAmi.c | 5 + src/backend/executor/execExpr.c | 132 ++ src/backend/executor/execParallel.c | 18 + src/backend/executor/execProcnode.c | 10 + src/backend/executor/nodeResultCache.c | 1060 +++++++++++++++++ src/backend/nodes/copyfuncs.c | 29 + src/backend/nodes/outfuncs.c | 34 + src/backend/nodes/readfuncs.c | 21 + src/backend/optimizer/path/allpaths.c | 4 + src/backend/optimizer/path/costsize.c | 127 ++ src/backend/optimizer/path/joinpath.c | 374 +++++- src/backend/optimizer/plan/createplan.c | 81 ++ src/backend/optimizer/plan/setrefs.c | 1 + src/backend/optimizer/plan/subselect.c | 110 ++ src/backend/optimizer/util/pathnode.c | 62 + src/backend/utils/misc/guc.c | 10 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/executor/executor.h | 6 + src/include/executor/nodeResultCache.h | 29 + src/include/lib/ilist.h | 19 + src/include/nodes/execnodes.h | 64 + src/include/nodes/nodes.h | 3 + src/include/nodes/pathnodes.h | 16 + src/include/nodes/plannodes.h | 18 + src/include/optimizer/cost.h | 1 + src/include/optimizer/pathnode.h | 7 + src/test/regress/expected/aggregates.out | 8 +- src/test/regress/expected/groupingsets.out | 20 +- src/test/regress/expected/join.out | 51 +- src/test/regress/expected/join_hash.out | 72 +- src/test/regress/expected/partition_prune.out | 242 ++-- src/test/regress/expected/resultcache.out | 100 ++ src/test/regress/expected/rowsecurity.out | 20 +- src/test/regress/expected/select_parallel.out | 28 +- src/test/regress/expected/subselect.out | 24 +- src/test/regress/expected/sysviews.out | 3 +- src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/aggregates.sql | 2 + src/test/regress/sql/join.sql | 2 + src/test/regress/sql/partition_prune.sql | 2 + src/test/regress/sql/resultcache.sql | 32 + 47 files changed, 2783 insertions(+), 229 deletions(-) create mode 100644 src/backend/executor/nodeResultCache.c create mode 100644 src/include/executor/nodeResultCache.h create mode 100644 src/test/regress/expected/resultcache.out create mode 100644 src/test/regress/sql/resultcache.sql diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 82fc1290ef..a5d697bd7a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1581,6 +1581,7 @@ SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL 20 | 0 | AAA020 (10 rows) +SET enable_resultcache TO off; -- right outer join + left outer join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; @@ -1607,6 +1608,7 @@ SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT 20 | 0 | AAA020 (10 rows) +RESET enable_resultcache; -- left outer join + right outer join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; @@ -2914,10 +2916,13 @@ select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft Relations: Aggregate on (public.ft2 t2) Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) 
SubPlan 1 - -> Foreign Scan on public.ft1 t1 - Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) - Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) -(13 rows) + -> Result Cache + Output: ((count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))) + Cache Key: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + -> Foreign Scan on public.ft1 t1 + Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) +(16 rows) select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; count @@ -2928,8 +2933,8 @@ select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft -- Inner query is aggregation query explain (verbose, costs off) select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------ Unique Output: ((SubPlan 1)) -> Sort @@ -2939,11 +2944,14 @@ select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) fro Output: (SubPlan 1) Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) SubPlan 1 - -> Foreign Scan + -> Result Cache Output: (count(t1.c1) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) - Relations: Aggregate on (public.ft1 t1) - Remote SQL: SELECT count("C 1") FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6)) -(13 rows) + Cache Key: t2.c2, t2.c1 + -> Foreign Scan + Output: (count(t1.c1) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + Relations: Aggregate on (public.ft1 t1) + Remote SQL: SELECT count("C 1") FILTER (WHERE (($1::integer = 6) AND ($2::integer < 10))) FROM "S 1"."T 1" WHERE (("C 1" = 6)) +(16 rows) select distinct (select count(t1.c1) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; count diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 83971665e3..00b3567e0f 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -480,10 +480,12 @@ SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; +SET enable_resultcache TO off; -- right outer join + left outer join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 RIGHT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; +RESET enable_resultcache; -- left outer join + right outer join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10; diff --git 
a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b81aab239f..7e17b1f13d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4637,6 +4637,24 @@ ANY num_sync ( + enable_resultcache (boolean) + + enable_resultcache configuration parameter + + + + + Enables or disables the query planner's use of a result cache node for + parameterized nodes. This node type allows scans to the underlying + nodes to be skipped when the results for the current parameters are + already in the cache. Less commonly looked up results may be evicted + from the cache when more space is required for new entries. + The default is on. + + + + enable_mergejoin (boolean) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 093864cfc0..10a4fa83b6 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -108,6 +108,8 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); +static void show_resultcache_info(ResultCacheState *rcstate, List *ancestors, + ExplainState *es); static void show_hashagg_info(AggState *hashstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); @@ -1279,6 +1281,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_Material: pname = sname = "Materialize"; break; + case T_ResultCache: + pname = sname = "Result Cache"; + break; case T_Sort: pname = sname = "Sort"; break; @@ -1970,6 +1975,10 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_Hash: show_hash_info(castNode(HashState, planstate), es); break; + case T_ResultCache: + show_resultcache_info(castNode(ResultCacheState, planstate), + ancestors, es); + break; default: break; } @@ -3043,6 +3052,109 @@ show_hash_info(HashState *hashstate, ExplainState *es) } } +static void +show_resultcache_info(ResultCacheState *rcstate, List *ancestors, ExplainState *es) +{ + Plan *plan = ((PlanState *) rcstate)->plan; + ListCell *lc; + List *context; + StringInfoData keystr; + char *seperator = ""; + bool useprefix; + + initStringInfo(&keystr); + + /* XXX surely we'll always have more than one if we have a resultcache? 
*/ + useprefix = list_length(es->rtable) > 1; + + /* Set up deparsing context */ + context = set_deparse_context_plan(es->deparse_cxt, + plan, + ancestors); + + foreach(lc, ((ResultCache *) plan)->param_exprs) + { + Node *expr = (Node *) lfirst(lc); + + appendStringInfoString(&keystr, seperator); + + appendStringInfoString(&keystr, deparse_expression(expr, context, + useprefix, false)); + seperator = ", "; + } + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Cache Key", keystr.data, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, "Cache Key: %s\n", keystr.data); + } + + pfree(keystr.data); + + if (!es->analyze) + return; + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyInteger("Cache Hits", NULL, rcstate->stats.cache_hits, es); + ExplainPropertyInteger("Cache Misses", NULL, rcstate->stats.cache_misses, es); + ExplainPropertyInteger("Cache Evictions", NULL, rcstate->stats.cache_evictions, es); + ExplainPropertyInteger("Cache Overflows", NULL, rcstate->stats.cache_overflows, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Cache Hits: " UINT64_FORMAT " Cache Misses: " UINT64_FORMAT " Cache Evictions: " UINT64_FORMAT " Cache Overflows: " UINT64_FORMAT "\n", + rcstate->stats.cache_hits, + rcstate->stats.cache_misses, + rcstate->stats.cache_evictions, + rcstate->stats.cache_overflows); + } + + /* Show details from parallel workers, if any */ + if (rcstate->shared_info != NULL) + { + int n; + + for (n = 0; n < rcstate->shared_info->num_workers; n++) + { + ResultCacheInstrumentation *si; + + si = &rcstate->shared_info->sinstrument[n]; + + if (es->workers_state) + ExplainOpenWorker(n, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Cache Hits: " UINT64_FORMAT " Cache Misses: " UINT64_FORMAT " Cache Evictions: " UINT64_FORMAT " Cache Overflows: " UINT64_FORMAT "\n", + si->cache_hits, si->cache_misses, si->cache_evictions, si->cache_overflows); + } + else + { + ExplainPropertyInteger("Cache Hits", NULL, + si->cache_hits, es); + ExplainPropertyInteger("Cache Misses", NULL, + si->cache_misses, es); + ExplainPropertyInteger("Cache Evictions", NULL, + si->cache_evictions, es); + ExplainPropertyInteger("Cache Overflows", NULL, + si->cache_overflows, es); + } + + if (es->workers_state) + ExplainCloseWorker(n, es); + } + } +} + /* * Show information on hash aggregate memory usage and batches. 
*/ diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index f990c6473a..d5724de69f 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -60,6 +60,7 @@ OBJS = \ nodeProjectSet.o \ nodeRecursiveunion.o \ nodeResult.o \ + nodeResultCache.o \ nodeSamplescan.o \ nodeSeqscan.o \ nodeSetOp.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index e2154ba86a..68920ecd89 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -44,6 +44,7 @@ #include "executor/nodeProjectSet.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" +#include "executor/nodeResultCache.h" #include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" @@ -249,6 +250,10 @@ ExecReScan(PlanState *node) ExecReScanMaterial((MaterialState *) node); break; + case T_ResultCacheState: + ExecReScanResultCache((ResultCacheState *) node); + break; + case T_SortState: ExecReScanSort((SortState *) node); break; diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 236413f62a..f32876f412 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3487,3 +3487,135 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, return state; } + +/* + * Build equality expression that can be evaluated using ExecQual(), returning + * true if the expression context's inner/outer tuples are equal. Datums in + * the inner/outer slots are assumed to be in the same order and quantity as + * the 'eqfunctions' parameter. + * + * desc: tuple descriptor of the to-be-compared tuples + * ops: the slot ops for the inner/outer tuple slots + * eqFunctions: array of function oids of the equality functions to use + * this must be the same length as the 'param_exprs' list. + * collactions: collation Oids to use for equality comparison. Must be the + * same length as the 'param_exprs' list. 
+ * parent: parent executor node + */ +ExprState * +ExecBuildParamSetEqual(TupleDesc desc, + const TupleTableSlotOps *ops, + const Oid *eqfunctions, + const Oid *collations, + const List *param_exprs, + PlanState *parent) +{ + ExprState *state = makeNode(ExprState); + ExprEvalStep scratch = {0}; + int maxatt = list_length(param_exprs); + List *adjust_jumps = NIL; + ListCell *lc; + + state->expr = NULL; + state->flags = EEO_FLAG_IS_QUAL; + state->parent = parent; + + scratch.resvalue = &state->resvalue; + scratch.resnull = &state->resnull; + + /* push deform steps */ + scratch.opcode = EEOP_INNER_FETCHSOME; + scratch.d.fetch.last_var = maxatt; + scratch.d.fetch.fixed = false; + scratch.d.fetch.known_desc = desc; + scratch.d.fetch.kind = ops; + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); + + scratch.opcode = EEOP_OUTER_FETCHSOME; + scratch.d.fetch.last_var = maxatt; + scratch.d.fetch.fixed = false; + scratch.d.fetch.known_desc = desc; + scratch.d.fetch.kind = ops; + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); + + for (int attno = 0; attno < maxatt; attno++) + { + Form_pg_attribute att = TupleDescAttr(desc, attno); + Oid foid = eqfunctions[attno]; + Oid collid = collations[attno]; + FmgrInfo *finfo; + FunctionCallInfo fcinfo; + AclResult aclresult; + + /* Check permission to call function */ + aclresult = pg_proc_aclcheck(foid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(foid)); + + InvokeFunctionExecuteHook(foid); + + /* Set up the primary fmgr lookup information */ + finfo = palloc0(sizeof(FmgrInfo)); + fcinfo = palloc0(SizeForFunctionCallInfo(2)); + fmgr_info(foid, finfo); + fmgr_info_set_expr(NULL, finfo); + InitFunctionCallInfoData(*fcinfo, finfo, 2, + collid, NULL, NULL); + + /* left arg */ + scratch.opcode = EEOP_INNER_VAR; + scratch.d.var.attnum = attno; + scratch.d.var.vartype = att->atttypid; + scratch.resvalue = &fcinfo->args[0].value; + scratch.resnull = &fcinfo->args[0].isnull; + ExprEvalPushStep(state, &scratch); + + /* right arg */ + scratch.opcode = EEOP_OUTER_VAR; + scratch.d.var.attnum = attno; + scratch.d.var.vartype = att->atttypid; + scratch.resvalue = &fcinfo->args[1].value; + scratch.resnull = &fcinfo->args[1].isnull; + ExprEvalPushStep(state, &scratch); + + scratch.opcode = finfo->fn_strict ? 
EEOP_FUNCEXPR_STRICT : + EEOP_FUNCEXPR; + scratch.d.func.finfo = finfo; + scratch.d.func.fcinfo_data = fcinfo; + scratch.d.func.fn_addr = finfo->fn_addr; + scratch.d.func.nargs = 2; + scratch.resvalue = &state->resvalue; + scratch.resnull = &state->resnull; + ExprEvalPushStep(state, &scratch); + + /* then emit EEOP_QUAL to detect if result is false (or null) */ + scratch.opcode = EEOP_QUAL; + scratch.d.qualexpr.jumpdone = -1; + scratch.resvalue = &state->resvalue; + scratch.resnull = &state->resnull; + ExprEvalPushStep(state, &scratch); + adjust_jumps = lappend_int(adjust_jumps, + state->steps_len - 1); + } + + /* adjust jump targets */ + foreach(lc, adjust_jumps) + { + ExprEvalStep *as = &state->steps[lfirst_int(lc)]; + + Assert(as->opcode == EEOP_QUAL); + Assert(as->d.qualexpr.jumpdone == -1); + as->d.qualexpr.jumpdone = state->steps_len; + } + + scratch.resvalue = NULL; + scratch.resnull = NULL; + scratch.opcode = EEOP_DONE; + ExprEvalPushStep(state, &scratch); + + ExecReadyExpr(state); + + return state; +} diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 382e78fb7f..d4c50c261d 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -35,6 +35,7 @@ #include "executor/nodeIncrementalSort.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" +#include "executor/nodeResultCache.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSort.h" #include "executor/nodeSubplan.h" @@ -293,6 +294,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggEstimate((AggState *) planstate, e->pcxt); break; + case T_ResultCacheState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecResultCacheEstimate((ResultCacheState *)planstate, e->pcxt); + break; default: break; } @@ -513,6 +518,10 @@ ExecParallelInitializeDSM(PlanState *planstate, /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggInitializeDSM((AggState *) planstate, d->pcxt); break; + case T_ResultCacheState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecResultCacheInitializeDSM((ResultCacheState *) planstate, d->pcxt); + break; default: break; } @@ -989,6 +998,7 @@ ExecParallelReInitializeDSM(PlanState *planstate, case T_HashState: case T_SortState: case T_IncrementalSortState: + case T_ResultCacheState: /* these nodes have DSM state, but no reinitialization is required */ break; @@ -1058,6 +1068,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_AggState: ExecAggRetrieveInstrumentation((AggState *) planstate); break; + case T_ResultCacheState: + ExecResultCacheRetrieveInstrumentation((ResultCacheState *) planstate); + break; default: break; } @@ -1350,6 +1363,11 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecAggInitializeWorker((AggState *) planstate, pwcxt); break; + case T_ResultCacheState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecResultCacheInitializeWorker((ResultCacheState *) planstate, + pwcxt); + break; default: break; } diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 5662e7d742..7f76394851 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -102,6 +102,7 @@ #include "executor/nodeProjectSet.h" #include "executor/nodeRecursiveunion.h" #include "executor/nodeResult.h" +#include 
"executor/nodeResultCache.h" #include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" @@ -309,6 +310,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_ResultCache: + result = (PlanState *) ExecInitResultCache((ResultCache *) node, + estate, eflags); + break; + case T_Sort: result = (PlanState *) ExecInitSort((Sort *) node, estate, eflags); @@ -695,6 +701,10 @@ ExecEndNode(PlanState *node) ExecEndMaterial((MaterialState *) node); break; + case T_ResultCacheState: + ExecEndResultCache((ResultCacheState *) node); + break; + case T_SortState: ExecEndSort((SortState *) node); break; diff --git a/src/backend/executor/nodeResultCache.c b/src/backend/executor/nodeResultCache.c new file mode 100644 index 0000000000..396d2aee18 --- /dev/null +++ b/src/backend/executor/nodeResultCache.c @@ -0,0 +1,1060 @@ +/*------------------------------------------------------------------------- + * + * nodeResultCache.c + * Routines to handle caching of results from parameterized nodes + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/executor/nodeResultCache.c + * + * ResultCache nodes are intended to sit above a parameterized node in the + * plan tree in order to cache results from them. The intention here is that + * a repeat scan with a parameter which has already been seen by the node can + * fetch tuples from the cache rather than having to re-scan the outer node + * all over again. The query planner may choose to make use of one of these + * when it thinks rescans for previously seen values are likely enough to + * warrant adding the additional node. + * + * The method of cache we use is a hash table. When the cache fills, we never + * spill tuples to disk, instead, we choose to evict the least recently used + * cache entry from the cache. We remember the least recently used entry by + * always pushing new entries and entries we look for onto the tail of a + * doubly linked list. This means that older items always bubble to the top + * of this LRU list. + * + * Sometimes our callers won't run their scans to completion. For example a + * semi-join only needs to run until it finds a matching tuple, and once it + * does, the join operator skips to the next outer tuple and does not execute + * the inner side again on that scan. Because of this, we must keep track of + * when a cache entry is complete, and by default, we know it is when we run + * out of tuples to read during the scan. However, there are cases where we + * can mark the cache entry as complete without exhausting the scan of all + * tuples. One case is unique joins, where the join operator knows that there + * will only be at most one match for any given outer tuple. In order to + * support such cases we allow the "singlerow" option to be set for the cache. + * This option marks the cache entry as complete after we read the first tuple + * from the subnode. 
+ *------------------------------------------------------------------------- + */ + /* + * INTERFACE ROUTINES + * ExecResultCache - materialize the result of a subplan + * ExecInitResultCache - initialize node and subnodes + * ExecEndResultCache - shutdown node and subnodes + * ExecReScanResultCache - rescan the result cache + */ + +#include "postgres.h" + +#include "access/parallel.h" +#include "common/hashfn.h" +#include "executor/executor.h" +#include "executor/nodeResultCache.h" +#include "lib/ilist.h" +#include "miscadmin.h" +#include "utils/lsyscache.h" + +/* + * States of the ExecResultCache state machine + */ +#define RC_CACHE_LOOKUP 1 /* Attempt to perform a cache lookup */ +#define RC_CACHE_FETCH_NEXT_TUPLE 2 /* Get another tuple from the cache */ +#define RC_FILLING_CACHE 3 /* Read outer node to fill cache */ +#define RC_CACHE_BYPASS_MODE 4 /* Bypass mode. Just read from our + * subplan without caching anything */ +#define RC_END_OF_SCAN 5 /* Ready for rescan */ + + +/* Helper macros for memory accounting */ +#define EMPTY_ENTRY_MEMORY_BYTES(e) (sizeof(ResultCacheEntry) + \ + sizeof(ResultCacheKey) + \ + (e)->key->params->t_len); +#define CACHE_TUPLE_BYTES(t) (sizeof(ResultCacheTuple) + \ + (t)->mintuple->t_len) + + /* + * ResultCacheTuple + * Stores an individually cached tuple + */ +typedef struct ResultCacheTuple +{ + MinimalTuple mintuple; /* Cached tuple */ + struct ResultCacheTuple *next; /* The next tuple with the same parameter + * values or NULL if it's the last one */ +} ResultCacheTuple; + +/* + * ResultCacheKey + * The hash table key for cached entries plus the LRU list link + */ +typedef struct ResultCacheKey +{ + MinimalTuple params; + dlist_node lru_node; /* Pointer to next/prev key in LRU list */ +} ResultCacheKey; + +/* + * ResultCacheEntry + * The data struct that the cache hash table stores + */ +typedef struct ResultCacheEntry +{ + ResultCacheKey *key; /* Hash key for hash table lookups */ + ResultCacheTuple *tuplehead; /* Pointer to the first tuple or NULL if + * no tuples are cached for this entry */ + uint32 status; /* Hash status */ + uint32 hash; /* Hash value (cached) */ + uint64 entry_mem; /* Bytes of memory used by this entry */ + bool complete; /* Did we read the outer plan to completion? */ +} ResultCacheEntry; + + +#define SH_PREFIX resultcache +#define SH_ELEMENT_TYPE ResultCacheEntry +#define SH_KEY_TYPE ResultCacheKey * +#define SH_SCOPE static +#define SH_DECLARE +#include "lib/simplehash.h" + +static uint32 ResultCacheHash_hash(struct resultcache_hash *tb, const ResultCacheKey *key); +static int ResultCacheHash_equal(struct resultcache_hash *tb, + const ResultCacheKey *params1, + const ResultCacheKey *params2); + +#define SH_PREFIX resultcache +#define SH_ELEMENT_TYPE ResultCacheEntry +#define SH_KEY_TYPE ResultCacheKey * +#define SH_KEY key +#define SH_HASH_KEY(tb, key) ResultCacheHash_hash(tb, key) +#define SH_EQUAL(tb, a, b) ResultCacheHash_equal(tb, a, b) == 0 +#define SH_SCOPE static +#define SH_STORE_HASH +#define SH_GET_HASH(tb, a) a->hash +#define SH_DEFINE +#include "lib/simplehash.h" + +/* + * ResultCacheHash_hash + * Hash function for simplehash hashtable. 'key' is unused here as we + * require that all table lookups first populate the ResultCacheState's + * probeslot with the key values to be looked up. 
+ */ +static uint32 +ResultCacheHash_hash(struct resultcache_hash *tb, const ResultCacheKey *key) +{ + ResultCacheState *rcstate = (ResultCacheState *) tb->private_data; + TupleTableSlot *pslot = rcstate->probeslot; + uint32 hashkey = 0; + int numkeys = rcstate->nkeys; + FmgrInfo *hashfunctions = rcstate->hashfunctions; + Oid *collations = rcstate->collations; + + for (int i = 0; i < numkeys; i++) + { + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + uint32 hkey; + + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + collations[i], pslot->tts_values[i])); + hashkey ^= hkey; + } + } + + return murmurhash32(hashkey); +} + +/* + * ResultCacheHash_equal + * Equality function for confirming hash value matches during a hash + * table lookup. 'key2' is never used, instead the ResultCacheState's + * probeslot is always populated with details of what's being looked up. + */ +static int +ResultCacheHash_equal(struct resultcache_hash *tb, const ResultCacheKey *key1, + const ResultCacheKey *key2) +{ + ResultCacheState *rcstate = (ResultCacheState *) tb->private_data; + ExprContext *econtext = rcstate->ss.ps.ps_ExprContext; + TupleTableSlot *tslot = rcstate->tableslot; + TupleTableSlot *pslot = rcstate->probeslot; + + /* probeslot should have already been prepared by prepare_probe_slot() */ + + ExecStoreMinimalTuple(key1->params, tslot, false); + + econtext->ecxt_innertuple = tslot; + econtext->ecxt_outertuple = pslot; + return !ExecQualAndReset(rcstate->cache_eq_expr, econtext); +} + +/* + * Initialize the hash table to empty. + */ +static void +build_hash_table(ResultCacheState *rcstate) +{ + /* XXX should the planner decide on the bucket count? */ + rcstate->hashtable = resultcache_create(rcstate->tableContext, 1024, + rcstate); +} + +/* + * prepare_probe_slot + * Populate rcstate's probeslot with the values from the tuple stored + * in 'key'. If 'key' is NULL, then perform the population by evalulating + * rcstate's param_exprs. + */ +static inline void +prepare_probe_slot(ResultCacheState *rcstate, ResultCacheKey *key) +{ + TupleTableSlot *pslot = rcstate->probeslot; + TupleTableSlot *tslot = rcstate->tableslot; + int numKeys = rcstate->nkeys; + + ExecClearTuple(pslot); + + if (key == NULL) + { + /* Set the probeslot's values based on the current parameter values */ + for (int i = 0; i < numKeys; i++) + pslot->tts_values[i] = ExecEvalExpr(rcstate->param_exprs[i], + rcstate->ss.ps.ps_ExprContext, + &pslot->tts_isnull[i]); + } + else + { + /* Process the key's MinimalTuple and store the values in probeslot */ + ExecStoreMinimalTuple(key->params, tslot, false); + slot_getallattrs(tslot); + memcpy(pslot->tts_values, tslot->tts_values, sizeof(Datum) * numKeys); + memcpy(pslot->tts_isnull, tslot->tts_isnull, sizeof(bool) * numKeys); + } + + ExecStoreVirtualTuple(pslot); +} + +/* + * entry_purge_tuples + * Remove all tuples from a cache entry, leaving an empty cache entry. + * Also update memory accounting to reflect the removal of the tuples. 
+ */ +static inline void +entry_purge_tuples(ResultCacheState *rcstate, ResultCacheEntry *entry) +{ + ResultCacheTuple *tuple = entry->tuplehead; + + while (tuple != NULL) + { + ResultCacheTuple *next = tuple->next; + + pfree(tuple->mintuple); + pfree(tuple); + + tuple = next; + } + + /* Update memory accounting for this entry and the entire cache */ + rcstate->mem_used -= entry->entry_mem; + entry->entry_mem = EMPTY_ENTRY_MEMORY_BYTES(entry); + rcstate->mem_used += entry->entry_mem; + + entry->complete = false; + entry->tuplehead = NULL; +} + +/* + * remove_cache_entry + * Remove 'entry' from the cache and free memory used by it. + */ +static void +remove_cache_entry(ResultCacheState *rcstate, ResultCacheEntry *entry) +{ + ResultCacheKey *key = entry->key; + + dlist_delete(&entry->key->lru_node); + + /* Remove all of the tuples from this entry */ + entry_purge_tuples(rcstate, entry); + + rcstate->mem_used -= entry->entry_mem; + + /* Ensure we didn't mess up the tracking somehow */ + Assert(rcstate->mem_used >= 0); + + /* Remove the entry from the cache */ + resultcache_delete_item(rcstate->hashtable, entry); + + pfree(key->params); + pfree(key); +} + +/* + * cache_reduce_memory + * Evict older and less recently used items from the cache in order to + * reduce the memory consumption back to something below the + * ResultCacheState's mem_lowerlimit. + * + * 'specialkey', if not NULL, causes the function to return false if the entry + * entry which the key belongs to is removed from the cache. + */ +static bool +cache_reduce_memory(ResultCacheState *rcstate, ResultCacheKey *specialkey) +{ + bool specialkey_intact = true; /* for now */ + dlist_mutable_iter iter; + + /* We expect only to be called when we've gone over budget on memory */ + Assert(rcstate->mem_used > rcstate->mem_upperlimit); + + /* Start the eviction process starting at the head of the LRU list. */ + dlist_foreach_modify(iter, &rcstate->lru_list) + { + ResultCacheKey *key = dlist_container(ResultCacheKey, lru_node, + iter.cur); + ResultCacheEntry *entry; + + /* + * Populate the hash probe slot in preparation for looking up this + * LRU entry. + */ + prepare_probe_slot(rcstate, key); + + /* + * Ideally the LRU list pointers would be stored in the entry itself + * rather than in the key. Unfortunately, we can't do that as the + * simplehash.h code may resize the table and allocate new memory for + * entries which would result in those pointers pointing to the old + * buckets. However, it's fine to use the key to store this as that's + * only referenced by a pointer in the entry, which of course follows + * the entry whenever the hash table is resized. Since we only have a + * pointer to the key here, we must perform a hash table lookup to + * find the entry that the key belongs to. + */ + entry = resultcache_lookup(rcstate->hashtable, NULL); + + /* A good spot to check for corruption of the table and LRU list. */ + Assert(entry != NULL); + Assert(entry->key == key); + + /* + * If we're being called to free memory while the cache is being + * populated with new tuples, then we'd better take some care as we + * could end up freeing the entry which 'specialkey' belongs to. + * Generally callers will pass 'specialkeys' as the keys for the cache + * entry which is currently being populated, so we must set spaceOK to + * false to inform the caller the specialkey entry has been removed. + */ + if (key == specialkey) + specialkey_intact = false; + + /* + * Finally remove the entry. This will remove from the LRU list too. 
+ */ + remove_cache_entry(rcstate, entry); + + rcstate->stats.cache_evictions += 1; /* Update Stats */ + + /* Exit if we've freed enough memory */ + if (rcstate->mem_used <= rcstate->mem_lowerlimit) + break; + } + + return specialkey_intact; +} + +/* + * cache_lookup + * Perform a lookup to see if we've already cached results based on the + * scan's current parameters. If we find an existing entry we move it to + * the end of the LRU list, set *found to true then return it. If we + * don't find an entry then we create a new one and add it to the end of + * the LRU list. We also update cache memory accounting and remove older + * entries if we go over the memory budget. If we managed to free enough + * memory we return the new entry, else we return NULL. + * + * Callers can assume we'll never return NULL when *found is true. + */ +static ResultCacheEntry * +cache_lookup(ResultCacheState *rcstate, bool *found) +{ + ResultCacheKey *key; + ResultCacheEntry *entry; + MemoryContext oldcontext; + + /* prepare the probe slot with the current scan parameters */ + prepare_probe_slot(rcstate, NULL); + + /* + * Add the new entry to the cache. No need to pass a valid key since the + * hash function uses rcstate's probeslot, which we populated above. + */ + entry = resultcache_insert(rcstate->hashtable, NULL, found); + + if (*found) + { + /* Move existing entry to the tail of the LRU list */ + dlist_move_tail(&rcstate->lru_list, &entry->key->lru_node); + + return entry; + } + + oldcontext = MemoryContextSwitchTo(rcstate->tableContext); + + /* Allocate a new key */ + entry->key = key = palloc(sizeof(ResultCacheKey)); + key->params = ExecCopySlotMinimalTuple(rcstate->probeslot); + + /* Mark the number of bytes which are used by this entry */ + entry->entry_mem = EMPTY_ENTRY_MEMORY_BYTES(entry); + entry->complete = false; + entry->tuplehead = NULL; + + /* Update the total cache memory utilization */ + rcstate->mem_used += entry->entry_mem; + + /* + * Since this is the most recently used entry, push this entry onto the + * end of the LRU list. + */ + dlist_push_tail(&rcstate->lru_list, &entry->key->lru_node); + + rcstate->last_tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + /* + * If we've gone over our memory budget, then we'll free up some space in + * the cache. + */ + if (rcstate->mem_used > rcstate->mem_upperlimit) + { + /* + * Try to free up some memory. It's highly unlikely that we'll fail + * to do so here since the entry we've just added is yet to contain + * any tuples and we're able to remove any other entry to reduce the + * memory consumption. + */ + if (unlikely(!cache_reduce_memory(rcstate, key))) + return NULL; + + /* + * The process of removing entries from the cache may have caused + * the code in simplehash.h to shuffle elements to earlier buckets in + * the hash table. If it has, we'll need to find the entry again by + * performing a lookup. Fortunately, we can detect if this has + * happened by seeing if the entry is still in use and that the key + * pointer matches our expected key. + */ + if (entry->status != resultcache_SH_IN_USE || entry->key != key) + { + /* + * We need to repopulate the probeslot as lookups performed during + * the cache evictions above will have stored some other key. + */ + prepare_probe_slot(rcstate, key); + + /* Re-find the newly added entry */ + entry = resultcache_lookup(rcstate->hashtable, NULL); + Assert(entry != NULL); + } + } + + return entry; +} + +/* + * cache_store_tuple + * Add the tuple stored in 'slot' to the rcstate's current cache entry. 
+ * The cache entry must have already been made with cache_lookup(). + * rcstate's last_tuple field must point to the tail of rcstate->entry's + * list of tuples. + */ +static bool +cache_store_tuple(ResultCacheState *rcstate, TupleTableSlot *slot) +{ + ResultCacheTuple *tuple; + ResultCacheEntry *entry = rcstate->entry; + MemoryContext oldcontext; + + Assert(slot != NULL); + Assert(entry != NULL); + + oldcontext = MemoryContextSwitchTo(rcstate->tableContext); + + tuple = (ResultCacheTuple *) palloc(sizeof(ResultCacheTuple)); + tuple->mintuple = ExecCopySlotMinimalTuple(slot); + tuple->next = NULL; + + /* Account for the memory we just consumed */ + entry->entry_mem += CACHE_TUPLE_BYTES(tuple); + rcstate->mem_used += CACHE_TUPLE_BYTES(tuple); + + if (entry->tuplehead == NULL) + { + /* + * This is the first tuple for this entry, so just point the list head + * to it. + */ + entry->tuplehead = tuple; + rcstate->last_tuple = tuple; + } + else + { + /* push this tuple onto the tail of the list */ + /* XXX use slist? */ + rcstate->last_tuple->next = tuple; + rcstate->last_tuple = tuple; + } + + MemoryContextSwitchTo(oldcontext); + + /* + * If we've gone over our memory budget then free up some space in the + * cache. + */ + if (rcstate->mem_used > rcstate->mem_upperlimit) + { + ResultCacheKey *key = entry->key; + + if (!cache_reduce_memory(rcstate, key)) + return false; + + /* + * The process of removing entries from the cache may have caused + * the code in simplehash.h to shuffle elements to earlier buckets in + * the hash table. If it has, we'll need to find the entry again by + * performing a lookup. Fortunately, we can detect if this has + * happened by seeing if the entry is still in use and that the key + * pointer matches our expected key. + */ + if (entry->status != resultcache_SH_IN_USE || entry->key != key) + { + /* + * We need to repopulate the probeslot as lookups performed during + * the cache evictions above will have stored some other key. + */ + prepare_probe_slot(rcstate, key); + + /* Re-find the entry */ + rcstate->entry = entry = resultcache_lookup(rcstate->hashtable, + NULL); + Assert(entry != NULL); + } + } + + return true; +} + +static TupleTableSlot * +ExecResultCache(PlanState *pstate) +{ + ResultCacheState *node = castNode(ResultCacheState, pstate); + PlanState *outerNode; + TupleTableSlot *slot; + + switch (node->rc_status) + { + case RC_CACHE_LOOKUP: + { + ResultCacheEntry *entry; + bool found; + + Assert(node->entry == NULL); + + /* + * We're only ever in this state for the first call of the + * scan. Here we have a look to see if we've already seen the + * current parameters before and if we have already cached a + * complete set of records that the outer plan will return for + * these parameters. + * + * When we find a valid cache entry, we'll return the first + * tuple from it. If not found, we'll create a cache entry and + * then try to fetch a tuple from the outer scan. If we find + * one there, we'll try to cache it. 
+ */ + + /* see if we've got anything cached for the current parameters */ + entry = cache_lookup(node, &found); + + if (found && entry->complete) + { + node->stats.cache_hits += 1; /* stats update */ + + /* Fetch the first cached tuple, if there is one */ + node->last_tuple = entry->tuplehead; + node->entry = entry; + + if (entry->tuplehead) + { + node->rc_status = RC_CACHE_FETCH_NEXT_TUPLE; + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecStoreMinimalTuple(entry->tuplehead->mintuple, + slot, false); + + return slot; + } + else + { + /* No tuples in this cache entry. */ + node->rc_status = RC_END_OF_SCAN; + return NULL; + } + } + else + { + TupleTableSlot *outerslot; + + node->stats.cache_misses += 1; /* stats update */ + + if (found) + { + /* + * A cache entry was found, but the scan for that + * entry did not run to completion. We'll just remove + * all tuples and start again. It might be tempting + * to continue where we left off, but there's no + * guarantee the outer node will produce the tuples in + * the same order as it did last time. + */ + entry_purge_tuples(node, entry); + } + + /* Scan the outer node for a tuple to cache */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + /* + * cache_lookup may have returned NULL due to + * failure to free enough cache space, so ensure we + * don't do anything here that assumes it worked. + * There's no need to go into bypass mode here as + * we're setting rc_status to end of scan. + */ + if (likely(entry)) + entry->complete = true; + + node->rc_status = RC_END_OF_SCAN; + return NULL; + } + + node->entry = entry; + + /* + * If we failed to create the entry or failed to store the + * tuple in the entry, then go into bypass mode. + */ + if (unlikely(entry == NULL || + !cache_store_tuple(node, outerslot))) + { + node->stats.cache_overflows += 1; /* stats update */ + + node->rc_status = RC_CACHE_BYPASS_MODE; + + /* + * No need to clear out last_tuple as we'll stay in + * bypass mode until the end of the scan. + */ + } + else + { + /* + * If we only expect a single row from this scan then + * we can mark that we're not expecting more. This + * allows cache lookups to work even when the scan has + * not been executed to completion. + */ + entry->complete = node->singlerow; + node->rc_status = RC_FILLING_CACHE; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + } + + case RC_CACHE_FETCH_NEXT_TUPLE: + { + ResultCacheEntry *entry = node->entry; + Assert(entry != NULL); + + /* Skip to the next tuple to output. */ + node->last_tuple = node->last_tuple->next; + + /* No more tuples in the cache */ + if (node->last_tuple == NULL) + { + node->rc_status = RC_END_OF_SCAN; + return NULL; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecStoreMinimalTuple(node->last_tuple->mintuple, slot, + false); + + return slot; + } + + case RC_FILLING_CACHE: + { + TupleTableSlot *outerslot; + ResultCacheEntry *entry = node->entry; + + /* entry should already have been set by RC_CACHE_LOOKUP */ + Assert(entry != NULL); + + /* + * When in the RC_FILLING_CACHE state, we've just had a cache + * miss and are populating the cache with the current scan + * tuples. + */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + /* No more tuples. 
Mark it as complete */ + entry->complete = true; + node->rc_status = RC_END_OF_SCAN; + return NULL; + } + else + { + /* + * Validate if the planner properly set the singlerow + * flag. It should only set that if each cache entry can, + * at most, return 1 row. + * XXX is this worth the check? + */ + if (unlikely(entry->complete)) + elog(ERROR, "cache entry already complete"); + + /* Record the tuple in the current cache entry */ + if (unlikely(!cache_store_tuple(node, outerslot))) + { + /* Couldn't store it? Handle overflow */ + node->stats.cache_overflows += 1; /* stats update */ + + node->rc_status = RC_CACHE_BYPASS_MODE; + + /* + * No need to clear out entry or last_tuple as we'll + * stay in bypass mode until the end of the scan. + */ + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + } + + case RC_CACHE_BYPASS_MODE: + { + TupleTableSlot *outerslot; + + /* + * When in bypass mode we just continue to read tuples without + * caching. We need to wait until the next rescan before we + * can come out of this mode. + */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + node->rc_status = RC_END_OF_SCAN; + return NULL; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + + case RC_END_OF_SCAN: + /* + * We've already returned NULL for this scan, but just in case + * something call us again by mistake. + */ + return NULL; + + default: + elog(ERROR, "unrecognized resultcache state: %d", + (int) node->rc_status); + return NULL; + } /* switch */ +} + +ResultCacheState * +ExecInitResultCache(ResultCache *node, EState *estate, int eflags) +{ + ResultCacheState *rcstate = makeNode(ResultCacheState); + Plan *outerNode; + int i; + int nkeys; + Oid *eqfuncoids; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + rcstate->ss.ps.plan = (Plan *) node; + rcstate->ss.ps.state = estate; + rcstate->ss.ps.ExecProcNode = ExecResultCache; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &rcstate->ss.ps); + + outerNode = outerPlan(node); + outerPlanState(rcstate) = ExecInitNode(outerNode, estate, eflags); + + /* + * Initialize return slot and type. No need to initialize projection info + * because this node doesn't do projections. + */ + ExecInitResultTupleSlotTL(&rcstate->ss.ps, &TTSOpsMinimalTuple); + rcstate->ss.ps.ps_ProjInfo = NULL; + + /* + * Initialize scan slot and type. + */ + ExecCreateScanSlotFromOuterPlan(estate, &rcstate->ss, &TTSOpsMinimalTuple); + + /* + * Set the state machine to lookup the cache. We won't find anything + * until we cache something, but this saves a special case to create the + * first entry. 
+ */ + rcstate->rc_status = RC_CACHE_LOOKUP; + + rcstate->nkeys = nkeys = node->numKeys; + rcstate->hashkeydesc = ExecTypeFromExprList(node->param_exprs); + rcstate->tableslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc, + &TTSOpsMinimalTuple); + rcstate->probeslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc, + &TTSOpsVirtual); + + rcstate->param_exprs = (ExprState **) palloc(nkeys * sizeof(ExprState *)); + rcstate->collations = node->collations; /* Just point directly to the plan data */ + rcstate->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); + + eqfuncoids = palloc(nkeys * sizeof(Oid)); + + for (i = 0; i < nkeys; i++) + { + Oid hashop = node->hashOperators[i]; + Oid left_hashfn; + Oid right_hashfn; + Expr *param_expr = (Expr *) list_nth(node->param_exprs, i); + + if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) + elog(ERROR, "could not find hash function for hash operator %u", + hashop); + + fmgr_info(left_hashfn, &rcstate->hashfunctions[i]); + + rcstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *) rcstate); + eqfuncoids[i] = get_opcode(hashop); + } + + rcstate->cache_eq_expr = ExecBuildParamSetEqual(rcstate->hashkeydesc, + &TTSOpsMinimalTuple, + eqfuncoids, + node->collations, + node->param_exprs, + (PlanState *) rcstate); + + pfree(eqfuncoids); + rcstate->mem_used = 0; + + /* Limit the total memory consumed by the cache to this */ + rcstate->mem_upperlimit = work_mem * 1024L; + + /* + * Set the lower limit to something a bit less than the upper limit so + * that we don't have to evict tuples every time we need to add a new one + * after the cache has filled. We don't make it too much smaller as we'd + * like to keep as much in the cache as possible. + */ + rcstate->mem_lowerlimit = rcstate->mem_upperlimit * 0.98; + + /* A memory context dedicated for the cache */ + rcstate->tableContext = AllocSetContextCreate(CurrentMemoryContext, + "ResultCacheHashTable", + ALLOCSET_DEFAULT_SIZES); + + dlist_init(&rcstate->lru_list); + rcstate->last_tuple = NULL; + rcstate->entry = NULL; + + /* + * Mark if we can assume the cache entry is completed after we get the + * first record for it. Some callers might not call us again after + * getting the first match. e.g. A join operator performing a unique join + * is able to skip to the next outer tuple after getting the first + * matching inner tuple. In this case, the cache entry is complete after + * getting the first tuple. This allows us to mark it as so. + */ + rcstate->singlerow = node->singlerow; + + /* Zero the statistics counters */ + memset(&rcstate->stats, 0, sizeof(ResultCacheInstrumentation)); + + /* Allocate and set up the actual cache */ + build_hash_table(rcstate); + + return rcstate; +} + +void +ExecEndResultCache(ResultCacheState *node) +{ + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the + * main process to report in EXPLAIN ANALYZE. 
+ */ + if (node->shared_info && IsParallelWorker()) + { + ResultCacheInstrumentation *si; + + Assert(ParallelWorkerNumber <= node->shared_info->num_workers); + si = &node->shared_info->sinstrument[ParallelWorkerNumber]; + memcpy(si, &node->stats, sizeof(ResultCacheInstrumentation)); + } + + /* Remove the cache context */ + MemoryContextDelete(node->tableContext); + + ExecClearTuple(node->ss.ss_ScanTupleSlot); + /* must drop pointer to cache result tuple */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + + /* + * free exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * shut down the subplan + */ + ExecEndNode(outerPlanState(node)); +} + +void +ExecReScanResultCache(ResultCacheState *node) +{ + PlanState *outerPlan = outerPlanState(node); + + /* Mark that we must lookup the cache for a new set of parameters */ + node->rc_status = RC_CACHE_LOOKUP; + + /* nullify pointers used for the last scan */ + node->entry = NULL; + node->last_tuple = NULL; + + /* + * if chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. + */ + if (outerPlan->chgParam == NULL) + ExecReScan(outerPlan); + +} + +/* ---------------------------------------------------------------- + * Parallel Query Support + * ---------------------------------------------------------------- + */ + + /* ---------------------------------------------------------------- + * ExecResultCacheEstimate + * + * Estimate space required to propagate result cache statistics. + * ---------------------------------------------------------------- + */ +void +ExecResultCacheEstimate(ResultCacheState *node, ParallelContext *pcxt) +{ + Size size; + + /* don't need this if not instrumenting or no workers */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = mul_size(pcxt->nworkers, sizeof(ResultCacheInstrumentation)); + size = add_size(size, offsetof(SharedResultCacheInfo, sinstrument)); + shm_toc_estimate_chunk(&pcxt->estimator, size); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* ---------------------------------------------------------------- + * ExecResultCacheInitializeDSM + * + * Initialize DSM space for result cache statistics. + * ---------------------------------------------------------------- + */ +void +ExecResultCacheInitializeDSM(ResultCacheState *node, ParallelContext *pcxt) +{ + Size size; + + /* don't need this if not instrumenting or no workers */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedResultCacheInfo, sinstrument) + + pcxt->nworkers * sizeof(ResultCacheInstrumentation); + node->shared_info = shm_toc_allocate(pcxt->toc, size); + /* ensure any unfilled slots will contain zeroes */ + memset(node->shared_info, 0, size); + node->shared_info->num_workers = pcxt->nworkers; + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, + node->shared_info); +} + +/* ---------------------------------------------------------------- + * ExecResultCacheInitializeWorker + * + * Attach worker to DSM space for result cache statistics. + * ---------------------------------------------------------------- + */ +void +ExecResultCacheInitializeWorker(ResultCacheState *node, ParallelWorkerContext *pwcxt) +{ + node->shared_info = + shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true); +} + +/* ---------------------------------------------------------------- + * ExecResultCacheRetrieveInstrumentation + * + * Transfer result cache statistics from DSM to private memory. 
+ * ---------------------------------------------------------------- + */ +void +ExecResultCacheRetrieveInstrumentation(ResultCacheState *node) +{ + Size size; + SharedResultCacheInfo *si; + + if (node->shared_info == NULL) + return; + + size = offsetof(SharedResultCacheInfo, sinstrument) + + node->shared_info->num_workers * sizeof(ResultCacheInstrumentation); + si = palloc(size); + memcpy(si, node->shared_info, size); + node->shared_info = si; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index d8cf87e6d0..449fd93542 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -927,6 +927,32 @@ _copyMaterial(const Material *from) } +/* + * _copyResultCache + */ +static ResultCache * +_copyResultCache(const ResultCache *from) +{ + ResultCache *newnode = makeNode(ResultCache); + + /* + * copy node superclass fields + */ + CopyPlanFields((const Plan *) from, (Plan *) newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(numKeys); + COPY_POINTER_FIELD(hashOperators, sizeof(Oid) * from->numKeys); + COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys); + COPY_NODE_FIELD(param_exprs); + COPY_SCALAR_FIELD(singlerow); + + return newnode; +} + + /* * CopySortFields * @@ -4937,6 +4963,9 @@ copyObjectImpl(const void *from) case T_Material: retval = _copyMaterial(from); break; + case T_ResultCache: + retval = _copyResultCache(from); + break; case T_Sort: retval = _copySort(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e2f177515d..ab433854bf 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -836,6 +836,20 @@ _outMaterial(StringInfo str, const Material *node) _outPlanInfo(str, (const Plan *) node); } +static void +_outResultCache(StringInfo str, const ResultCache *node) +{ + WRITE_NODE_TYPE("RESULTCACHE"); + + _outPlanInfo(str, (const Plan *) node); + + WRITE_INT_FIELD(numKeys); + WRITE_OID_ARRAY(hashOperators, node->numKeys); + WRITE_OID_ARRAY(collations, node->numKeys); + WRITE_NODE_FIELD(param_exprs); + WRITE_BOOL_FIELD(singlerow); +} + static void _outSortInfo(StringInfo str, const Sort *node) { @@ -1908,6 +1922,20 @@ _outMaterialPath(StringInfo str, const MaterialPath *node) WRITE_NODE_FIELD(subpath); } +static void +_outResultCachePath(StringInfo str, const ResultCachePath *node) +{ + WRITE_NODE_TYPE("RESULTCACHEPATH"); + + _outPathInfo(str, (const Path *) node); + + WRITE_NODE_FIELD(subpath); + WRITE_NODE_FIELD(hash_operators); + WRITE_NODE_FIELD(param_exprs); + WRITE_BOOL_FIELD(singlerow); + WRITE_FLOAT_FIELD(calls, "%.0f"); +} + static void _outUniquePath(StringInfo str, const UniquePath *node) { @@ -3809,6 +3837,9 @@ outNode(StringInfo str, const void *obj) case T_Material: _outMaterial(str, obj); break; + case T_ResultCache: + _outResultCache(str, obj); + break; case T_Sort: _outSort(str, obj); break; @@ -4043,6 +4074,9 @@ outNode(StringInfo str, const void *obj) case T_MaterialPath: _outMaterialPath(str, obj); break; + case T_ResultCachePath: + _outResultCachePath(str, obj); + break; case T_UniquePath: _outUniquePath(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 42050ab719..49ab438dbc 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2150,6 +2150,25 @@ _readMaterial(void) READ_DONE(); } +/* + * _readResultCache + */ +static ResultCache * +_readResultCache(void) +{ + READ_LOCALS(ResultCache); + + ReadCommonPlan(&local_node->plan); + + READ_INT_FIELD(numKeys); 
+ READ_OID_ARRAY(hashOperators, local_node->numKeys);
+ READ_OID_ARRAY(collations, local_node->numKeys);
+ READ_NODE_FIELD(param_exprs);
+ READ_BOOL_FIELD(singlerow);
+
+ READ_DONE();
+}
+
 /*
  * ReadCommonSort
  * Assign the basic stuff of all nodes that inherit from Sort
@@ -2832,6 +2851,8 @@ parseNodeString(void)
 return_value = _readHashJoin();
 else if (MATCH("MATERIAL", 8))
 return_value = _readMaterial();
+ else if (MATCH("RESULTCACHE", 11))
+ return_value = _readResultCache();
 else if (MATCH("SORT", 4))
 return_value = _readSort();
 else if (MATCH("INCREMENTALSORT", 15))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index d984da25d7..72b0aa6b2e 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -4073,6 +4073,10 @@ print_path(PlannerInfo *root, Path *path, int indent)
 ptype = "Material";
 subpath = ((MaterialPath *) path)->subpath;
 break;
+ case T_ResultCachePath:
+ ptype = "ResultCache";
+ subpath = ((ResultCachePath *) path)->subpath;
+ break;
 case T_UniquePath:
 ptype = "Unique";
 subpath = ((UniquePath *) path)->subpath;
 break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 97758dc41c..40b9d1b576 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -133,6 +133,7 @@ bool enable_hashagg = true;
 bool hashagg_avoid_disk_plan = true;
 bool enable_nestloop = true;
 bool enable_material = true;
+bool enable_resultcache = true;
 bool enable_mergejoin = true;
 bool enable_hashjoin = true;
 bool enable_gathermerge = true;
@@ -2297,6 +2298,127 @@ cost_material(Path *path,
 path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_resultcache_rescan
+ * Determines and returns the estimated cost of using a ResultCache node.
+ *
+ * In order to estimate this, we must gain knowledge of how often we expect to
+ * be called and how many distinct sets of parameters we are likely to be
+ * called with. If we expect a good cache hit ratio, then we can set our
+ * costs to account for that hit ratio, plus a little bit of cost for the
+ * caching itself. Caching will not work out well if we expect to be called
+ * with too many distinct parameter values. The worst case here is that we
+ * never see the same parameter values twice, in which case we'd never get a
+ * cache hit and caching would be a complete waste of effort.
+ */
+static void
+cost_resultcache_rescan(PlannerInfo *root, ResultCachePath *rcpath,
+ Cost *rescan_startup_cost, Cost *rescan_total_cost)
+{
+ Cost input_startup_cost = rcpath->subpath->startup_cost;
+ Cost input_total_cost = rcpath->subpath->total_cost;
+ double tuples = rcpath->subpath->rows;
+ double calls = rcpath->calls;
+ int width = rcpath->subpath->pathtarget->width;
+ int flags;
+
+ double work_mem_bytes;
+ double scan_bytes;
+ double est_cache_entries;
+ double ndistinct;
+ double evict_ratio;
+ double hit_ratio;
+ Cost startup_cost;
+ Cost total_cost;
+
+ /* available cache space */
+ work_mem_bytes = work_mem * 1024L;
+
+ /* estimate the number of bytes each cache entry will consume in the cache */
+ scan_bytes = relation_byte_size(tuples, width);
+
+ /* estimate the upper limit of cache entries we can hold at once */
+ est_cache_entries = work_mem_bytes / scan_bytes;
+
+ /* estimate the number of distinct parameter values */
+ ndistinct = estimate_num_groups(root, rcpath->param_exprs, calls, NULL,
+ &flags);
+
+ /*
+ * When the estimation fell back on using a default value, it's a bit too
+ * risky to assume that it's ok to use a Result Cache. The use of a
+ * default could cause us to use a Result Cache when it's really
+ * inappropriate to do so. If we see that this has been done then we'll
+ * assume that every call will have unique parameters, which will almost
+ * certainly mean a ResultCachePath will never survive add_path().
+ */
+ if ((flags & SELFLAG_USED_DEFAULT) != 0)
+ ndistinct = calls;
+
+ /*
+ * When the number of distinct parameter values is above the number we can
+ * store in the cache, then we'll have to evict some entries from the
+ * cache. This is not free, so here we estimate how often we'll incur the
+ * cost of that eviction.
+ */
+ evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct;
+
+ /*
+ * In order to estimate how costly a single scan will be, we need to
+ * attempt to estimate what the cache hit ratio will be. To do that we
+ * must look at how many scans are estimated for this node in total and
+ * how many of those scans we expect to get a cache hit.
+ */
+ hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) -
+ (ndistinct / calls);
+
+ /* Ensure we don't go negative */
+ hit_ratio = Max(hit_ratio, 0);
+
+ /*
+ * Set the total_cost accounting for the expected cache hit ratio. We
+ * also add on a cpu_operator_cost to account for a cache lookup. This
+ * will happen regardless of whether it's a cache hit or not.
+ */
+ total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost;
+
+ /* Now adjust the total cost to account for cache evictions */
+
+ /* Charge a cpu_tuple_cost for evicting the actual cache entry */
+ total_cost += cpu_tuple_cost * evict_ratio;
+
+ /*
+ * Charge a 10th of cpu_operator_cost to evict every tuple in that entry.
+ * The per-tuple eviction is really just a pfree, so charging a whole
+ * cpu_operator_cost seems a little excessive.
+ */
+ total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples;
+
+ /*
+ * Now adjust for storing things in the cache, since that's not free
+ * either. Everything must go in the cache, so we don't proportion this
+ * over any ratio, just apply it once for the scan. We charge a
+ * cpu_tuple_cost for the creation of the cache entry and also a
+ * cpu_operator_cost for each tuple we expect to cache.
+ */
+ total_cost += cpu_tuple_cost + cpu_operator_cost * tuples;
+
+ /*
+ * Getting the first row must also be proportioned according to the
+ * expected cache hit ratio.
+ */
+ startup_cost = input_startup_cost * (1.0 - hit_ratio);
+
+ /*
+ * Additionally we charge a cpu_tuple_cost to account for cache lookups,
+ * which we'll do regardless of whether it was a cache hit or not.
+ */
+ startup_cost += cpu_tuple_cost;
+
+ *rescan_startup_cost = startup_cost;
+ *rescan_total_cost = total_cost;
+}
+
 /*
  * cost_agg
  * Determines and returns the cost of performing an Agg plan node,
@@ -4022,6 +4144,11 @@ cost_rescan(PlannerInfo *root, Path *path,
 *rescan_total_cost = run_cost;
 }
 break;
+ case T_ResultCache:
+ /* All the hard work is done by cost_resultcache_rescan */
+ cost_resultcache_rescan(root, (ResultCachePath *) path,
+ rescan_startup_cost, rescan_total_cost);
+ break;
 default:
 *rescan_startup_cost = path->startup_cost;
 *rescan_total_cost = path->total_cost;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index db54a6ba2e..53f259fa55 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -18,10 +18,13 @@
 #include "executor/executor.h"
 #include "foreign/fdwapi.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "utils/typcache.h"

 /* Hook for plugins to get control in add_paths_to_joinrel() */
 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
@@ -52,6 +55,9 @@ static void try_partial_mergejoin_path(PlannerInfo *root,
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 RelOptInfo *outerrel, RelOptInfo *innerrel,
 JoinType jointype, JoinPathExtraData *extra);
+static inline bool clause_sides_match_join(RestrictInfo *rinfo,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
 RelOptInfo *outerrel, RelOptInfo *innerrel,
 JoinType jointype, JoinPathExtraData *extra);
@@ -163,6 +169,11 @@ add_paths_to_joinrel(PlannerInfo *root,
 {
 case JOIN_SEMI:
 case JOIN_ANTI:
+
+ /*
+ * XXX it may be worth proving this to allow a ResultCache to be
+ * considered for Nested Loop Semi/Anti Joins.
+ */
 extra.inner_unique = false; /* well, unproven */
 break;
 case JOIN_UNIQUE_INNER:
@@ -354,6 +365,162 @@ allow_star_schema_join(PlannerInfo *root,
 bms_nonempty_difference(inner_paramrels, outerrelids));
 }

+/*
+ * paraminfo_get_equal_hashops
+ * Determine if it's valid to use a ResultCache node to cache inner rows,
+ * including looking for volatile functions in the inner side of the
+ * join. Also, fetch outer side exprs and check for a valid hashable
+ * equality operator for each outer expr. Returns true and sets the
+ * 'param_exprs' and 'operators' output parameters if the caching is
+ * possible.
+ */
+static bool
+paraminfo_get_equal_hashops(ParamPathInfo *param_info, List **param_exprs,
+ List **operators, RelOptInfo *outerrel,
+ RelOptInfo *innerrel)
+{
+ List *clauses = param_info->ppi_clauses;
+ ListCell *lc;
+
+ /*
+ * We can't use a result cache if there are volatile functions in the
+ * inner rel's target list or restrict list. A cache hit could reduce the
+ * number of calls to these functions.
+ *
+ * XXX Think about this harder. Any other restrictions to add here?
+ */
+ if (contain_volatile_functions((Node *) innerrel->reltarget->exprs))
+ return false;
+
+ foreach(lc, innerrel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (contain_volatile_functions((Node *) rinfo->clause))
+ return false;
+ }
+
+ *param_exprs = NIL;
+ *operators = NIL;
+
+ Assert(list_length(clauses) > 0);
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ OpExpr *opexpr;
+ TypeCacheEntry *typentry;
+ Node *expr;
+
+ opexpr = (OpExpr *) rinfo->clause;
+
+ /* ppi_clauses should always meet this requirement */
+ if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
+ !clause_sides_match_join(rinfo, outerrel, innerrel))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ if (rinfo->outer_is_left)
+ expr = (Node *) list_nth(opexpr->args, 0);
+ else
+ expr = (Node *) list_nth(opexpr->args, 1);
+
+ typentry = lookup_type_cache(exprType(expr),
+ TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
+
+ /* XXX will eq_opr ever be invalid if hash_proc isn't? */
+ if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ *operators = lappend_oid(*operators, typentry->eq_opr);
+ *param_exprs = lappend(*param_exprs, expr);
+ }
+
+ return true;
+}
+
+/*
+ * get_resultcache_path
+ * If possible, make and return a Result Cache path atop 'inner_path'.
+ * Otherwise return NULL.
+ */
+static Path *
+get_resultcache_path(PlannerInfo *root, RelOptInfo *innerrel,
+ RelOptInfo *outerrel, Path *inner_path,
+ Path *outer_path, JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ List *param_exprs;
+ List *hash_operators;
+
+ /* Obviously not if it's disabled */
+ if (!enable_resultcache)
+ return NULL;
+
+ /*
+ * We can safely not bother with all this unless we expect to perform more
+ * than one inner scan. The first scan is always going to be a cache
+ * miss. This would likely fail later anyway based on costs, so this is
+ * really just to save some wasted effort.
+ */
+ if (outer_path->parent->rows < 2)
+ return NULL;
+
+ /* We can only have a result cache when there's some kind of cache key */
+ if (inner_path->param_info == NULL ||
+ inner_path->param_info->ppi_clauses == NIL)
+ return NULL;
+
+ /*
+ * We can't use a result cache when a lateral join var is required from
+ * somewhere other than the inner side of the join.
+ *
+ * XXX maybe we can just include lateral_vars from above this in the
+ * result cache's keys? Not today though. It seems likely to reduce cache
+ * hits, which may make it very seldom worthwhile.
+ */
+ if (!bms_is_subset(innerrel->lateral_relids, innerrel->relids))
+ return NULL;
+
+ /*
+ * Currently we don't do this for SEMI and ANTI joins unless they're
+ * marked as inner_unique. This is because nested loop SEMI/ANTI joins
+ * don't scan the inner node to completion, which will mean resultcache
+ * cannot mark the cache entry as complete.
+ *
+ * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
+ * = true. Should we? See add_paths_to_joinrel()
+ */
+ if (!extra->inner_unique && (jointype == JOIN_SEMI ||
+ jointype == JOIN_ANTI))
+ return NULL;
+
+ /* Check if we have hash ops for each parameter to the path */
+ if (paraminfo_get_equal_hashops(inner_path->param_info,
+ &param_exprs,
+ &hash_operators,
+ outerrel,
+ innerrel))
+ {
+ return (Path *) create_resultcache_path(root,
+ innerrel,
+ inner_path,
+ param_exprs,
+ hash_operators,
+ extra->inner_unique,
+ outer_path->parent->rows);
+ }
+
+ return NULL;
+}
+
 /*
  * try_nestloop_path
  * Consider a nestloop join path; if it appears useful, push it into
@@ -376,6 +543,8 @@ try_nestloop_path(PlannerInfo *root,
 Relids outerrelids;
 Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
 Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
+ Path *inner_cache_path;
+ bool added_path = false;

 /*
 * Paths are parameterized by top-level parents, so run parameterization
@@ -458,12 +627,92 @@ try_nestloop_path(PlannerInfo *root,
 extra->restrictlist,
 pathkeys,
 required_outer));
+ added_path = true;
+ }
+
+ /*
+ * See if we can build a result cache path for this inner_path. That might
+ * make the nested loop cheaper.
+ */
+ inner_cache_path = get_resultcache_path(root, innerrel, outerrel,
+ inner_path, outer_path, jointype,
+ extra);
+
+ if (inner_cache_path == NULL)
+ {
+ if (!added_path)
+ bms_free(required_outer);
+ return;
+ }
+
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_cache_path, extra);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ /*
+ * If the inner path is parameterized, it is parameterized by the
+ * topmost parent of the outer rel, not the outer rel itself. Fix
+ * that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_cache_path, outer_path->parent))
+ {
+ Path *reparameterize_path;
+
+ reparameterize_path = reparameterize_path_by_child(root,
+ inner_cache_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop
+ * path.
+ */
+ if (!reparameterize_path)
+ {
+ ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path;
+
+ /* Waste no memory when we reject a path here */
+ list_free(rcpath->hash_operators);
+ list_free(rcpath->param_exprs);
+ pfree(rcpath);
+
+ if (!added_path)
+ bms_free(required_outer);
+ return;
+ }
+ else
+ inner_cache_path = reparameterize_path;
+ }
+
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_cache_path,
+ extra->restrictlist,
+ pathkeys,
+ required_outer));
+ added_path = true;
 }
 else
+ {
+ ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path;
+
+ /* Waste no memory when we reject a path here */
+ list_free(rcpath->hash_operators);
+ list_free(rcpath->param_exprs);
+ pfree(rcpath);
+ }
+
+ if (!added_path)
 {
 /* Waste no memory when we reject a path here */
 bms_free(required_outer);
 }
+
 }

 /*
@@ -481,6 +730,9 @@ try_partial_nestloop_path(PlannerInfo *root,
 JoinPathExtraData *extra)
 {
 JoinCostWorkspace workspace;
+ RelOptInfo *innerrel = inner_path->parent;
+ RelOptInfo *outerrel = outer_path->parent;
+ Path *inner_cache_path;

 /*
 * If the inner path is parameterized, the parameterization must be fully
@@ -492,7 +744,6 @@ try_partial_nestloop_path(PlannerInfo *root,
 if (inner_path->param_info != NULL)
 {
 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
- RelOptInfo *outerrel = outer_path->parent;
 Relids outerrelids;

 /*
@@ -511,41 +762,114 @@ try_partial_nestloop_path(PlannerInfo *root,

 /*
 * Before creating a path, get a quick lower bound on what it is likely to
- * cost. Bail out right away if it looks terrible.
+ * cost. Don't bother if it looks terrible.
 */
 initial_cost_nestloop(root, &workspace, jointype,
 outer_path, inner_path, extra);
- if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
- return;
+ if (add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+ {
+ /*
+ * If the inner path is parameterized, it is parameterized by the
+ * topmost parent of the outer rel, not the outer rel itself. Fix
+ * that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+ {
+ inner_path = reparameterize_path_by_child(root, inner_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop
+ * path.
+ */
+ if (!inner_path)
+ return;
+ }
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ NULL));
+ }

 /*
- * If the inner path is parameterized, it is parameterized by the topmost
- * parent of the outer rel, not the outer rel itself. Fix that.
+ * See if we can build a result cache path for this inner_path. That might
+ * make the nested loop cheaper.
 */
- if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
- {
- inner_path = reparameterize_path_by_child(root, inner_path,
- outer_path->parent);
+ inner_cache_path = get_resultcache_path(root, innerrel, outerrel,
+ inner_path, outer_path, jointype,
+ extra);
+ if (inner_cache_path == NULL)
+ return;
+
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_cache_path, extra);
+ if (add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+ {
 /*
- * If we could not translate the path, we can't create nest loop path.
+ * If the inner path is parameterized, it is parameterized by the
+ * topmost parent of the outer rel, not the outer rel itself. Fix
+ * that.
*/ - if (!inner_path) - return; + if (PATH_PARAM_BY_PARENT(inner_cache_path, outer_path->parent)) + { + Path *reparameterize_path; + + reparameterize_path = reparameterize_path_by_child(root, + inner_cache_path, + outer_path->parent); + + /* + * If we could not translate the path, we can't create nest loop + * path. + */ + if (!reparameterize_path) + { + ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path; + + /* Waste no memory when we reject a path here */ + list_free(rcpath->hash_operators); + list_free(rcpath->param_exprs); + pfree(rcpath); + return; + } + else + inner_cache_path = reparameterize_path; + } + + /* Might be good enough to be worth trying, so let's try it. */ + add_partial_path(joinrel, (Path *) + create_nestloop_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_cache_path, + extra->restrictlist, + pathkeys, + NULL)); + } + else + { + ResultCachePath *rcpath = (ResultCachePath *) inner_cache_path; + + /* Waste no memory when we reject a path here */ + list_free(rcpath->hash_operators); + list_free(rcpath->param_exprs); + pfree(rcpath); } - /* Might be good enough to be worth trying, so let's try it. */ - add_partial_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - &workspace, - extra, - outer_path, - inner_path, - extra->restrictlist, - pathkeys, - NULL)); } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index eb9543f6ad..fc0e75d0d3 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -90,6 +90,9 @@ static Result *create_group_result_plan(PlannerInfo *root, static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path); static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags); +static ResultCache *create_resultcache_plan(PlannerInfo *root, + ResultCachePath *best_path, + int flags); static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags); static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); @@ -270,6 +273,9 @@ static Sort *make_sort_from_groupcols(List *groupcls, AttrNumber *grpColIdx, Plan *lefttree); static Material *make_material(Plan *lefttree); +static ResultCache *make_resultcache(Plan *lefttree, Oid *hashoperators, + Oid *collations, + List *param_exprs); static WindowAgg *make_windowagg(List *tlist, Index winref, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, @@ -444,6 +450,11 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) (MaterialPath *) best_path, flags); break; + case T_ResultCache: + plan = (Plan *) create_resultcache_plan(root, + (ResultCachePath *) best_path, + flags); + break; case T_Unique: if (IsA(best_path, UpperUniquePath)) { @@ -1516,6 +1527,54 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) return plan; } +/* + * create_resultcache_plan + * Create a ResultCache plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. 
+ */ +static ResultCache * +create_resultcache_plan(PlannerInfo *root, ResultCachePath *best_path, int flags) +{ + ResultCache *plan; + Plan *subplan; + Oid *operators; + Oid *collations; + List *param_exprs = NIL; + ListCell *lc; + ListCell *lc2; + int nkeys; + int i; + + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_SMALL_TLIST); + + param_exprs = (List *) replace_nestloop_params(root, (Node *) + best_path->param_exprs); + + nkeys = list_length(param_exprs); + operators = palloc(nkeys * sizeof(Oid)); + collations = palloc(nkeys * sizeof(Oid)); + + i = 0; + forboth(lc, param_exprs, lc2, best_path->hash_operators) + { + Expr *param_expr = (Expr *) lfirst(lc); + Oid opno = lfirst_oid(lc2); + + operators[i] = opno; + collations[i] = exprCollation((Node *) param_expr); + i++; + } + + plan = make_resultcache(subplan, operators, collations, param_exprs); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + /* * create_unique_plan * Create a Unique plan for 'best_path' and (recursively) plans @@ -6359,6 +6418,26 @@ materialize_finished_plan(Plan *subplan) return matplan; } +static ResultCache * +make_resultcache(Plan *lefttree, Oid *hashoperators, Oid *collations, + List *param_exprs) +{ + ResultCache *node = makeNode(ResultCache); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->numKeys = list_length(param_exprs); + node->hashOperators = hashoperators; + node->collations = collations; + node->param_exprs = param_exprs; + + return node; +} + Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, @@ -6947,6 +7026,7 @@ is_projection_capable_path(Path *path) { case T_Hash: case T_Material: + case T_ResultCache: case T_Sort: case T_IncrementalSort: case T_Unique: @@ -6992,6 +7072,7 @@ is_projection_capable_plan(Plan *plan) { case T_Hash: case T_Material: + case T_ResultCache: case T_Sort: case T_Unique: case T_SetOp: diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index baefe0e946..13d1af1df1 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -677,6 +677,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) break; case T_Material: + case T_ResultCache: case T_Sort: case T_IncrementalSort: case T_Unique: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index b02fcb9bfe..16f45f38b3 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -37,6 +37,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +#include "utils/typcache.h" typedef struct convert_testexpr_context @@ -135,6 +136,74 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, *colcollation = InvalidOid; } + +/* + * outer_params_hashable + * Determine if it's valid to use a ResultCache node to cache already + * seen rows matching a given set of parameters instead of performing a + * rescan of the subplan pointed to by 'subroot'. If it's valid, check + * if all parameters required by this query level can be hashed. If so, + * return true and set 'operators' to the list of hash equality operators + * for the given parameters then populate 'param_exprs' with each + * PARAM_EXEC parameter that the subplan requires the outer query to pass + * it. 
When hashing is not possible, false is returned and the two
+ * output lists are unchanged.
+ */
+static bool
+outer_params_hashable(PlannerInfo *subroot, List *plan_params, List **operators,
+ List **param_exprs)
+{
+ List *oplist = NIL;
+ List *exprlist = NIL;
+ ListCell *lc;
+
+ /* Ensure we're not given a top-level query. */
+ Assert(subroot->parent_root != NULL);
+
+ /*
+ * It's not valid to use a Result Cache node if there are any volatile
+ * functions in the subquery. Caching could cause fewer evaluations of
+ * volatile functions that have side-effects.
+ */
+ if (contain_volatile_functions((Node *) subroot->parse))
+ return false;
+
+ foreach(lc, plan_params)
+ {
+ PlannerParamItem *ppi = (PlannerParamItem *) lfirst(lc);
+ TypeCacheEntry *typentry;
+ Node *expr = ppi->item;
+ Param *param;
+
+ param = makeNode(Param);
+ param->paramkind = PARAM_EXEC;
+ param->paramid = ppi->paramId;
+ param->paramtype = exprType(expr);
+ param->paramtypmod = exprTypmod(expr);
+ param->paramcollid = exprCollation(expr);
+ param->location = -1;
+
+ typentry = lookup_type_cache(param->paramtype,
+ TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
+
+ /* XXX will eq_opr ever be invalid if hash_proc isn't? */
+ if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
+ {
+ list_free(oplist);
+ list_free(exprlist);
+ return false;
+ }
+
+ oplist = lappend_oid(oplist, typentry->eq_opr);
+ exprlist = lappend(exprlist, param);
+ }
+
+ *operators = oplist;
+ *param_exprs = exprlist;
+
+ return true; /* all params can be hashed */
+}
+
 /*
  * Convert a SubLink (as created by the parser) into a SubPlan.
  *
@@ -232,6 +301,40 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
 best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);

+ /*
+ * When enabled, for parameterized EXPR_SUBLINKs, we add a ResultCache to
+ * the top of the subplan in order to cache previously looked up results
+ * in the hope that they'll be needed again by a subsequent call. At this
+ * stage we don't have any details of how often we'll be called or with
+ * which values we'll be called, so for now, we add the Result Cache
+ * regardless. In the future, it may be better to only do this when it
+ * seems likely that we'll get some repeat lookups, i.e. cache hits.
+ */
+ if (enable_resultcache && plan_params != NIL && subLinkType == EXPR_SUBLINK)
+ {
+ List *operators;
+ List *param_exprs;
+
+ /* Determine if all the subplan parameters can be hashed */
+ if (outer_params_hashable(subroot, plan_params, &operators, &param_exprs))
+ {
+ ResultCachePath *cache_path;
+
+ /*
+ * Pass -1 for the number of calls since we don't have any idea
+ * what that'll be.
+ */
+ cache_path = create_resultcache_path(root,
+ best_path->parent,
+ best_path,
+ param_exprs,
+ operators,
+ false,
+ -1);
+ best_path = (Path *) cache_path;
+ }
+ }
+
 plan = create_plan(subroot, best_path);

 /* And convert to SubPlan or InitPlan format.
*/ @@ -2684,6 +2787,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, /* rescan_param does *not* get added to scan_params */ break; + case T_ResultCache: + /* XXX Check this is correct */ + finalize_primnode((Node *) ((ResultCache *) plan)->param_exprs, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_ProjectSet: case T_Hash: case T_Material: diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 37d6d293c3..31c4a1bb72 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1519,6 +1519,48 @@ create_material_path(RelOptInfo *rel, Path *subpath) return pathnode; } +/* + * create_resultcache_path + * Creates a path corresponding to a ResultCache plan, returning the + * pathnode. + */ +ResultCachePath * +create_resultcache_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + List *param_exprs, List *hash_operators, + bool singlerow, double calls) +{ + ResultCachePath *pathnode = makeNode(ResultCachePath); + + Assert(subpath->parent == rel); + + pathnode->path.pathtype = T_ResultCache; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = subpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + pathnode->hash_operators = hash_operators; + pathnode->param_exprs = param_exprs; + pathnode->singlerow = singlerow; + pathnode->calls = calls; + + /* + * Add a small additional charge for caching the first entry. All the + * harder calculations for rescans are performed in + * cost_resultcache_rescan(). 
+ */ + pathnode->path.startup_cost = subpath->startup_cost + cpu_tuple_cost; + pathnode->path.total_cost = subpath->total_cost + cpu_tuple_cost; + pathnode->path.rows = subpath->rows; + + return pathnode; +} + /* * create_unique_path * Creates a path representing elimination of distinct rows from the @@ -3816,6 +3858,17 @@ reparameterize_path(PlannerInfo *root, Path *path, apath->partitioned_rels, -1); } + case T_ResultCache: + { + ResultCachePath *rcpath = (ResultCachePath *) path; + + return (Path *) create_resultcache_path(root, rel, + rcpath->subpath, + rcpath->param_exprs, + rcpath->hash_operators, + rcpath->singlerow, + rcpath->calls); + } default: break; } @@ -4053,6 +4106,15 @@ do { \ } break; + case T_ResultCachePath: + { + ResultCachePath *rcpath; + + FLAT_COPY_PATH(rcpath, path, ResultCachePath); + REPARAMETERIZE_CHILD_PATH(rcpath->subpath); + new_path = (Path *) rcpath; + } + break; case T_UniquePath: { UniquePath *upath; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 75fc6f11d6..42c1d400e2 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1021,6 +1021,16 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_resultcache", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of caching results from parameterized plan nodes."), + NULL, + GUC_EXPLAIN + }, + &enable_resultcache, + true, + NULL, NULL, NULL + }, { {"enable_nestloop", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of nested-loop join plans."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 3a25287a39..481e1b6005 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -356,6 +356,7 @@ #enable_indexscan = on #enable_indexonlyscan = on #enable_material = on +#enable_resultcache = on #enable_mergejoin = on #enable_nestloop = on #enable_parallel_append = on diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index c7deeac662..3a3a24941d 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -263,6 +263,12 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const Oid *eqfunctions, const Oid *collations, PlanState *parent); +extern ExprState *ExecBuildParamSetEqual(TupleDesc ldesc, + const TupleTableSlotOps *lops, + const Oid *eqfunctions, + const Oid *collations, + const List *param_exprs, + PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, TupleTableSlot *slot, diff --git a/src/include/executor/nodeResultCache.h b/src/include/executor/nodeResultCache.h new file mode 100644 index 0000000000..e9c0c0cfd8 --- /dev/null +++ b/src/include/executor/nodeResultCache.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * nodeResultCache.h + * + * + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * src/include/executor/nodeResultCache.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODERESULTCACHE_H +#define NODERESULTCACHE_H + +#include "nodes/execnodes.h" + +extern ResultCacheState *ExecInitResultCache(ResultCache *node, EState *estate, int eflags); +extern void ExecEndResultCache(ResultCacheState *node); +extern void ExecReScanResultCache(ResultCacheState *node); +extern void 
ExecResultCacheEstimate(ResultCacheState *node, + ParallelContext *pcxt); +extern void ExecResultCacheInitializeDSM(ResultCacheState *node, + ParallelContext *pcxt); +extern void ExecResultCacheInitializeWorker(ResultCacheState *node, + ParallelWorkerContext *pwcxt); +extern void ExecResultCacheRetrieveInstrumentation(ResultCacheState *node); + +#endif /* NODERESULTCACHE_H */ diff --git a/src/include/lib/ilist.h b/src/include/lib/ilist.h index 98db885f6f..fcafc03725 100644 --- a/src/include/lib/ilist.h +++ b/src/include/lib/ilist.h @@ -394,6 +394,25 @@ dlist_move_head(dlist_head *head, dlist_node *node) dlist_check(head); } +/* + * Move element from its current position in the list to the tail position in + * the same list. + * + * Undefined behaviour if 'node' is not already part of the list. + */ +static inline void +dlist_move_tail(dlist_head *head, dlist_node *node) +{ + /* fast path if it's already at the tail */ + if (head->head.prev == node) + return; + + dlist_delete(node); + dlist_push_tail(head, node); + + dlist_check(head); +} + /* * Check whether 'node' has a following node. * Caution: unreliable if 'node' is not in the list. diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index f5dfa32d55..90a114142e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -17,6 +17,7 @@ #include "access/tupconvert.h" #include "executor/instrument.h" #include "fmgr.h" +#include "lib/ilist.h" #include "lib/pairingheap.h" #include "nodes/params.h" #include "nodes/plannodes.h" @@ -1982,6 +1983,69 @@ typedef struct MaterialState Tuplestorestate *tuplestorestate; } MaterialState; +struct ResultCacheEntry; +struct ResultCacheTuple; +struct ResultCacheKey; + +typedef struct ResultCacheInstrumentation +{ + uint64 cache_hits; /* number of times we've skipped the subnode + * scan due to tuples already being cached */ + uint64 cache_misses; /* number of times we've had to scan the + * subnode to fetch tuples */ + uint64 cache_evictions; /* number of cache entries removed due to + * the need to free memory */ + uint64 cache_overflows; /* number of times we've had to bypass the + * cache when filling it due to not being + * able to free enough space to store the + * current scan's tuples. */ +} ResultCacheInstrumentation; + +/* ---------------- + * Shared memory container for per-worker resultcache information + * ---------------- + */ +typedef struct SharedResultCacheInfo +{ + int num_workers; + ResultCacheInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedResultCacheInfo; + +/* ---------------- + * ResultCacheState information + * + * resultcache nodes are used to cache recent and commonly seen results + * from a parameterized scan. 
+ * ---------------- + */ +typedef struct ResultCacheState +{ + ScanState ss; /* its first field is NodeTag */ + int rc_status; /* value of ExecResultCache's state machine */ + int nkeys; /* number of hash table keys */ + struct resultcache_hash *hashtable; /* hash table cache entries */ + TupleDesc hashkeydesc; /* tuple descriptor for hash keys */ + TupleTableSlot *tableslot; /* min tuple slot for existing cache entries */ + TupleTableSlot *probeslot; /* virtual slot used for hash lookups */ + ExprState *cache_eq_expr; /* Compare exec params to hash key */ + ExprState **param_exprs; /* exprs containing the parameters to this + * node */ + FmgrInfo *hashfunctions; /* lookup data for hash funcs nkeys in size */ + Oid *collations; /* collation for comparisons nkeys in size */ + uint64 mem_used; /* bytes of memory used by cache */ + uint64 mem_upperlimit; /* limit the size of the cache to this (bytes) */ + uint64 mem_lowerlimit; /* reduce memory usage below this when we free + * up space */ + MemoryContext tableContext; /* memory context for actual cache */ + dlist_head lru_list; /* least recently used entry list */ + struct ResultCacheTuple *last_tuple; + struct ResultCacheEntry *entry; /* the entry that 'last_tuple' belongs to + * or NULL if 'last_tuple' is NULL. */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first record. */ + ResultCacheInstrumentation stats; /* execution statistics */ + SharedResultCacheInfo *shared_info; /* statistics for parallel workers */ +} ResultCacheState; /* ---------------- * When performing sorting by multiple keys, it's possible that the input diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 381d84b4e4..94ab62f318 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -73,6 +73,7 @@ typedef enum NodeTag T_MergeJoin, T_HashJoin, T_Material, + T_ResultCache, T_Sort, T_IncrementalSort, T_Group, @@ -130,6 +131,7 @@ typedef enum NodeTag T_MergeJoinState, T_HashJoinState, T_MaterialState, + T_ResultCacheState, T_SortState, T_IncrementalSortState, T_GroupState, @@ -241,6 +243,7 @@ typedef enum NodeTag T_MergeAppendPath, T_GroupResultPath, T_MaterialPath, + T_ResultCachePath, T_UniquePath, T_GatherPath, T_GatherMergePath, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 485d1b06c9..f83d6a71b1 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1456,6 +1456,22 @@ typedef struct MaterialPath Path *subpath; } MaterialPath; +/* + * ResultCachePath represents a ResultCache plan node, i.e., a cache that + * caches tuples from parameterized paths to save the underlying node from + * having to be rescanned for parameter values which are already cached. + */ +typedef struct ResultCachePath +{ + Path path; + Path *subpath; /* outerpath to cache tuples from */ + List *hash_operators; /* hash operators for each key */ + List *param_exprs; /* cache keys */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first record. */ + double calls; /* expected number of rescans */ +} ResultCachePath; + /* * UniquePath represents elimination of distinct rows from the output of * its subpath. 
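
As an aside on the executor state just defined: the LRU discipline that ties ResultCacheState's lru_list to the new dlist_move_tail() in lib/ilist.h can be summarised with a small standalone sketch. This is not part of the patch, and it uses a simplified hand-rolled list with illustrative names rather than the real dlist types, but it shows the intended behaviour: a cache hit moves an entry to the tail of the list, and eviction always removes from the head, where the least recently used entry lives.

#include <stdio.h>

/* Simplified stand-in for a cache entry linked into an LRU list. */
typedef struct CacheEntry
{
	struct CacheEntry *prev;
	struct CacheEntry *next;
	int			key;
} CacheEntry;

/* Circular list with a dummy head node, like dlist_head in lib/ilist.h. */
static CacheEntry lru = {&lru, &lru, 0};

static void
lru_unlink(CacheEntry *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

static void
lru_push_tail(CacheEntry *entry)
{
	entry->next = &lru;
	entry->prev = lru.prev;
	lru.prev->next = entry;
	lru.prev = entry;
}

/* On a cache hit, make 'entry' the most recently used, cf. dlist_move_tail */
static void
lru_move_tail(CacheEntry *entry)
{
	if (lru.prev == entry)
		return;					/* fast path: already at the tail */
	lru_unlink(entry);
	lru_push_tail(entry);
}

/* Evict the least recently used entry, i.e. the head of the list. */
static CacheEntry *
lru_pop_head(void)
{
	CacheEntry *entry = lru.next;

	if (entry == &lru)
		return NULL;			/* cache is empty */
	lru_unlink(entry);
	return entry;
}

int
main(void)
{
	CacheEntry	e1 = {NULL, NULL, 1};
	CacheEntry	e2 = {NULL, NULL, 2};
	CacheEntry	e3 = {NULL, NULL, 3};
	CacheEntry *victim;

	lru_push_tail(&e1);
	lru_push_tail(&e2);
	lru_push_tail(&e3);

	lru_move_tail(&e1);			/* a hit on key 1 saves it from eviction */

	/* Evicts in order 2, 3, 1 */
	while ((victim = lru_pop_head()) != NULL)
		printf("evict key %d\n", victim->key);

	return 0;
}

In the real node the same movement happens on every cache lookup, and lru_pop_head() corresponds to freeing entries until mem_used drops below mem_lowerlimit.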
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 83e01074ed..30a4f58a41 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -760,6 +760,24 @@ typedef struct Material
 	Plan		plan;
 } Material;

+/* ----------------
+ * result cache node
+ * ----------------
+ */
+typedef struct ResultCache
+{
+ Plan plan;
+
+ int numKeys; /* size of the two arrays below */
+
+ Oid *hashOperators; /* hash operators for each key */
+ Oid *collations; /* collations for each key */
+ List *param_exprs; /* exprs containing parameters */
+ bool singlerow; /* true if the cache entry should be marked as
+ * complete after we store the first tuple in
+ * it. */
+} ResultCache;
+
 /* ----------------
 *		sort node
 * ----------------
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 92e70ec0d9..ab4f24648f 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -58,6 +58,7 @@ extern PGDLLIMPORT bool enable_hashagg;
 extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
+extern PGDLLIMPORT bool enable_resultcache;
 extern PGDLLIMPORT bool enable_mergejoin;
 extern PGDLLIMPORT bool enable_hashjoin;
 extern PGDLLIMPORT bool enable_gathermerge;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 715a24ad29..816fb3366f 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -79,6 +79,13 @@ extern GroupResultPath *create_group_result_path(PlannerInfo *root,
 PathTarget *target,
 List *havingqual);
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
+extern ResultCachePath *create_resultcache_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ List *param_exprs,
+ List *hash_operators,
+ bool singlerow,
+ double calls);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
 Path *subpath, SpecialJoinInfo *sjinfo);
 extern GatherPath *create_gather_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 3bd184ae29..bdc8f3c742 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -950,12 +950,14 @@ explain (costs off)
-----------------------------------------------------------------------------------------
 Seq Scan on int4_tbl
 SubPlan 2
- -> Result
+ -> Result Cache
+ Cache Key: int4_tbl.f1
 InitPlan 1 (returns $1)
 -> Limit
 -> Index Only Scan using tenk1_unique1 on tenk1
 Index Cond: ((unique1 IS NOT NULL) AND (unique1 > int4_tbl.f1))
-(7 rows)
+ -> Result
+(9 rows)

 select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
 from int4_tbl;
@@ -2523,6 +2525,7 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*)
 -- Make sure that generation of HashAggregate for uniqification purposes
 -- does not lead to array overflow due to unexpected duplicate hash keys
 -- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com
+set enable_resultcache to off;
 explain (costs off)
 select 1 from tenk1
 where (hundred, thousand) in (select twothousand, twothousand from onek);
@@ -2538,6 +2541,7 @@ explain (costs off)
 -> Seq Scan on onek
 (8 rows)

+reset enable_resultcache;
 --
 -- Hash Aggregation Spill tests
 --
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 03ada654bb..d78be811d9 100644
--- a/src/test/regress/expected/groupingsets.out
+++ 
b/src/test/regress/expected/groupingsets.out @@ -742,19 +742,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------- Sort Sort Key: "*VALUES*".column1 -> Values Scan on "*VALUES*" SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Seq Scan on gstest2 -(10 rows) + -> Result Cache + Cache Key: "*VALUES*".column1 + -> Aggregate + Group Key: () + Filter: "*VALUES*".column1 + -> Result + One-Time Filter: "*VALUES*".column1 + -> Seq Scan on gstest2 +(12 rows) -- HAVING with GROUPING queries select ten, grouping(ten) from onek diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a46b1573bd..d5a8eba085 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2484,6 +2484,7 @@ reset enable_nestloop; -- set work_mem to '64kB'; set enable_mergejoin to off; +set enable_resultcache to off; explain (costs off) select count(*) from tenk1 a, tenk1 b where a.hundred = b.thousand and (b.fivethous % 10) < 10; @@ -2507,6 +2508,7 @@ select count(*) from tenk1 a, tenk1 b reset work_mem; reset enable_mergejoin; +reset enable_resultcache; -- -- regression test for 8.2 bug with improper re-ordering of left joins -- @@ -2973,8 +2975,8 @@ select * from where 1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1) order by 1,2; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------- Sort Sort Key: t1.q1, t1.q2 -> Hash Left Join @@ -2984,11 +2986,13 @@ order by 1,2; -> Hash -> Seq Scan on int8_tbl t2 SubPlan 1 - -> Limit - -> Result - One-Time Filter: ((42) IS NOT NULL) - -> Seq Scan on int8_tbl t3 -(13 rows) + -> Result Cache + Cache Key: (42) + -> Limit + -> Result + One-Time Filter: ((42) IS NOT NULL) + -> Seq Scan on int8_tbl t3 +(15 rows) select * from int8_tbl t1 left join @@ -3510,8 +3514,8 @@ select * from tenk1 t1 left join (tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2) on t1.hundred = t2.hundred and t1.ten = t3.ten where t1.unique1 = 1; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------- Nested Loop Left Join -> Index Scan using tenk1_unique1 on tenk1 t1 Index Cond: (unique1 = 1) @@ -3521,17 +3525,19 @@ where t1.unique1 = 1; Recheck Cond: (t1.hundred = hundred) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = t1.hundred) - -> Index Scan using tenk1_unique2 on tenk1 t3 - Index Cond: (unique2 = t2.thousand) -(11 rows) + -> Result Cache + Cache Key: t2.thousand + -> Index Scan using tenk1_unique2 on tenk1 t3 + Index Cond: (unique2 = t2.thousand) +(13 rows) explain (costs off) select * from tenk1 t1 left join (tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2) on t1.hundred = t2.hundred and t1.ten + t2.ten = t3.ten where t1.unique1 = 1; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------- Nested Loop Left Join -> Index Scan using tenk1_unique1 on tenk1 t1 Index Cond: (unique1 = 1) @@ -3541,9 +3547,11 @@ where t1.unique1 = 1; Recheck Cond: (t1.hundred = 
hundred) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = t1.hundred) - -> Index Scan using tenk1_unique2 on tenk1 t3 - Index Cond: (unique2 = t2.thousand) -(11 rows) + -> Result Cache + Cache Key: t2.thousand + -> Index Scan using tenk1_unique2 on tenk1 t3 + Index Cond: (unique2 = t2.thousand) +(13 rows) explain (costs off) select count(*) from @@ -4890,14 +4898,15 @@ explain (costs off) QUERY PLAN ------------------------------------------------------------------ Aggregate - -> Hash Join - Hash Cond: ("*VALUES*".column1 = b.unique2) + -> Nested Loop -> Nested Loop -> Index Only Scan using tenk1_unique1 on tenk1 a -> Values Scan on "*VALUES*" - -> Hash + -> Result Cache + Cache Key: "*VALUES*".column1 -> Index Only Scan using tenk1_unique2 on tenk1 b -(8 rows) + Index Cond: (unique2 = "*VALUES*".column1) +(9 rows) select count(*) from tenk1 a, tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x; diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out index 3a91c144a2..5c826792f5 100644 --- a/src/test/regress/expected/join_hash.out +++ b/src/test/regress/expected/join_hash.out @@ -923,27 +923,42 @@ WHERE Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b Filter: ((SubPlan 4) < 50) SubPlan 4 - -> Result - Output: (hjtest_1.b * 5) + -> Result Cache + Output: ((hjtest_1.b * 5)) + Cache Key: hjtest_1.b + -> Result + Output: (hjtest_1.b * 5) -> Hash Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b -> Seq Scan on public.hjtest_2 Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b Filter: ((SubPlan 5) < 55) SubPlan 5 - -> Result - Output: (hjtest_2.c * 5) + -> Result Cache + Output: ((hjtest_2.c * 5)) + Cache Key: hjtest_2.c + -> Result + Output: (hjtest_2.c * 5) SubPlan 1 - -> Result + -> Result Cache Output: 1 - One-Time Filter: (hjtest_2.id = 1) + Cache Key: hjtest_2.id + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) SubPlan 3 - -> Result - Output: (hjtest_2.c * 5) + -> Result Cache + Output: ((hjtest_2.c * 5)) + Cache Key: hjtest_2.c + -> Result + Output: (hjtest_2.c * 5) SubPlan 2 - -> Result - Output: (hjtest_1.b * 5) -(28 rows) + -> Result Cache + Output: ((hjtest_1.b * 5)) + Cache Key: hjtest_1.b + -> Result + Output: (hjtest_1.b * 5) +(43 rows) SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 FROM hjtest_1, hjtest_2 @@ -977,27 +992,42 @@ WHERE Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b Filter: ((SubPlan 5) < 55) SubPlan 5 - -> Result - Output: (hjtest_2.c * 5) + -> Result Cache + Output: ((hjtest_2.c * 5)) + Cache Key: hjtest_2.c + -> Result + Output: (hjtest_2.c * 5) -> Hash Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b -> Seq Scan on public.hjtest_1 Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b Filter: ((SubPlan 4) < 50) SubPlan 4 + -> Result Cache + Output: ((hjtest_1.b * 5)) + Cache Key: hjtest_1.b + -> Result + Output: (hjtest_1.b * 5) + SubPlan 2 + -> Result Cache + Output: ((hjtest_1.b * 5)) + Cache Key: hjtest_1.b -> Result Output: (hjtest_1.b * 5) - SubPlan 2 - -> Result - Output: (hjtest_1.b * 5) SubPlan 1 - -> Result + -> Result Cache Output: 1 - One-Time Filter: (hjtest_2.id = 1) + Cache Key: hjtest_2.id + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) SubPlan 3 - -> Result - Output: (hjtest_2.c * 5) -(28 rows) + -> Result Cache + Output: ((hjtest_2.c * 5)) + Cache Key: hjtest_2.c + -> Result + Output: 
(hjtest_2.c * 5) +(43 rows) SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 FROM hjtest_2, hjtest_1 diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 4315e8e0a3..acee21c08e 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -1930,6 +1930,8 @@ begin ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N'); ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N'); ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N'); + ln := regexp_replace(ln, 'Cache Hits: \d+', 'Cache Hits: N'); + ln := regexp_replace(ln, 'Cache Misses: \d+', 'Cache Misses: N'); return next ln; end loop; end; @@ -2058,8 +2060,8 @@ create index ab_a3_b3_a_idx on ab_a3_b3 (a); set enable_hashjoin = 0; set enable_mergejoin = 0; select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(0, 0, 1)'); - explain_parallel_append --------------------------------------------------------------------------------------------------------- + explain_parallel_append +-------------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) Workers Planned: 1 @@ -2068,32 +2070,36 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -> Nested Loop (actual rows=N loops=N) -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) Filter: (a = ANY ('{0,0,1}'::integer[])) - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) - Index Cond: (a = a.a) -(27 rows) + -> Result Cache (actual rows=N loops=N) + Cache Key: a.a + Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + Worker 0: Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + -> Append (actual rows=N loops=N) + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + Index Cond: (a = a.a) + -> Index Scan using 
ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + Index Cond: (a = a.a) +(31 rows) -- Ensure the same partitions are pruned when we make the nested loop -- parameter an Expr rather than a plain Param. select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a + 0 where a.a in(0, 0, 1)'); - explain_parallel_append --------------------------------------------------------------------------------------------------------- + explain_parallel_append +-------------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) Workers Planned: 1 @@ -2102,31 +2108,35 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -> Nested Loop (actual rows=N loops=N) -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) Filter: (a = ANY ('{0,0,1}'::integer[])) - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) - Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) - Index Cond: (a = (a.a + 0)) -(27 rows) + -> Result Cache (actual rows=N loops=N) + Cache Key: (a.a + 0) + Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + Worker 0: Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + -> Append (actual rows=N loops=N) + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + Index Cond: (a = (a.a + 0)) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + Index Cond: (a = (a.a + 0)) +(31 rows) insert into lprt_a values(3),(3); select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 3)'); - explain_parallel_append --------------------------------------------------------------------------------------------------------- + explain_parallel_append 
+-------------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) Workers Planned: 1 @@ -2135,30 +2145,34 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -> Nested Loop (actual rows=N loops=N) -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) Filter: (a = ANY ('{1,0,3}'::integer[])) - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) - Index Cond: (a = a.a) -(27 rows) + -> Result Cache (actual rows=N loops=N) + Cache Key: a.a + Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + Worker 0: Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + -> Append (actual rows=N loops=N) + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) + Index Cond: (a = a.a) +(31 rows) select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)'); - explain_parallel_append --------------------------------------------------------------------------------------------------------- + explain_parallel_append +-------------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) Workers Planned: 1 @@ -2168,31 +2182,35 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) Filter: (a = ANY ('{1,0,0}'::integer[])) Rows Removed by Filter: N - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 
(actual rows=N loops=N) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) - Index Cond: (a = a.a) -(28 rows) + -> Result Cache (actual rows=N loops=N) + Cache Key: a.a + Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + Worker 0: Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + -> Append (actual rows=N loops=N) + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + Index Cond: (a = a.a) +(32 rows) delete from lprt_a where a = 1; select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)'); - explain_parallel_append -------------------------------------------------------------------------------------------------- + explain_parallel_append +------------------------------------------------------------------------------------------------------------ Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) Workers Planned: 1 @@ -2202,26 +2220,30 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) Filter: (a = ANY ('{1,0,0}'::integer[])) Rows Removed by Filter: N - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) - Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) - Index Cond: (a = a.a) -(28 rows) + -> Result Cache (actual rows=N loops=N) + Cache Key: a.a + Cache Hits: N Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + Worker 0: Cache Hits: N 
Cache Misses: N Cache Evictions: 0 Cache Overflows: 0 + -> Append (actual rows=N loops=N) + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + Index Cond: (a = a.a) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + Index Cond: (a = a.a) +(32 rows) reset enable_hashjoin; reset enable_mergejoin; diff --git a/src/test/regress/expected/resultcache.out b/src/test/regress/expected/resultcache.out new file mode 100644 index 0000000000..a231c080f8 --- /dev/null +++ b/src/test/regress/expected/resultcache.out @@ -0,0 +1,100 @@ +-- Perform tests on the Result Cache node. +-- Ensure we get the expected plan with sub plans. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.twenty = t1.twenty) FROM tenk1 t1; + QUERY PLAN +------------------------------------------------------------------------------------- + Seq Scan on tenk1 t1 (actual rows=10000 loops=1) + SubPlan 1 + -> Result Cache (actual rows=1 loops=10000) + Cache Key: t1.twenty + Cache Hits: 9980 Cache Misses: 20 Cache Evictions: 0 Cache Overflows: 0 + -> Aggregate (actual rows=1 loops=20) + -> Seq Scan on tenk1 t2 (actual rows=500 loops=20) + Filter: (twenty = t1.twenty) + Rows Removed by Filter: 9500 +(9 rows) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Seq Scan on tenk1 t1 (actual rows=10000 loops=1) + SubPlan 1 + -> Result Cache (actual rows=1 loops=10000) + Cache Key: t1.thousand + Cache Hits: 9000 Cache Misses: 1000 Cache Evictions: 0 Cache Overflows: 0 + -> Aggregate (actual rows=1 loops=1000) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 (actual rows=10 loops=1000) + Index Cond: (thousand = t1.thousand) + Heap Fetches: 0 +(9 rows) + +-- Reduce work_mem so that we see some cache evictions +SET work_mem TO '64kB'; +-- Ensure we get some evictions. The number is likely to vary on different machines, so +-- XXX I'll likely need to think about how to check this better.
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Seq Scan on tenk1 t1 (actual rows=10000 loops=1) + SubPlan 1 + -> Result Cache (actual rows=1 loops=10000) + Cache Key: t1.thousand + Cache Hits: 4622 Cache Misses: 5378 Cache Evictions: 4851 Cache Overflows: 0 + -> Aggregate (actual rows=1 loops=5378) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 (actual rows=10 loops=5378) + Index Cond: (thousand = t1.thousand) + Heap Fetches: 0 +(9 rows) + +RESET work_mem; +-- Ensure the cache works as expected with a parallel scan. +SET min_parallel_table_scan_size TO 0; +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather (actual rows=10000 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Parallel Seq Scan on tenk1 t1 (actual rows=3333 loops=3) + SubPlan 1 + -> Result Cache (actual rows=1 loops=10000) + Cache Key: t1.thousand + Cache Hits: 9000 Cache Misses: 1000 Cache Evictions: 0 Cache Overflows: 0 + -> Aggregate (actual rows=1 loops=1000) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 (actual rows=10 loops=1000) + Index Cond: (thousand = t1.thousand) + Heap Fetches: 0 +(12 rows) + +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +-- Ensure we get a result cache on the inner side of the nested loop +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Nested Loop (actual rows=10000 loops=1) + -> Seq Scan on tenk1 t2 (actual rows=10000 loops=1) + -> Result Cache (actual rows=1 loops=10000) + Cache Key: t2.twenty + Cache Hits: 9980 Cache Misses: 20 Cache Evictions: 0 Cache Overflows: 0 + -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=20) + Index Cond: (unique1 = t2.twenty) + Heap Fetches: 0 +(9 rows) + +-- And check we get the expected results. 
+SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty; + count | avg +-------+-------------------- + 10000 | 9.5000000000000000 +(1 row) + diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 9506aaef82..b9a58be7ad 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -1477,18 +1477,20 @@ SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; (3 rows) EXPLAIN (COSTS OFF) SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; - QUERY PLAN -------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------- Seq Scan on s2 Filter: (((x % 2) = 0) AND (y ~~ '%28%'::text)) SubPlan 2 - -> Limit - -> Seq Scan on s1 - Filter: (hashed SubPlan 1) - SubPlan 1 - -> Seq Scan on s2 s2_1 - Filter: (((x % 2) = 0) AND (y ~~ '%af%'::text)) -(9 rows) + -> Result Cache + Cache Key: s2.x + -> Limit + -> Seq Scan on s1 + Filter: (hashed SubPlan 1) + SubPlan 1 + -> Seq Scan on s2 s2_1 + Filter: (((x % 2) = 0) AND (y ~~ '%af%'::text)) +(11 rows) SET SESSION AUTHORIZATION regress_rls_alice; ALTER POLICY p2 ON s2 USING (x in (select a from s1 where b like '%d2%')); diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 96dfb7c8dd..0d2b3c5c10 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -148,14 +148,18 @@ explain (costs off) -> Parallel Seq Scan on part_pa_test_p1 pa2_1 -> Parallel Seq Scan on part_pa_test_p2 pa2_2 SubPlan 2 - -> Result + -> Result Cache + Cache Key: max((SubPlan 1)) + -> Result SubPlan 1 - -> Append - -> Seq Scan on part_pa_test_p1 pa1_1 - Filter: (a = pa2.a) - -> Seq Scan on part_pa_test_p2 pa1_2 - Filter: (a = pa2.a) -(14 rows) + -> Result Cache + Cache Key: pa2.a + -> Append + -> Seq Scan on part_pa_test_p1 pa1_1 + Filter: (a = pa2.a) + -> Seq Scan on part_pa_test_p2 pa1_2 + Filter: (a = pa2.a) +(18 rows) drop table part_pa_test; -- test with leader participation disabled @@ -1167,9 +1171,11 @@ SELECT 1 FROM tenk1_vw_sec Workers Planned: 4 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 SubPlan 1 - -> Aggregate - -> Seq Scan on int4_tbl - Filter: (f1 < tenk1_vw_sec.unique1) -(9 rows) + -> Result Cache + Cache Key: tenk1_vw_sec.unique1 + -> Aggregate + -> Seq Scan on int4_tbl + Filter: (f1 < tenk1_vw_sec.unique1) +(11 rows) rollback; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 4c6cd5f146..9993bca2fd 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -844,19 +844,25 @@ explain (verbose, costs off) explain (verbose, costs off) select x, x from (select (select now() where y=y) as x from (values(1),(2)) v(y)) ss; - QUERY PLAN ----------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Values Scan on "*VALUES*" Output: (SubPlan 1), (SubPlan 2) SubPlan 1 - -> Result - Output: now() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) + -> Result Cache + Output: (now()) + Cache Key: "*VALUES*".column1 + -> Result + Output: now() + One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) SubPlan 2 - -> Result - Output: now() - One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) -(10 
rows) + -> Result Cache + Output: (now()) + Cache Key: "*VALUES*".column1 + -> Result + Output: now() + One-Time Filter: ("*VALUES*".column1 = "*VALUES*".column1) +(16 rows) explain (verbose, costs off) select x, x from diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 01b7786f01..331767c4dd 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -87,10 +87,11 @@ select name, setting from pg_settings where name like 'enable%'; enable_partition_pruning | on enable_partitionwise_aggregate | off enable_partitionwise_join | off + enable_resultcache | on enable_seqscan | on enable_sort | on enable_tidscan | on -(18 rows) +(19 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 026ea880cd..317cd56eb2 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -112,7 +112,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # ---------- # Another group of parallel tests # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain resultcache # event triggers cannot run concurrently with any test that runs DDL test: event_trigger diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 979d926119..04f0473b92 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -198,6 +198,7 @@ test: partition_aggregate test: partition_info test: tuplesort test: explain +test: resultcache test: event_trigger test: fast_default test: stats diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 044d515507..2eac836e76 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1076,9 +1076,11 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*) -- Make sure that generation of HashAggregate for uniqification purposes -- does not lead to array overflow due to unexpected duplicate hash keys -- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com +set enable_resultcache to off; explain (costs off) select 1 from tenk1 where (hundred, thousand) in (select twothousand, twothousand from onek); +reset enable_resultcache; -- -- Hash Aggregation Spill tests diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1403e0ffe7..b0bc88140f 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -539,6 +539,7 @@ reset enable_nestloop; set work_mem to '64kB'; set enable_mergejoin to off; +set enable_resultcache to off; explain (costs off) select count(*) from tenk1 a, tenk1 b @@ -548,6 +549,7 @@ select count(*) from tenk1 a, tenk1 b reset work_mem; reset enable_mergejoin; +reset enable_resultcache; -- -- regression test for 8.2 bug with improper re-ordering of left joins diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 6658455a74..bc923ae873 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -453,6 +453,8 @@ begin ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: 
N'); ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N'); ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N'); + ln := regexp_replace(ln, 'Cache Hits: \d+', 'Cache Hits: N'); + ln := regexp_replace(ln, 'Cache Misses: \d+', 'Cache Misses: N'); return next ln; end loop; end; diff --git a/src/test/regress/sql/resultcache.sql b/src/test/regress/sql/resultcache.sql new file mode 100644 index 0000000000..ecf857c7f6 --- /dev/null +++ b/src/test/regress/sql/resultcache.sql @@ -0,0 +1,32 @@ +-- Perform tests on the Result Cache node. + +-- Ensure we get the expected plan with sub plans. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.twenty = t1.twenty) FROM tenk1 t1; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; + +-- Reduce work_mem so that we see some cache evictions +SET work_mem TO '64kB'; +-- Ensure we get some evictions. The number is likely to vary on different machines, so +-- XXX I'll likely need to think about how to check this better. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; +RESET work_mem; + +-- Ensure the cache works as expected with a parallel scan. +SET min_parallel_table_scan_size TO 0; +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT unique1, (SELECT count(*) FROM tenk1 t2 WHERE t2.thousand = t1.thousand) FROM tenk1 t1; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; + +-- Ensure we get a result cache on the inner side of the nested loop +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty; + +-- And check we get the expected results. +SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty; -- 2.25.1