contrib/custmj/Makefile | 17 + contrib/custmj/createplan.c | 435 +++++++++ contrib/custmj/custmj.c | 691 +++++++++++++++ contrib/custmj/custmj.h | 148 ++++ contrib/custmj/expected/custmj.out | 378 ++++++++ contrib/custmj/joinpath.c | 988 +++++++++++++++++++++ contrib/custmj/nodeMergejoin.c | 1694 ++++++++++++++++++++++++++++++++++++ contrib/custmj/setrefs.c | 326 +++++++ contrib/custmj/sql/custmj.sql | 79 ++ 9 files changed, 4756 insertions(+) diff --git a/contrib/custmj/Makefile b/contrib/custmj/Makefile new file mode 100644 index 0000000..9b264d4 --- /dev/null +++ b/contrib/custmj/Makefile @@ -0,0 +1,17 @@ +# contrib/custmj/Makefile + +MODULE_big = custmj +OBJS = custmj.o joinpath.o createplan.o setrefs.o nodeMergejoin.o + +REGRESS = custmj + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/custmj +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/custmj/createplan.c b/contrib/custmj/createplan.c new file mode 100644 index 0000000..e522d73 --- /dev/null +++ b/contrib/custmj/createplan.c @@ -0,0 +1,435 @@ +/*------------------------------------------------------------------------- + * + * createplan.c + * Routines to create the desired plan for processing a query. + * Planning is complete, we just need to convert the selected + * Path into a Plan. 
+ * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/createplan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "access/skey.h" +#include "catalog/pg_class.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/predtest.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/subselect.h" +#include "optimizer/tlist.h" +#include "optimizer/var.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" +#include "custmj.h" + +static MergeJoin *make_mergejoin(List *tlist, + List *joinclauses, List *otherclauses, + List *mergeclauses, + Oid *mergefamilies, + Oid *mergecollations, + int *mergestrategies, + bool *mergenullsfirst, + Plan *lefttree, Plan *righttree, + JoinType jointype); +static Material *make_material(Plan *lefttree); + +/* + * create_gating_plan + * Deal with pseudoconstant qual clauses + * + * If the node's quals list includes any pseudoconstant quals, put them + * into a gating Result node atop the already-built plan. Otherwise, + * return the plan as-is. + * + * Note that we don't change cost or size estimates when doing gating. + * The costs of qual eval were already folded into the plan's startup cost. + * Leaving the size alone amounts to assuming that the gating qual will + * succeed, which is the conservative estimate for planning upper queries. 
+ * We certainly don't want to assume the output size is zero (unless the + * gating qual is actually constant FALSE, and that case is dealt with in + * clausesel.c). Interpolating between the two cases is silly, because + * it doesn't reflect what will really happen at runtime, and besides which + * in most cases we have only a very bad idea of the probability of the gating + * qual being true. + */ +Plan * +create_gating_plan(PlannerInfo *root, Plan *plan, List *quals) +{ + List *pseudoconstants; + + /* Sort into desirable execution order while still in RestrictInfo form */ + quals = order_qual_clauses(root, quals); + + /* Pull out any pseudoconstant quals from the RestrictInfo list */ + pseudoconstants = extract_actual_clauses(quals, true); + + if (!pseudoconstants) + return plan; + + return (Plan *) make_result(root, + plan->targetlist, + (Node *) pseudoconstants, + plan); +} + +MergeJoin * +create_mergejoin_plan(PlannerInfo *root, + CustomMergePath *best_path, + Plan *outer_plan, + Plan *inner_plan) +{ + List *tlist = build_path_tlist(root, &best_path->cpath.path); + List *joinclauses; + List *otherclauses; + List *mergeclauses; + List *outerpathkeys; + List *innerpathkeys; + int nClauses; + Oid *mergefamilies; + Oid *mergecollations; + int *mergestrategies; + bool *mergenullsfirst; + MergeJoin *join_plan; + int i; + ListCell *lc; + ListCell *lop; + ListCell *lip; + + /* Sort join qual clauses into best execution order */ + /* NB: do NOT reorder the mergeclauses */ + joinclauses = order_qual_clauses(root, best_path->joinrestrictinfo); + + /* Get the join qual clauses (in plain expression form) */ + /* Any pseudoconstant clauses are ignored here */ + if (IS_OUTER_JOIN(best_path->jointype)) + { + extract_actual_join_clauses(joinclauses, + &joinclauses, &otherclauses); + } + else + { + /* We can treat all clauses alike for an inner join */ + joinclauses = extract_actual_clauses(joinclauses, false); + otherclauses = NIL; + } + + /* + * Remove the mergeclauses from 
the list of join qual clauses, leaving the + * list of quals that must be checked as qpquals. + */ + mergeclauses = get_actual_clauses(best_path->path_mergeclauses); + joinclauses = list_difference(joinclauses, mergeclauses); + + /* + * Replace any outer-relation variables with nestloop params. There + * should not be any in the mergeclauses. + */ + if (best_path->cpath.path.param_info) + { + joinclauses = (List *) + replace_nestloop_params(root, (Node *) joinclauses); + otherclauses = (List *) + replace_nestloop_params(root, (Node *) otherclauses); + } + + /* + * Rearrange mergeclauses, if needed, so that the outer variable is always + * on the left; mark the mergeclause restrictinfos with correct + * outer_is_left status. + */ + mergeclauses = get_switched_clauses(best_path->path_mergeclauses, + best_path->outerjoinpath->parent->relids); + + /* + * Create explicit sort nodes for the outer and inner paths if necessary. + * Make sure there are no excess columns in the inputs if sorting. + */ + if (best_path->outersortkeys) + { + disuse_physical_tlist(root, outer_plan, best_path->outerjoinpath); + outer_plan = (Plan *) + make_sort_from_pathkeys(root, + outer_plan, + best_path->outersortkeys, + -1.0); + outerpathkeys = best_path->outersortkeys; + } + else + outerpathkeys = best_path->outerjoinpath->pathkeys; + + if (best_path->innersortkeys) + { + disuse_physical_tlist(root, inner_plan, best_path->innerjoinpath); + inner_plan = (Plan *) + make_sort_from_pathkeys(root, + inner_plan, + best_path->innersortkeys, + -1.0); + innerpathkeys = best_path->innersortkeys; + } + else + innerpathkeys = best_path->innerjoinpath->pathkeys; + + /* + * If specified, add a materialize node to shield the inner plan from the + * need to handle mark/restore. + */ + if (best_path->materialize_inner) + { + Plan *matplan = (Plan *) make_material(inner_plan); + + /* + * We assume the materialize will not spill to disk, and therefore + * charge just cpu_operator_cost per tuple. 
(Keep this estimate in + * sync with final_cost_mergejoin.) + */ + copy_plan_costsize(matplan, inner_plan); + matplan->total_cost += cpu_operator_cost * matplan->plan_rows; + + inner_plan = matplan; + } + + /* + * Compute the opfamily/collation/strategy/nullsfirst arrays needed by the + * executor. The information is in the pathkeys for the two inputs, but + * we need to be careful about the possibility of mergeclauses sharing a + * pathkey (compare find_mergeclauses_for_pathkeys()). + */ + nClauses = list_length(mergeclauses); + Assert(nClauses == list_length(best_path->path_mergeclauses)); + mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); + mergecollations = (Oid *) palloc(nClauses * sizeof(Oid)); + mergestrategies = (int *) palloc(nClauses * sizeof(int)); + mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + + lop = list_head(outerpathkeys); + lip = list_head(innerpathkeys); + i = 0; + foreach(lc, best_path->path_mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *opathkey; + PathKey *ipathkey; + EquivalenceClass *opeclass; + EquivalenceClass *ipeclass; + ListCell *l2; + + /* fetch outer/inner eclass from mergeclause */ + Assert(IsA(rinfo, RestrictInfo)); + if (rinfo->outer_is_left) + { + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; + } + else + { + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; + } + Assert(oeclass != NULL); + Assert(ieclass != NULL); + + /* + * For debugging purposes, we check that the eclasses match the paths' + * pathkeys. In typical cases the merge clauses are one-to-one with + * the pathkeys, but when dealing with partially redundant query + * conditions, we might have clauses that re-reference earlier path + * keys. The case that we need to reject is where a pathkey is + * entirely skipped over. 
+ * + * lop and lip reference the first as-yet-unused pathkey elements; + * it's okay to match them, or any element before them. If they're + * NULL then we have found all pathkey elements to be used. + */ + if (lop) + { + opathkey = (PathKey *) lfirst(lop); + opeclass = opathkey->pk_eclass; + if (oeclass == opeclass) + { + /* fast path for typical case */ + lop = lnext(lop); + } + else + { + /* redundant clauses ... must match something before lop */ + foreach(l2, outerpathkeys) + { + if (l2 == lop) + break; + opathkey = (PathKey *) lfirst(l2); + opeclass = opathkey->pk_eclass; + if (oeclass == opeclass) + break; + } + if (oeclass != opeclass) + elog(ERROR, "outer pathkeys do not match mergeclauses"); + } + } + else + { + /* redundant clauses ... must match some already-used pathkey */ + opathkey = NULL; + opeclass = NULL; + foreach(l2, outerpathkeys) + { + opathkey = (PathKey *) lfirst(l2); + opeclass = opathkey->pk_eclass; + if (oeclass == opeclass) + break; + } + if (l2 == NULL) + elog(ERROR, "outer pathkeys do not match mergeclauses"); + } + + if (lip) + { + ipathkey = (PathKey *) lfirst(lip); + ipeclass = ipathkey->pk_eclass; + if (ieclass == ipeclass) + { + /* fast path for typical case */ + lip = lnext(lip); + } + else + { + /* redundant clauses ... must match something before lip */ + foreach(l2, innerpathkeys) + { + if (l2 == lip) + break; + ipathkey = (PathKey *) lfirst(l2); + ipeclass = ipathkey->pk_eclass; + if (ieclass == ipeclass) + break; + } + if (ieclass != ipeclass) + elog(ERROR, "inner pathkeys do not match mergeclauses"); + } + } + else + { + /* redundant clauses ... 
must match some already-used pathkey */ + ipathkey = NULL; + ipeclass = NULL; + foreach(l2, innerpathkeys) + { + ipathkey = (PathKey *) lfirst(l2); + ipeclass = ipathkey->pk_eclass; + if (ieclass == ipeclass) + break; + } + if (l2 == NULL) + elog(ERROR, "inner pathkeys do not match mergeclauses"); + } + + /* pathkeys should match each other too (more debugging) */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation || + opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + /* OK, save info for executor */ + mergefamilies[i] = opathkey->pk_opfamily; + mergecollations[i] = opathkey->pk_eclass->ec_collation; + mergestrategies[i] = opathkey->pk_strategy; + mergenullsfirst[i] = opathkey->pk_nulls_first; + i++; + } + + /* + * Note: it is not an error if we have additional pathkey elements (i.e., + * lop or lip isn't NULL here). The input paths might be better-sorted + * than we need for the current mergejoin. + */ + + /* + * Now we can build the mergejoin node. 
+ */ + join_plan = make_mergejoin(tlist, + joinclauses, + otherclauses, + mergeclauses, + mergefamilies, + mergecollations, + mergestrategies, + mergenullsfirst, + outer_plan, + inner_plan, + best_path->jointype); + + /* Costs of sort and material steps are included in path cost already */ + copy_path_costsize(&join_plan->join.plan, &best_path->cpath.path); + + return join_plan; +} + +static MergeJoin * +make_mergejoin(List *tlist, + List *joinclauses, + List *otherclauses, + List *mergeclauses, + Oid *mergefamilies, + Oid *mergecollations, + int *mergestrategies, + bool *mergenullsfirst, + Plan *lefttree, + Plan *righttree, + JoinType jointype) +{ + MergeJoin *node = makeNode(MergeJoin); + Plan *plan = &node->join.plan; + + /* cost should be inserted by caller */ + plan->targetlist = tlist; + plan->qual = otherclauses; + plan->lefttree = lefttree; + plan->righttree = righttree; + node->mergeclauses = mergeclauses; + node->mergeFamilies = mergefamilies; + node->mergeCollations = mergecollations; + node->mergeStrategies = mergestrategies; + node->mergeNullsFirst = mergenullsfirst; + node->join.jointype = jointype; + node->join.joinqual = joinclauses; + + return node; +} + +static Material * +make_material(Plan *lefttree) +{ + Material *node = makeNode(Material); + Plan *plan = &node->plan; + + /* cost should be inserted by caller */ + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + return node; +} diff --git a/contrib/custmj/custmj.c b/contrib/custmj/custmj.c new file mode 100644 index 0000000..ef64857 --- /dev/null +++ b/contrib/custmj/custmj.c @@ -0,0 +1,691 @@ +/* ------------------------------------------------------------------------- + * + * contrib/custmj/custmj.c + * + * Custom version of MergeJoin - an example implementation of MergeJoin + * logic on top of Custom-Plan interface, to demonstrate how to use this + * interface for joining relations. 
+ * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "commands/explain.h" +#include "nodes/makefuncs.h" +#include "nodes/nodes.h" +#include "nodes/nodeFuncs.h" +#include "executor/executor.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/subselect.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "custmj.h" + +PG_MODULE_MAGIC; + +/* declaration of local variables */ +static add_join_path_hook_type add_join_path_orig = NULL; +bool enable_custom_mergejoin; + +/* callback table of custom merge join */ +CustomPathMethods custmj_path_methods; +CustomPlanMethods custmj_plan_methods; + +/* + * custmjAddJoinPath + * + * A callback function to add custom version of merge-join logic towards + * the supplied relations join. + */ +static void +custmjAddJoinPath(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist, + Relids param_source_rels, + Relids extra_lateral_rels) +{ + List *mergeclause_list = NIL; + bool mergejoin_allowed = true; + SemiAntiJoinFactors semifactors; + + if (add_join_path_orig) + (*add_join_path_orig)(root, + joinrel, + outerrel, + innerrel, + jointype, + sjinfo, + restrictlist, + param_source_rels, + extra_lateral_rels); + /* nothing to do anymore */ + if (!enable_custom_mergejoin) + return; + + /* + * Find potential mergejoin clauses. 
+ */ + mergeclause_list = select_mergejoin_clauses(root, + joinrel, + outerrel, + innerrel, + restrictlist, + jointype, + &mergejoin_allowed); + if (!mergejoin_allowed) + return; + + /* + * If it's SEMI or ANTI join, compute correction factors for cost + * estimation. These will be the same for all paths. + */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + compute_semi_anti_join_factors(root, outerrel, innerrel, + jointype, sjinfo, restrictlist, + &semifactors); + + /* + * 1. Consider mergejoin paths where both relations must be explicitly + * sorted. Skip this if we can't mergejoin. + */ + sort_inner_and_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list, jointype, + sjinfo, + param_source_rels, extra_lateral_rels); + + /* + * 2. Consider paths where the outer relation need not be explicitly + * sorted. This includes both nestloops and mergejoins where the outer + * path is already ordered. Again, skip this if we can't mergejoin. + * (That's okay because we know that nestloop can't handle right/full + * joins at all, so it wouldn't work in the prohibited cases either.) + */ + match_unsorted_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list, jointype, + sjinfo, &semifactors, + param_source_rels, extra_lateral_rels); +} + +/* + * CreateCustomMergeJoinPlan + * + * A method to populate CustomPlan node according to the supplied + * CustomPath node; being chosen by the planner. 
+ */ +static CustomPlan * +CreateCustomMergeJoinPlan(PlannerInfo *root, CustomPath *custom_path) +{ + CustomMergePath *cmpath = (CustomMergePath *) custom_path; + CustomMergeJoin *cmjoin; + MergeJoin *mjplan; + Plan *outer_plan; + Plan *inner_plan; + + /* plans the underlying relations */ + outer_plan = create_plan_recurse(root, cmpath->outerjoinpath); + inner_plan = create_plan_recurse(root, cmpath->innerjoinpath); + + mjplan = create_mergejoin_plan(root, cmpath, outer_plan, inner_plan); + + /* + * If there are any pseudoconstant clauses attached to this node, insert a + * gating Result node that evaluates the pseudoconstants as one-time + * quals. NOTE(review): if a Result comes back, the MergeJoin cast below looks unsafe - confirm. + */ + if (root->hasPseudoConstantQuals) + mjplan = (MergeJoin *) + create_gating_plan(root, &mjplan->join.plan, + cmpath->joinrestrictinfo); + + /* construct a CustomMergeJoin plan */ + cmjoin = palloc0(sizeof(CustomMergeJoin)); + cmjoin->cplan.plan = mjplan->join.plan; + cmjoin->cplan.plan.type = T_CustomPlan; + cmjoin->cplan.methods = &custmj_plan_methods; + cmjoin->jointype = mjplan->join.jointype; + cmjoin->joinqual = mjplan->join.joinqual; + cmjoin->mergeclauses = mjplan->mergeclauses; + cmjoin->mergeFamilies = mjplan->mergeFamilies; + cmjoin->mergeCollations = mjplan->mergeCollations; + cmjoin->mergeStrategies = mjplan->mergeStrategies; + cmjoin->mergeNullsFirst = mjplan->mergeNullsFirst; + pfree(mjplan); + + return &cmjoin->cplan; +} + +/* + * TextOutCustomMergeJoinPath + * + * A method to support nodeToString for CustomPath node + */ +static void +TextOutCustomMergeJoinPath(StringInfo str, Node *node) +{ + CustomMergePath *cmpath = (CustomMergePath *) node; + char *temp; + + /* common fields should be dumped by the core backend */ + Assert(cmpath->cpath.methods == &custmj_path_methods); + appendStringInfo(str, " :jointype %d", cmpath->jointype); + temp = nodeToString(cmpath->outerjoinpath); + appendStringInfo(str, " :outerjoinpath %s", temp); + pfree(temp); + temp = nodeToString(cmpath->innerjoinpath); + 
appendStringInfo(str, " :innerjoinpath %s", temp); + pfree(temp); + temp = nodeToString(cmpath->joinrestrictinfo); + appendStringInfo(str, " :joinrestrictinfo %s", temp); + pfree(temp); + temp = nodeToString(cmpath->path_mergeclauses); + appendStringInfo(str, " :path_mergeclauses %s", temp); + pfree(temp); + temp = nodeToString(cmpath->outersortkeys); + appendStringInfo(str, " :outersortkeys %s", temp); + pfree(temp); + temp = nodeToString(cmpath->innersortkeys); + appendStringInfo(str, " :innersortkeys %s", temp); + pfree(temp); + appendStringInfo(str, " :materialize_inner %s", + cmpath->materialize_inner ? "true" : "false"); +} + +/* + * SetCustomMergeJoinRef + * + * A method to adjust varno/varattno in the expression clauses. + */ +static void +SetCustomMergeJoinRef(PlannerInfo *root, + CustomPlan *custom_plan, + int rtoffset) +{ + CustomMergeJoin *cmjoin = (CustomMergeJoin *) custom_plan; + /* overall logic copied from set_join_references() */ + Plan *outer_plan = cmjoin->cplan.plan.lefttree; + Plan *inner_plan = cmjoin->cplan.plan.righttree; + indexed_tlist *outer_itlist; + indexed_tlist *inner_itlist; + + outer_itlist = build_tlist_index(outer_plan->targetlist); + inner_itlist = build_tlist_index(inner_plan->targetlist); + + /* All join plans have tlist, qual, and joinqual */ + cmjoin->cplan.plan.targetlist + = fix_join_expr(root, + cmjoin->cplan.plan.targetlist, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + cmjoin->cplan.plan.qual + = fix_join_expr(root, + cmjoin->cplan.plan.qual, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + cmjoin->joinqual + = fix_join_expr(root, + cmjoin->joinqual, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + + /* Now do join-type-specific stuff */ + cmjoin->mergeclauses + = fix_join_expr(root, + cmjoin->mergeclauses, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + + /* + * outer_itlist is saved to test GetSpecialCustomVar method; that + * shows actual Var node referenced by special 
varno in EXPLAIN + * command. + */ + cmjoin->outer_itlist = outer_itlist; + + pfree(inner_itlist); +} + +/* + * FinalizeCustomMergePlan + * + * A method to update *p_paramids via finalize_primnode() on joinqual and mergeclauses. + */ +static void +FinalizeCustomMergePlan(PlannerInfo *root, + CustomPlan *custom_plan, + Bitmapset **p_paramids, + Bitmapset **p_valid_params, + Bitmapset **p_scan_params) +{ + CustomMergeJoin *cmjoin = (CustomMergeJoin *) custom_plan; + Bitmapset *paramids = *p_paramids; + + paramids = finalize_primnode(root, + (Node *) cmjoin->joinqual, + paramids); + paramids = finalize_primnode(root, + (Node *) cmjoin->mergeclauses, + paramids); + *p_paramids = paramids; +} + +/* + * BeginCustomMergeJoin + * + * A method to populate CustomPlanState node according to the supplied + * CustomPlan node, and initialize this execution node itself. + */ +static CustomPlanState * +BeginCustomMergeJoin(CustomPlan *cplan, EState *estate, int eflags) +{ + CustomMergeJoin *cmplan = (CustomMergeJoin *) cplan; + CustomMergeJoinState *cmjs = palloc0(sizeof(CustomMergeJoinState)); + MergeJoinState *mjs; + + mjs = _ExecInitMergeJoin(cmplan, estate, eflags); + cmjs->cps.ps = mjs->js.ps; + cmjs->cps.ps.type = T_CustomPlanState; + cmjs->cps.methods = &custmj_plan_methods; + cmjs->jointype = mjs->js.jointype; + cmjs->joinqual = mjs->js.joinqual; + cmjs->mj_NumClauses = mjs->mj_NumClauses; + cmjs->mj_Clauses = mjs->mj_Clauses; + cmjs->mj_JoinState = mjs->mj_JoinState; + cmjs->mj_ExtraMarks = mjs->mj_ExtraMarks; + cmjs->mj_ConstFalseJoin = mjs->mj_ConstFalseJoin; + cmjs->mj_FillOuter = mjs->mj_FillOuter; + cmjs->mj_FillInner = mjs->mj_FillInner; + cmjs->mj_MatchedOuter = mjs->mj_MatchedOuter; + cmjs->mj_MatchedInner = mjs->mj_MatchedInner; + cmjs->mj_OuterTupleSlot = mjs->mj_OuterTupleSlot; + cmjs->mj_InnerTupleSlot = mjs->mj_InnerTupleSlot; + cmjs->mj_MarkedTupleSlot = mjs->mj_MarkedTupleSlot; + cmjs->mj_NullOuterTupleSlot = mjs->mj_NullOuterTupleSlot; + cmjs->mj_NullInnerTupleSlot = mjs->mj_NullInnerTupleSlot; + cmjs->mj_OuterEContext = 
mjs->mj_OuterEContext; + cmjs->mj_InnerEContext = mjs->mj_InnerEContext; + pfree(mjs); + + /* + * MEMO: In case when a custom-plan node replace a join by a scan, + * like a situation to implement remote-join stuff that receives + * a joined relation and scan on it, the extension should adjust + * varno / varattno of Var nodes in the targetlist of PlanState, + * instead of Plan. + * Because the executor evaluates expression nodes in the targetlist + * of PlanState, but EXPLAIN command shows Var names according to + * the targetlist of Plan, it shall not work if you adjusted the + * targetlist to reference the ecxt_scantuple of ExprContext. + */ + + return &cmjs->cps; +} + +/* + * ExecCustomMergeJoin + * + * A method to run this execution node + */ +static TupleTableSlot * +ExecCustomMergeJoin(CustomPlanState *node) +{ + return _ExecMergeJoin((CustomMergeJoinState *) node); +} + +/* + * EndCustomMergeJoin + * + * A method to end this execution node + */ +static void +EndCustomMergeJoin(CustomPlanState *node) +{ + _ExecEndMergeJoin((CustomMergeJoinState *) node); +} + +/* + * ReScanCustomMergeJoin + * + * A method to rescan this execution node + */ +static void +ReScanCustomMergeJoin(CustomPlanState *node) +{ + _ExecReScanMergeJoin((CustomMergeJoinState *) node); +} + +/* + * ExplainCustomMergeJoinTargetRel + * + * A method to show target relation in EXPLAIN command. 
+ */ +static void +ExplainCustomMergeJoinTargetRel(CustomPlanState *node, + ExplainState *es) +{ + CustomMergeJoinState *cmjs = (CustomMergeJoinState *) node; + const char *jointype; + + switch (cmjs->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + default: + jointype = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (cmjs->jointype != JOIN_INNER) + appendStringInfo(es->str, " %s Join", jointype); + else + appendStringInfoString(es->str, " Join"); + } + else + ExplainPropertyText("Join Type", jointype, es); +} + +/* a function copied from explain.c */ +static void +show_upper_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + bool useprefix = (list_length(es->rtable) > 1 || es->verbose); + Node *node; + List *context; + char *exprstr; + + /* No work if empty qual */ + if (qual == NIL) + return; + + /* Convert AND list to explicit AND */ + node = (Node *) make_ands_explicit(qual); + + /* And show it */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable, + es->rtable_names); + exprstr = deparse_expression(node, context, useprefix, false); + + ExplainPropertyText(qlabel, exprstr, es); +} + +/* a function copied from explain.c */ +static void +show_instrumentation_count(const char *qlabel, int which, + PlanState *planstate, ExplainState *es) +{ + double nfiltered; + double nloops; + + if (!es->analyze || !planstate->instrument) + return; + + if (which == 2) + nfiltered = planstate->instrument->nfiltered2; + else + nfiltered = planstate->instrument->nfiltered1; + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || 
es->format != EXPLAIN_FORMAT_TEXT) + { + if (nloops > 0) + ExplainPropertyFloat(qlabel, nfiltered / nloops, 0, es); + else + ExplainPropertyFloat(qlabel, 0.0, 0, es); + } +} + +/* + * ExplainCustomMergeJoin + * + * A method to construct EXPLAIN output. + */ +static void +ExplainCustomMergeJoin(CustomPlanState *node, + List *ancestors, + ExplainState *es) +{ + CustomMergeJoin *cmjoin = (CustomMergeJoin *)node->ps.plan; + + show_upper_qual(cmjoin->mergeclauses, + "Merge Cond", &node->ps, ancestors, es); + show_upper_qual(cmjoin->joinqual, + "Join Filter", &node->ps, ancestors, es); + if (cmjoin->joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + &node->ps, es); + show_upper_qual(cmjoin->cplan.plan.qual, + "Filter", &node->ps, ancestors, es); + if (cmjoin->cplan.plan.qual) + show_instrumentation_count("Rows Removed by Filter", 2, + &node->ps, es); +} + +/* + * GetRelidsCustomMergeJoin + * + * A method to inform underlying range-table indexes. + */ +static Bitmapset * +GetRelidsCustomMergeJoin(CustomPlanState *node) +{ + Bitmapset *result = NULL; + + if (outerPlanState(&node->ps)) + ExplainPreScanNode(outerPlanState(&node->ps), &result); + if (innerPlanState(&node->ps)) + ExplainPreScanNode(innerPlanState(&node->ps), &result); + + return result; +} + +/* + * GetSpecialCustomMergeVar + * + * Test handler of GetSpecialCustomVar method. + * In case when a custom-plan node replaced a join node but does not have + * two underlying sub-plans, like a remote join feature that retrieves one + * flat result set, EXPLAIN command cannot resolve name of the columns + * being referenced by special varno (INNER_VAR, OUTER_VAR or INDEX_VAR) + * because it tries to walk on the underlying sub-plan to be there. + * However, such kind of custom-plan node does not have them, because it replaces + * a part of plan sub-tree by one custom-plan node. 
In this case, custom- + * plan provider has to return an expression node that is referenced by + * the Var node with special varno. + */ +static Node * +GetSpecialCustomMergeVar(CustomPlanState *cpstate, Var *varnode) +{ + CustomMergeJoin *cmjoin = (CustomMergeJoin *)cpstate->ps.plan; + indexed_tlist *itlist; + int i; + + if (varnode->varno != OUTER_VAR) + return NULL; + + itlist = cmjoin->outer_itlist; + for (i=0; i < itlist->num_vars; i++) + { + if (itlist->vars[i].resno == varnode->varattno) + { + Var *newnode = copyObject(varnode); + + newnode->varno = itlist->vars[i].varno; + newnode->varattno = itlist->vars[i].varattno; + + elog(DEBUG2, "%s: (OUTER_VAR,%d) is reference to (%d,%d)", + __FUNCTION__, + varnode->varattno, newnode->varno, newnode->varattno); + + return (Node *) newnode; + } + } + elog(ERROR, "outer_itlist has no entry for Var: %s", + nodeToString(varnode)); + return NULL; +} + +/* + * TextOutCustomMergeJoin + * nodeToString() support in CustomMergeJoin + */ +static void +TextOutCustomMergeJoin(StringInfo str, const CustomPlan *node) +{ + CustomMergeJoin *cmjoin = (CustomMergeJoin *) node; + char *temp; + int i, num; + + /* common fields should be dumped by the core backend */ + Assert(cmjoin->cplan.methods == &custmj_plan_methods); + appendStringInfo(str, " :jointype %d", cmjoin->jointype); + temp = nodeToString(cmjoin->joinqual); + appendStringInfo(str, " :joinqual %s", temp); + pfree(temp); + temp = nodeToString(cmjoin->mergeclauses); + appendStringInfo(str, " :mergeclauses %s", temp); + pfree(temp); + + num = list_length(cmjoin->mergeclauses); + appendStringInfoString(str, " :mergeFamilies"); + for (i=0; i < num; i++) + appendStringInfo(str, " %u", cmjoin->mergeFamilies[i]); + appendStringInfoString(str, " :mergeCollations"); + for (i=0; i < num; i++) + appendStringInfo(str, " %u", cmjoin->mergeCollations[i]); + appendStringInfoString(str, " :mergeStrategies"); + for (i=0; i < num; i++) + appendStringInfo(str, " %d", 
cmjoin->mergeStrategies[i]); + appendStringInfoString(str, " :mergeNullsFirst"); + for (i=0; i < num; i++) + appendStringInfo(str, " %d", (int) cmjoin->mergeNullsFirst[i]); +} + +/* + * CopyCustomMergeJoin + * copyObject() support in CustomMergeJoin + */ +static CustomPlan * +CopyCustomMergeJoin(const CustomPlan *from) +{ + const CustomMergeJoin *oldnode = (const CustomMergeJoin *) from; + CustomMergeJoin *newnode = palloc(sizeof(CustomMergeJoin)); + int num; + + /* copying the common fields */ + CopyCustomPlanCommon((const Node *) oldnode, (Node *) newnode); + + newnode->jointype = oldnode->jointype; + newnode->joinqual = copyObject(oldnode->joinqual); + newnode->mergeclauses = copyObject(oldnode->mergeclauses); + num = list_length(oldnode->mergeclauses); + newnode->mergeFamilies = palloc(sizeof(Oid) * num); + memcpy(newnode->mergeFamilies, + oldnode->mergeFamilies, + sizeof(Oid) * num); + newnode->mergeCollations = palloc(sizeof(Oid) * num); + memcpy(newnode->mergeCollations, + oldnode->mergeCollations, + sizeof(Oid) * num); + newnode->mergeStrategies = palloc(sizeof(int) * num); + memcpy(newnode->mergeStrategies, + oldnode->mergeStrategies, + sizeof(int) * num); + newnode->mergeNullsFirst = palloc(sizeof(bool) * num); + memcpy(newnode->mergeNullsFirst, + oldnode->mergeNullsFirst, + sizeof(bool) * num); + num = oldnode->outer_itlist->num_vars; + newnode->outer_itlist = palloc(offsetof(indexed_tlist, vars[num])); + memcpy(newnode->outer_itlist, + oldnode->outer_itlist, + offsetof(indexed_tlist, vars[num])); + + return &newnode->cplan; +} + +/* + * Entrypoint of this extension + */ +void +_PG_init(void) +{ + /* "enable_custom_mergejoin" to control availability of this module */ + DefineCustomBoolVariable("enable_custom_mergejoin", + "enables the planner's use of custom merge join", + NULL, + &enable_custom_mergejoin, + true, + PGC_USERSET, + GUC_NOT_IN_SAMPLE, + NULL, NULL, NULL); + + /* methods of CustomMergeJoinPath */ + memset(&custmj_path_methods, 0, 
sizeof(CustomPathMethods)); + custmj_path_methods.CustomName = "CustomMergeJoin"; + custmj_path_methods.CreateCustomPlan = CreateCustomMergeJoinPlan; + custmj_path_methods.TextOutCustomPath = TextOutCustomMergeJoinPath; + + /* methods of CustomMergeJoinPlan */ + memset(&custmj_plan_methods, 0, sizeof(CustomPlanMethods)); + custmj_plan_methods.CustomName = "CustomMergeJoin"; + custmj_plan_methods.SetCustomPlanRef = SetCustomMergeJoinRef; + custmj_plan_methods.SupportBackwardScan = NULL; + custmj_plan_methods.FinalizeCustomPlan = FinalizeCustomMergePlan; + custmj_plan_methods.BeginCustomPlan = BeginCustomMergeJoin; + custmj_plan_methods.ExecCustomPlan = ExecCustomMergeJoin; + custmj_plan_methods.EndCustomPlan = EndCustomMergeJoin; + custmj_plan_methods.ReScanCustomPlan = ReScanCustomMergeJoin; + custmj_plan_methods.ExplainCustomPlanTargetRel + = ExplainCustomMergeJoinTargetRel; + custmj_plan_methods.ExplainCustomPlan = ExplainCustomMergeJoin; + custmj_plan_methods.GetRelidsCustomPlan = GetRelidsCustomMergeJoin; + custmj_plan_methods.GetSpecialCustomVar = GetSpecialCustomMergeVar; + custmj_plan_methods.TextOutCustomPlan = TextOutCustomMergeJoin; + custmj_plan_methods.CopyCustomPlan = CopyCustomMergeJoin; + + /* hook registration */ + add_join_path_orig = add_join_path_hook; + add_join_path_hook = custmjAddJoinPath; + + elog(INFO, "MergeJoin logic on top of CustomPlan interface"); +} diff --git a/contrib/custmj/custmj.h b/contrib/custmj/custmj.h new file mode 100644 index 0000000..732bbff --- /dev/null +++ b/contrib/custmj/custmj.h @@ -0,0 +1,148 @@ +/* + * definitions related to custom version of merge join + */ +#ifndef CUSTMJ_H +#define CUSTMJ_H +#include "nodes/nodes.h" +#include "nodes/plannodes.h" +#include "nodes/relation.h" + +typedef struct +{ + CustomPath cpath; + /* fields come from JoinPath */ + JoinType jointype; + Path *outerjoinpath; /* path for the outer side of the join */ + Path *innerjoinpath; /* path for the inner side of the join */ + List 
/*
 * CustomMergeJoin
 *		Plan node of the custom merge join.
 *
 * The embedded CustomPlan is the first member; the callbacks in custmj.c
 * cast between CustomPlan * and CustomMergeJoin * and hand back &cplan.
 * The remaining fields mirror those of the built-in Join and MergeJoin
 * plan nodes.
 */
typedef struct
{
	CustomPlan	cplan;
	/* fields come from Join */
	JoinType	jointype;
	List	   *joinqual;
	/* fields come from MergeJoin */
	List	   *mergeclauses;		/* mergeclauses as expression trees */
	/* these are arrays, but have the same length as the mergeclauses list: */
	/* (the copy and text-out callbacks walk list_length(mergeclauses) entries) */
	Oid		   *mergeFamilies;		/* per-clause OIDs of btree opfamilies */
	Oid		   *mergeCollations;	/* per-clause OIDs of collations */
	int		   *mergeStrategies;	/* per-clause ordering (ASC or DESC) */
	bool	   *mergeNullsFirst;	/* per-clause nulls ordering */
	/* for transvar testing */
	/*
	 * Consulted by GetSpecialCustomMergeVar() to translate OUTER_VAR
	 * references; duplicated by CopyCustomMergeJoin() but not emitted by
	 * TextOutCustomMergeJoin().
	 */
	struct indexed_tlist *outer_itlist;
} CustomMergeJoin;
/*
 * indexed_tlist
 *		A target list plus a flat array describing its plain Var entries.
 *
 * Each vars[] element records which (varno, varattno) a target-list
 * position (resno) refers to.  The array is declared with one element, but
 * the struct is allocated with room for num_vars of them; the node-copy
 * code in custmj.c sizes it with offsetof() on the vars member.
 */
typedef struct indexed_tlist
{
	List	   *tlist;			/* underlying target list */
	int			num_vars;		/* number of plain Var tlist entries */
	bool		has_ph_vars;	/* are there PlaceHolderVar entries? */
	bool		has_non_vars;	/* are there other entries? */
	/* array of num_vars entries: */
	tlist_vinfo vars[1];		/* VARIABLE LENGTH ARRAY */
} indexed_tlist;				/* VARIABLE LENGTH STRUCT */
FULL JOIN t2 ON a = x; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: t1.a, t1.b, t2.x, t2.y + Hash Cond: (t1.a = t2.x) + -> Seq Scan on public.t1 + Output: t1.a, t1.b + -> Hash + Output: t2.x, t2.y + -> Seq Scan on public.t2 + Output: t2.x, t2.y +(9 rows) + +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; + QUERY PLAN +----------------------------------- + Hash Join + Output: t3.n, t3.m, t4.s, t4.t + Hash Cond: (t3.n = t4.s) + -> Seq Scan on public.t3 + Output: t3.n, t3.m + -> Hash + Output: t4.s, t4.t + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(9 rows) + +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: t3.n, t3.m, t4.s, t4.t + Hash Cond: (t3.n = t4.s) + -> Seq Scan on public.t3 + Output: t3.n, t3.m + -> Hash + Output: t4.s, t4.t + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(9 rows) + +-- force off hash_join +SET enable_hashjoin = off; +EXPLAIN (verbose, costs off) SELECT * FROM t1 JOIN t2 ON a = x; + QUERY PLAN +----------------------------------- + Merge Join + Output: t1.a, t1.b, t2.x, t2.y + Merge Cond: (t1.a = t2.x) + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Seq Scan on public.t1 + Output: t1.a, t1.b + -> Sort + Output: t2.x, t2.y + Sort Key: t2.x + -> Seq Scan on public.t2 + Output: t2.x, t2.y +(13 rows) + +SELECT * INTO bmj1 FROM t1 JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x; + QUERY PLAN +----------------------------------- + Merge Full Join + Output: t1.a, t1.b, t2.x, t2.y + Merge Cond: (t1.a = t2.x) + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Seq Scan on public.t1 + Output: t1.a, t1.b + -> Sort + Output: t2.x, t2.y + Sort Key: t2.x + -> Seq Scan on public.t2 + Output: t2.x, t2.y +(13 rows) + +SELECT * INTO bmj2 FROM t1 FULL JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; + QUERY PLAN 
+--------------------------------------------- + Merge Join + Output: t3.n, t3.m, t4.s, t4.t + Merge Cond: (t3.n = t4.s) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + -> Sort + Output: t4.s, t4.t + Sort Key: t4.s + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(10 rows) + +SELECT * INTO bmj3 FROM t3 JOIN t4 ON n = s; +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; + QUERY PLAN +--------------------------------------------- + Merge Full Join + Output: t3.n, t3.m, t4.s, t4.t + Merge Cond: (t3.n = t4.s) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + -> Sort + Output: t4.s, t4.t + Sort Key: t4.s + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(10 rows) + +SELECT * INTO bmj4 FROM t3 FULL JOIN t4 ON n = s; +-- force off built-in merge_join +SET enable_mergejoin = off; +EXPLAIN (verbose, costs off) SELECT * FROM t1 JOIN t2 ON a = x; + QUERY PLAN +----------------------------------- + Custom (CustomMergeJoin) Join + Output: t1.a, t1.b, t2.x, t2.y + Merge Cond: (t1.a = t2.x) + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Seq Scan on public.t1 + Output: t1.a, t1.b + -> Sort + Output: t2.x, t2.y + Sort Key: t2.x + -> Seq Scan on public.t2 + Output: t2.x, t2.y +(13 rows) + +SELECT * INTO cmj1 FROM t1 JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x; + QUERY PLAN +------------------------------------ + Custom (CustomMergeJoin) Full Join + Output: t1.a, t1.b, t2.x, t2.y + Merge Cond: (t1.a = t2.x) + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Seq Scan on public.t1 + Output: t1.a, t1.b + -> Sort + Output: t2.x, t2.y + Sort Key: t2.x + -> Seq Scan on public.t2 + Output: t2.x, t2.y +(13 rows) + +SELECT * INTO cmj2 FROM t1 FULL JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; + QUERY PLAN +--------------------------------------------- + Custom (CustomMergeJoin) Join + Output: t3.n, t3.m, t4.s, t4.t + Merge Cond: (t3.n = t4.s) + -> Index 
Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + -> Sort + Output: t4.s, t4.t + Sort Key: t4.s + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(10 rows) + +SELECT * INTO cmj3 FROM t3 JOIN t4 ON n = s; +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; + QUERY PLAN +--------------------------------------------- + Custom (CustomMergeJoin) Full Join + Output: t3.n, t3.m, t4.s, t4.t + Merge Cond: (t3.n = t4.s) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + -> Sort + Output: t4.s, t4.t + Sort Key: t4.s + -> Seq Scan on public.t4 + Output: t4.s, t4.t +(10 rows) + +SELECT * INTO cmj4 FROM t3 FULL JOIN t4 ON n = s; +-- compare the difference of simple result +SELECT * FROM bmj1 EXCEPT SELECT * FROM cmj1; + a | b | x | y +---+---+---+--- +(0 rows) + +SELECT * FROM cmj1 EXCEPT SELECT * FROM bmj1; + a | b | x | y +---+---+---+--- +(0 rows) + +SELECT * FROM bmj2 EXCEPT SELECT * FROM cmj2; + a | b | x | y +---+---+---+--- +(0 rows) + +SELECT * FROM cmj2 EXCEPT SELECT * FROM bmj2; + a | b | x | y +---+---+---+--- +(0 rows) + +SELECT * FROM bmj3 EXCEPT SELECT * FROM cmj3; + n | m | s | t +---+---+---+--- +(0 rows) + +SELECT * FROM cmj3 EXCEPT SELECT * FROM bmj3; + n | m | s | t +---+---+---+--- +(0 rows) + +SELECT * FROM bmj4 EXCEPT SELECT * FROM cmj4; + n | m | s | t +---+---+---+--- +(0 rows) + +SELECT * FROM cmj4 EXCEPT SELECT * FROM bmj4; + n | m | s | t +---+---+---+--- +(0 rows) + +-- a little bit complicated +EXPLAIN (verbose, costs off) + SELECT (a + x + n) % s AS c1, md5(b || y || m || t) AS c2 + FROM ((t1 join t2 on a = x) join t3 on y = m) join t4 on n = s + WHERE b like '%ab%' AND y like '%cd%' AND m like t; + QUERY PLAN +------------------------------------------------------------------------------------ + Nested Loop + Output: (((t1.a + t2.x) + t3.n) % t4.s), md5((((t1.b || t2.y) || t3.m) || t4.t)) + Join Filter: (t2.x = t1.a) + -> Nested Loop + Output: t2.x, t2.y, t3.n, t3.m, t4.s, t4.t + Join Filter: (t3.m = t2.y) + -> 
Seq Scan on public.t2 + Output: t2.x, t2.y + Filter: (t2.y ~~ '%cd%'::text) + -> Materialize + Output: t3.n, t3.m, t4.s, t4.t + -> Custom (CustomMergeJoin) Join + Output: t3.n, t3.m, t4.s, t4.t + Merge Cond: (t3.n = t4.s) + Join Filter: (t3.m ~~ t4.t) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + -> Sort + Output: t4.s, t4.t + Sort Key: t4.s + -> Seq Scan on public.t4 + Output: t4.s, t4.t + -> Seq Scan on public.t1 + Output: t1.a, t1.b + Filter: (t1.b ~~ '%ab%'::text) +(25 rows) + +PREPARE p1(int,int) AS +SELECT * FROM t1 JOIN t3 ON a = n WHERE n BETWEEN $1 AND $2; +EXPLAIN (verbose, costs off) EXECUTE p1(100,100); + QUERY PLAN +------------------------------------------------------- + Nested Loop + Output: t1.a, t1.b, t3.n, t3.m + Join Filter: (t1.a = t3.n) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + Index Cond: ((t3.n >= 100) AND (t3.n <= 100)) + -> Seq Scan on public.t1 + Output: t1.a, t1.b +(8 rows) + +EXPLAIN (verbose, costs off) EXECUTE p1(100,1000); + QUERY PLAN +-------------------------------------------------------- + Custom (CustomMergeJoin) Join + Output: t1.a, t1.b, t3.n, t3.m + Merge Cond: (t3.n = t1.a) + -> Index Scan using t3_pkey on public.t3 + Output: t3.n, t3.m + Index Cond: ((t3.n >= 100) AND (t3.n <= 1000)) + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Seq Scan on public.t1 + Output: t1.a, t1.b +(11 rows) + +EXPLAIN (verbose, costs off) +SELECT * FROM t1 JOIN t2 ON a = x WHERE x IN (SELECT n % 100 FROM t3); + QUERY PLAN +------------------------------------------------ + Custom (CustomMergeJoin) Join + Output: t1.a, t1.b, t2.x, t2.y + Merge Cond: (t2.x = t1.a) + -> Custom (CustomMergeJoin) Semi Join + Output: t2.x, t2.y, t3.n + Merge Cond: (t2.x = ((t3.n % 100))) + -> Sort + Output: t2.x, t2.y + Sort Key: t2.x + -> Seq Scan on public.t2 + Output: t2.x, t2.y + -> Sort + Output: t3.n, ((t3.n % 100)) + Sort Key: ((t3.n % 100)) + -> Seq Scan on public.t3 + Output: t3.n, (t3.n % 100) + -> Sort + 
/*
 * add_join_path_hook is intentionally NOT defined in this file.
 *
 * The definition that used to live here was carried over from the core
 * planner's joinpath.c.  This extension's _PG_init() saves and overwrites
 * add_join_path_hook, so the variable it touches has to be the planner's
 * own.  A second definition inside the loadable module can shadow it on
 * platforms without symbol interposition, leaving the callback installed
 * where the planner never looks.
 * NOTE(review): assumes the backend headers included above declare the
 * hook variable as extern -- confirm against optimizer/paths.h.
 */

/*
 * PATH_PARAM_BY_REL
 *		True if 'path' has parameterization info and the set of rels it
 *		requires (PATH_REQ_OUTER) overlaps the relids of 'rel'.
 */
#define PATH_PARAM_BY_REL(path, rel)  \
	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+ */ +static void +try_nestloop_path(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + SemiAntiJoinFactors *semifactors, + Relids param_source_rels, + Relids extra_lateral_rels, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *pathkeys) +{ + Relids required_outer; + JoinCostWorkspace workspace; + + /* + * Check to see if proposed path is still parameterized, and reject if the + * parameterization wouldn't be sensible. + */ + required_outer = calc_nestloop_required_outer(outer_path, + inner_path); + if (required_outer && + !bms_overlap(required_outer, param_source_rels)) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } + + /* + * Independently of that, add parameterization needed for any + * PlaceHolderVars that need to be computed at the join. + */ + required_outer = bms_add_members(required_outer, extra_lateral_rels); + + /* + * Do a precheck to quickly eliminate obviously-inferior paths. We + * calculate a cheap lower bound on the path's cost and then use + * add_path_precheck() to see if the path is clearly going to be dominated + * by some existing path for the joinrel. If not, do the full pushup with + * creating a fully valid path structure and submitting it to add_path(). + * The latter two steps are expensive enough to make this two-phase + * methodology worthwhile. 
+ */ + initial_cost_nestloop(root, &workspace, jointype, + outer_path, inner_path, + sjinfo, semifactors); + + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + pathkeys, required_outer)) + { + add_path(joinrel, (Path *) + create_nestloop_path(root, + joinrel, + jointype, + &workspace, + sjinfo, + semifactors, + outer_path, + inner_path, + restrict_clauses, + pathkeys, + required_outer)); + } + else + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + } +} + +/* + * try_mergejoin_path + * Consider a merge join path; if it appears useful, push it into + * the joinrel's pathlist via add_path(). + */ +static void +try_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + Relids param_source_rels, + Relids extra_lateral_rels, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys) +{ + Relids required_outer; + JoinCostWorkspace workspace; + + /* + * Check to see if proposed path is still parameterized, and reject if the + * parameterization wouldn't be sensible. + */ + required_outer = calc_non_nestloop_required_outer(outer_path, + inner_path); + if (required_outer && + !bms_overlap(required_outer, param_source_rels)) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } + + /* + * Independently of that, add parameterization needed for any + * PlaceHolderVars that need to be computed at the join. + */ + required_outer = bms_add_members(required_outer, extra_lateral_rels); + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. 
+ */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + sjinfo); + + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + pathkeys, required_outer)) + { + /* KG: adjust to create CustomMergePath, instead of MergePath */ + CustomMergePath *cmpath; + MergePath *mpath + = create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + sjinfo, + outer_path, + inner_path, + restrict_clauses, + pathkeys, + required_outer, + mergeclauses, + outersortkeys, + innersortkeys); + + /* adjust cost according to enable_(custom)_mergejoin GUCs */ + if (!enable_mergejoin && enable_custom_mergejoin) + { + mpath->jpath.path.startup_cost -= disable_cost; + mpath->jpath.path.total_cost -= disable_cost; + } + else if (enable_mergejoin && !enable_custom_mergejoin) + { + mpath->jpath.path.startup_cost += disable_cost; + mpath->jpath.path.total_cost += disable_cost; + } + + /* construct CustomMergePath object */ + cmpath = palloc0(sizeof(CustomMergePath)); + cmpath->cpath.path = mpath->jpath.path; + cmpath->cpath.path.type = T_CustomPath; + cmpath->cpath.path.pathtype = T_CustomPlan; + cmpath->cpath.methods = &custmj_path_methods; + cmpath->jointype = mpath->jpath.jointype; + cmpath->outerjoinpath = mpath->jpath.outerjoinpath; + cmpath->innerjoinpath = mpath->jpath.innerjoinpath; + cmpath->joinrestrictinfo = mpath->jpath.joinrestrictinfo; + cmpath->path_mergeclauses = mpath->path_mergeclauses; + cmpath->outersortkeys = mpath->outersortkeys; + cmpath->innersortkeys = mpath->innersortkeys; + cmpath->materialize_inner = mpath->materialize_inner; + + add_path(joinrel, &cmpath->cpath.path); + } + else + { + /* Waste no memory 
when we reject a path here */ + bms_free(required_outer); + } +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel) +{ + if (bms_is_subset(rinfo->left_relids, outerrel->relids) && + bms_is_subset(rinfo->right_relids, innerrel->relids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrel->relids) && + bms_is_subset(rinfo->right_relids, outerrel->relids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * sort_inner_and_outer + * Create mergejoin join paths by explicitly sorting both the outer and + * inner join relations on each available merge ordering. 
+ * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join + * 'mergeclause_list' is a list of RestrictInfo nodes for available + * mergejoin clauses in this join + * 'jointype' is the type of join to do + * 'sjinfo' is extra info about the join for selectivity estimation + * 'param_source_rels' are OK targets for parameterization of result paths + * 'extra_lateral_rels' are additional parameterization for result paths + */ +void +sort_inner_and_outer(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + List *mergeclause_list, + JoinType jointype, + SpecialJoinInfo *sjinfo, + Relids param_source_rels, + Relids extra_lateral_rels) +{ + Path *outer_path; + Path *inner_path; + List *all_pathkeys; + ListCell *l; + + /* + * We only consider the cheapest-total-cost input paths, since we are + * assuming here that a sort is required. We will consider + * cheapest-startup-cost input paths later, and only if they don't need a + * sort. + * + * This function intentionally does not consider parameterized input + * paths, except when the cheapest-total is parameterized. If we did so, + * we'd have a combinatorial explosion of mergejoin paths of dubious + * value. This interacts with decisions elsewhere that also discriminate + * against mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + */ + outer_path = outerrel->cheapest_total_path; + inner_path = innerrel->cheapest_total_path; + + /* + * If either cheapest-total path is parameterized by the other rel, we + * can't use a mergejoin. (There's no use looking for alternative input + * paths, since these should already be the least-parameterized available + * paths.) 
+ */ + if (PATH_PARAM_BY_REL(outer_path, innerrel) || + PATH_PARAM_BY_REL(inner_path, outerrel)) + return; + + /* + * If unique-ification is requested, do it and then handle as a plain + * inner join. + */ + if (jointype == JOIN_UNIQUE_OUTER) + { + outer_path = (Path *) create_unique_path(root, outerrel, + outer_path, sjinfo); + Assert(outer_path); + jointype = JOIN_INNER; + } + else if (jointype == JOIN_UNIQUE_INNER) + { + inner_path = (Path *) create_unique_path(root, innerrel, + inner_path, sjinfo); + Assert(inner_path); + jointype = JOIN_INNER; + } + + /* + * Each possible ordering of the available mergejoin clauses will generate + * a differently-sorted result path at essentially the same cost. We have + * no basis for choosing one over another at this level of joining, but + * some sort orders may be more useful than others for higher-level + * mergejoins, so it's worth considering multiple orderings. + * + * Actually, it's not quite true that every mergeclause ordering will + * generate a different path order, because some of the clauses may be + * partially redundant (refer to the same EquivalenceClasses). Therefore, + * what we do is convert the mergeclause list to a list of canonical + * pathkeys, and then consider different orderings of the pathkeys. + * + * Generating a path for *every* permutation of the pathkeys doesn't seem + * like a winning strategy; the cost in planning time is too high. For + * now, we generate one path for each pathkey, listing that pathkey first + * and the rest in random order. This should allow at least a one-clause + * mergejoin without re-sorting against any other possible mergejoin + * partner path. But if we've not guessed the right ordering of secondary + * keys, we may end up evaluating clauses as qpquals when they could have + * been done as mergeclauses. (In practice, it's rare that there's more + * than two or three mergeclauses, so expending a huge amount of thought + * on that is probably not worth it.) 
+ * + * The pathkey order returned by select_outer_pathkeys_for_merge() has + * some heuristics behind it (see that function), so be sure to try it + * exactly as-is as well as making variants. + */ + all_pathkeys = select_outer_pathkeys_for_merge(root, + mergeclause_list, + joinrel); + + foreach(l, all_pathkeys) + { + List *front_pathkey = (List *) lfirst(l); + List *cur_mergeclauses; + List *outerkeys; + List *innerkeys; + List *merge_pathkeys; + + /* Make a pathkey list with this guy first */ + if (l != list_head(all_pathkeys)) + outerkeys = lcons(front_pathkey, + list_delete_ptr(list_copy(all_pathkeys), + front_pathkey)); + else + outerkeys = all_pathkeys; /* no work at first one... */ + + /* Sort the mergeclauses into the corresponding ordering */ + cur_mergeclauses = find_mergeclauses_for_pathkeys(root, + outerkeys, + true, + mergeclause_list); + + /* Should have used them all... */ + Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); + + /* Build sort pathkeys for the inner side */ + innerkeys = make_inner_pathkeys_for_merge(root, + cur_mergeclauses, + outerkeys); + + /* Build pathkeys representing output sort order */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerkeys); + + /* + * And now we can make the path. + * + * Note: it's possible that the cheapest paths will already be sorted + * properly. try_mergejoin_path will detect that case and suppress an + * explicit sort step, so we needn't do so here. 
+ */ + try_mergejoin_path(root, + joinrel, + jointype, + sjinfo, + param_source_rels, + extra_lateral_rels, + outer_path, + inner_path, + restrictlist, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys); + } +} + +/* + * match_unsorted_outer + * Creates possible join paths for processing a single join relation + * 'joinrel' by employing either iterative substitution or + * mergejoining on each of its possible outer paths (considering + * only outer paths that are already ordered well enough for merging). + * + * We always generate a nestloop path for each available outer path. + * In fact we may generate as many as five: one on the cheapest-total-cost + * inner path, one on the same with materialization, one on the + * cheapest-startup-cost inner path (if different), one on the + * cheapest-total inner-indexscan path (if any), and one on the + * cheapest-startup inner-indexscan path (if different). + * + * We also consider mergejoins if mergejoin clauses are available. We have + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) 
+ * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join + * 'mergeclause_list' is a list of RestrictInfo nodes for available + * mergejoin clauses in this join + * 'jointype' is the type of join to do + * 'sjinfo' is extra info about the join for selectivity estimation + * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'param_source_rels' are OK targets for parameterization of result paths + * 'extra_lateral_rels' are additional parameterization for result paths + */ +void +match_unsorted_outer(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + List *mergeclause_list, + JoinType jointype, + SpecialJoinInfo *sjinfo, + SemiAntiJoinFactors *semifactors, + Relids param_source_rels, + Relids extra_lateral_rels) +{ + JoinType save_jointype = jointype; + bool nestjoinOK; + bool useallclauses; + Path *inner_cheapest_total = innerrel->cheapest_total_path; + Path *matpath = NULL; + ListCell *lc1; + + /* + * Nestloop only supports inner, left, semi, and anti joins. Also, if we + * are doing a right or full mergejoin, we must use *all* the mergeclauses + * as join clauses, else we will not have a valid plan. (Although these + * two flags are currently inverses, keep them separate for clarity and + * possible future changes.) + * + * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER are planned as JOIN_INNER from + * here on; save_jointype keeps the original type so that the code below + * still knows which input has to be unique-ified. 
+ */ + switch (jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + nestjoinOK = true; + useallclauses = false; + break; + case JOIN_RIGHT: + case JOIN_FULL: + nestjoinOK = false; + useallclauses = true; + break; + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + jointype = JOIN_INNER; + nestjoinOK = true; + useallclauses = false; + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) jointype); + nestjoinOK = false; /* keep compiler quiet */ + useallclauses = false; + break; + } + + /* + * If inner_cheapest_total is parameterized by the outer rel, ignore it; + * we will consider it below as a member of cheapest_parameterized_paths, + * but the other possibilities considered in this routine aren't usable. + */ + if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + inner_cheapest_total = NULL; + + /* + * If we need to unique-ify the inner path, we will consider only the + * cheapest-total inner. + */ + if (save_jointype == JOIN_UNIQUE_INNER) + { + /* No way to do this with an inner path parameterized by outer rel */ + if (inner_cheapest_total == NULL) + return; + inner_cheapest_total = (Path *) + create_unique_path(root, innerrel, inner_cheapest_total, sjinfo); + Assert(inner_cheapest_total); + } + else if (nestjoinOK) + { + /* + * Consider materializing the cheapest inner path, unless + * enable_material is off or the path in question materializes its + * output anyway. 
+ */ + if (enable_material && inner_cheapest_total != NULL && + !ExecMaterializesOutput(inner_cheapest_total->pathtype)) + matpath = (Path *) + create_material_path(innerrel, inner_cheapest_total); + } + + foreach(lc1, outerrel->pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + int num_sortkeys; + int sortkeycnt; + + /* + * We cannot use an outer path that is parameterized by the inner rel. + */ + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; + + /* + * If we need to unique-ify the outer path, it's pointless to consider + * any but the cheapest outer. (XXX we don't consider parameterized + * outers, nor inners, for unique-ified cases. Should we?) + */ + if (save_jointype == JOIN_UNIQUE_OUTER) + { + if (outerpath != outerrel->cheapest_total_path) + continue; + outerpath = (Path *) create_unique_path(root, outerrel, + outerpath, sjinfo); + Assert(outerpath); + } + + /* + * The result will have this sort order (even if it is implemented as + * a nestloop, and even if some of the mergeclauses are implemented by + * qpquals rather than as true mergeclauses): + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + if (save_jointype == JOIN_UNIQUE_INNER) + { + /* + * Consider nestloop join, but only with the unique-ified cheapest + * inner path + */ + try_nestloop_path(root, + joinrel, + jointype, + sjinfo, + semifactors, + param_source_rels, + extra_lateral_rels, + outerpath, + inner_cheapest_total, + restrictlist, + merge_pathkeys); + } + else if (nestjoinOK) + { + /* + * Consider nestloop joins using this outer path and various + * available paths for the inner relation. We consider the + * cheapest-total paths for each available parameterization of the + * inner relation, including the unparameterized case. 
+ */ + ListCell *lc2; + + foreach(lc2, innerrel->cheapest_parameterized_paths) + { + Path *innerpath = (Path *) lfirst(lc2); + + try_nestloop_path(root, + joinrel, + jointype, + sjinfo, + semifactors, + param_source_rels, + extra_lateral_rels, + outerpath, + innerpath, + restrictlist, + merge_pathkeys); + } + + /* Also consider materialized form of the cheapest inner path */ + if (matpath != NULL) + try_nestloop_path(root, + joinrel, + jointype, + sjinfo, + semifactors, + param_source_rels, + extra_lateral_rels, + outerpath, + matpath, + restrictlist, + merge_pathkeys); + } + + /* Can't do anything else if outer path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_OUTER) + continue; + + /* Can't do anything else if inner rel is parameterized by outer */ + if (inner_cheapest_total == NULL) + continue; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, + true, + mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. + * + * Special corner case: for "x FULL JOIN y ON true", there will be no + * join clauses at all. Ordinarily we'd generate a clauseless + * nestloop path, but since mergejoin is our only join type that + * supports FULL JOIN without any join clauses, it's necessary to + * generate a clauseless mergejoin path instead. + */ + if (mergeclauses == NIL) + { + if (jointype == JOIN_FULL) + /* okay to try for mergejoin */ ; + else + continue; + } + if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list)) + continue; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. + * Since a sort will be needed, only cheapest total cost matters. (But + * try_mergejoin_path will do the right thing if inner_cheapest_total + * is already correctly sorted.) 
+ */ + try_mergejoin_path(root, + joinrel, + jointype, + sjinfo, + param_source_rels, + extra_lateral_rels, + outerpath, + inner_cheapest_total, + restrictlist, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys); + + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + continue; + + /* + * Look for presorted inner paths that satisfy the innersortkey list + * --- or any truncation thereof, if we are allowed to build a + * mergejoin using a subset of the merge clauses. Here, we consider + * both cheap startup cost and cheap total cost. + * + * Currently we do not consider parameterized inner paths here. This + * interacts with decisions elsewhere that also discriminate against + * mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + * + * As we shorten the sortkey list, we should consider only paths that + * are strictly cheaper than (in particular, not the same as) any path + * found in an earlier iteration. Otherwise we'd be intentionally + * using fewer merge keys than a given path allows (treating the rest + * as plain joinquals), which is unlikely to be a good idea. Also, + * eliminating paths here on the basis of compare_path_costs is a lot + * cheaper than building the mergejoin path only to throw it away. + * + * If inner_cheapest_total is well enough sorted to have not required + * a sort in the path made above, we shouldn't make a duplicate path + * with it, either. We handle that case with the same logic that + * handles the previous consideration, by initializing the variables + * that track cheapest-so-far properly. Note that we do NOT reject + * inner_cheapest_total if we find it matches some shorter set of + * pathkeys. That case corresponds to using fewer mergekeys to avoid + * sorting inner_cheapest_total, whereas we did sort it above, so the + * plans being considered are different. 
+ */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1 && !useallclauses) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; + + /* + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... + * When no copy was made (a single sort key, or useallclauses), the + * list is only ever truncated to its existing length, so the + * shared innersortkeys list is left unchanged. + */ + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + try_mergejoin_path(root, + joinrel, + jointype, + sjinfo, + param_source_rels, + extra_lateral_rels, + outerpath, + innerpath, + restrictlist, + merge_pathkeys, + newclauses, + NIL, + NIL); + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... 
*/ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path; build a join with it unless it is the very path already handled as cheapest_total_inner */ + if (innerpath != cheapest_total_inner) + { + /* + * Avoid rebuilding clause list if we already made one; + * saves memory in big join trees... + */ + if (newclauses == NIL) + { + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + } + try_mergejoin_path(root, + joinrel, + jointype, + sjinfo, + param_source_rels, + extra_lateral_rels, + outerpath, + innerpath, + restrictlist, + merge_pathkeys, + newclauses, + NIL, + NIL); + } + cheapest_startup_inner = innerpath; + } + + /* + * Don't consider truncated sortkeys if we need all clauses. + */ + if (useallclauses) + break; + } + } +} + +/* + * select_mergejoin_clauses + * Select mergejoin clauses that are usable for a particular join. + * Returns a list of RestrictInfo nodes for those clauses. + * + * *mergejoin_allowed is normally set to TRUE, but it is set to FALSE if + * this is a right/full join and there are nonmergejoinable join clauses. + * The executor's mergejoin machinery cannot handle such cases, so we have + * to avoid generating a mergejoin plan. (Note that this flag does NOT + * consider whether there are actually any mergejoinable clauses. This is + * correct because in some cases we need to build a clauseless mergejoin. + * Simply returning NIL is therefore not enough to distinguish safe from + * unsafe cases.) + * + * We also mark each selected RestrictInfo to show which side is currently + * being considered as outer. 
These are transient markings that are only + * good for the duration of the current add_paths_to_joinrel() call! 
+ * + * We examine each restrictinfo clause known for the join to see + * if it is mergejoinable and involves vars from the two sub-relations + * currently of interest. + */ +List * +select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + bool *mergejoin_allowed) +{ + List *result_list = NIL; + bool isouterjoin = IS_OUTER_JOIN(jointype); + bool have_nonmergeable_joinclause = false; + ListCell *l; + + foreach(l, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * If processing an outer join, only use its own join clauses in the + * merge. For inner joins we can use pushed-down clauses too. (Note: + * we don't set have_nonmergeable_joinclause here because pushed-down + * clauses will become otherquals not joinquals.) + */ + if (isouterjoin && restrictinfo->is_pushed_down) + continue; + + /* Check that clause is a mergeable operator clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + { + /* + * The executor can handle extra joinquals that are constants, but + * not anything else, when doing right/full merge join. (The + * reason to support constants is so we can do FULL JOIN ON + * FALSE.) + */ + if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const)) + have_nonmergeable_joinclause = true; + continue; /* not mergejoinable */ + } + + /* + * Check if clause has the form "outer op inner" or "inner op outer". + */ + if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) + { + have_nonmergeable_joinclause = true; + continue; /* no good for these input relations */ + } + + /* + * Insist that each side have a non-redundant eclass. This + * restriction is needed because various bits of the planner expect + * that each clause in a merge be associatable with some pathkey in a + * canonical pathkey list, but redundant eclasses can't appear in + * canonical sort orderings. 
(XXX it might be worth relaxing this, + * but not enough time to address it for 8.3.) + * + * Note: it would be bad if this condition failed for an otherwise + * mergejoinable FULL JOIN clause, since that would result in + * undesirable planner failure. I believe that is not possible + * however; a variable involved in a full join could only appear in + * below_outer_join eclasses, which aren't considered redundant. + * + * This case *can* happen for left/right join clauses: the outer-side + * variable could be equated to a constant. Because we will propagate + * that constant across the join clause, the loss of ability to do a + * mergejoin is not really all that big a deal, and so it's not clear + * that improving this is important. + */ + update_mergeclause_eclasses(root, restrictinfo); + + if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || + EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) + { + have_nonmergeable_joinclause = true; + continue; /* can't handle redundant eclasses */ + } + + result_list = lappend(result_list, restrictinfo); + } + + /* + * Report whether mergejoin is allowed (see comment at top of function). 
+ */ + switch (jointype) + { + case JOIN_RIGHT: + case JOIN_FULL: + *mergejoin_allowed = !have_nonmergeable_joinclause; + break; + default: + *mergejoin_allowed = true; + break; + } + + return result_list; +} diff --git a/contrib/custmj/nodeMergejoin.c b/contrib/custmj/nodeMergejoin.c new file mode 100644 index 0000000..62dd8c0 --- /dev/null +++ b/contrib/custmj/nodeMergejoin.c @@ -0,0 +1,1694 @@ +/*------------------------------------------------------------------------- + * + * nodeMergejoin.c + * routines supporting merge joins + * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeMergejoin.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecMergeJoin mergejoin outer and inner relations. + * ExecInitMergeJoin creates and initializes run time states + * ExecEndMergeJoin cleans up the node. + * + * NOTES + * + * Merge-join is done by joining the inner and outer tuples satisfying + * join clauses of the form ((= outerKey innerKey) ...). + * The join clause list is provided by the query planner and may contain + * more than one (= outerKey innerKey) clause (for composite sort key). + * + * However, the query executor needs to know whether an outer + * tuple is "greater/smaller" than an inner tuple so that it can + * "synchronize" the two relations. For example, consider the following + * relations: + * + * outer: (0 ^1 1 2 5 5 5 6 6 7) current tuple: 1 + * inner: (1 ^3 5 5 5 5 6) current tuple: 3 + * + * To continue the merge-join, the executor needs to scan both inner + * and outer relations till the matching tuples 5. It needs to know + * that currently inner tuple 3 is "greater" than outer tuple 1 and + * therefore it should scan the outer relation first to find a + * matching tuple and so on. 
+ * + * Therefore, rather than directly executing the merge join clauses, + * we evaluate the left and right key expressions separately and then + * compare the columns one at a time (see MJCompare). The planner + * passes us enough information about the sort ordering of the inputs + * to allow us to determine how to make the comparison. We may use the + * appropriate btree comparison function, since Postgres' only notion + * of ordering is specified by btree opfamilies. + * + * + * Consider the above relations and suppose that the executor has + * just joined the first outer "5" with the last inner "5". The + * next step is of course to join the second outer "5" with all + * the inner "5's". This requires repositioning the inner "cursor" + * to point at the first inner "5". This is done by "marking" the + * first inner 5 so we can restore the "cursor" to it before joining + * with the second outer 5. The access method interface provides + * routines to mark and restore to a tuple. + * + * + * Essential operation of the merge join algorithm is as follows: + * + * Join { + * get initial outer and inner tuples INITIALIZE + * do forever { + * while (outer != inner) { SKIP_TEST + * if (outer < inner) + * advance outer SKIPOUTER_ADVANCE + * else + * advance inner SKIPINNER_ADVANCE + * } + * mark inner position SKIP_TEST + * do forever { + * while (outer == inner) { + * join tuples JOINTUPLES + * advance inner position NEXTINNER + * } + * advance outer position NEXTOUTER + * if (outer == mark) TESTOUTER + * restore inner position to mark TESTOUTER + * else + * break // return to top of outer loop + * } + * } + * } + * + * The merge join operation is coded in the fashion + * of a state machine. At each state, we do something and then + * proceed to another state. This state is stored in the node's + * execution state information and is preserved across calls to + * ExecMergeJoin. 
-cim 10/31/89 + */ +#include "postgres.h" + +#include "access/nbtree.h" +#include "executor/execdebug.h" +/* #include "executor/nodeMergejoin.h" */ +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "custmj.h" + +/* + * States of the ExecMergeJoin state machine + * + * The current state is kept in the node's mj_JoinState field between calls. + * Most names correspond to the labels used in the algorithm sketch in the + * file header comment. + */ +#define EXEC_MJ_INITIALIZE_OUTER 1 +#define EXEC_MJ_INITIALIZE_INNER 2 +#define EXEC_MJ_JOINTUPLES 3 +#define EXEC_MJ_NEXTOUTER 4 +#define EXEC_MJ_TESTOUTER 5 +#define EXEC_MJ_NEXTINNER 6 +#define EXEC_MJ_SKIP_TEST 7 +#define EXEC_MJ_SKIPOUTER_ADVANCE 8 +#define EXEC_MJ_SKIPINNER_ADVANCE 9 +#define EXEC_MJ_ENDOUTER 10 +#define EXEC_MJ_ENDINNER 11 + +/* + * Runtime data for each mergejoin clause + */ +typedef struct MergeJoinClauseData +{ + /* Executable expression trees */ + ExprState *lexpr; /* left-hand (outer) input expression */ + ExprState *rexpr; /* right-hand (inner) input expression */ + + /* + * If we have a current left or right input tuple, the values of the + * expressions are loaded into these fields: + */ + Datum ldatum; /* current left-hand value */ + Datum rdatum; /* current right-hand value */ + bool lisnull; /* and their isnull flags */ + bool risnull; + + /* + * Everything we need to know to compare the left and right values is + * stored here. + */ + SortSupportData ssup; +} MergeJoinClauseData; + +/* Result type for MJEvalOuterValues and MJEvalInnerValues */ +typedef enum +{ + MJEVAL_MATCHABLE, /* normal, potentially matchable tuple */ + MJEVAL_NONMATCHABLE, /* tuple cannot join because it has a null */ + MJEVAL_ENDOFJOIN /* end of input (physical or effective) */ +} MJEvalResult; + +/* Copy the given inner tuple into the node's mj_MarkedTupleSlot */ +#define MarkInnerTuple(innerTupleSlot, mergestate) \ + ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot)) + + +/* + * MJExamineQuals + * + * This deconstructs the list of mergejoinable expressions, which is given + * to us by the planner in the form of a list of "leftexpr = rightexpr" + * expression trees in the order matching the sort columns of the inputs. 
+ * We build an array of MergeJoinClause structs containing the information
+ * we will need at runtime.  Each struct essentially tells us how to compare
+ * the two expressions from the original clause.
+ *
+ * In addition to the expressions themselves, the planner passes the btree
+ * opfamily OID, collation OID, btree strategy number (BTLessStrategyNumber or
+ * BTGreaterStrategyNumber), and nulls-first flag that identify the intended
+ * sort ordering for each merge key.  The mergejoinable operator is an
+ * equality operator in the opfamily, and the two inputs are guaranteed to be
+ * ordered in either increasing or decreasing (respectively) order according
+ * to the opfamily and collation, with nulls at the indicated end of the range.
+ * This allows us to obtain the needed comparison function from the opfamily.
+ */
+static MergeJoinClause
+MJExamineQuals(List *mergeclauses,
+			   Oid *mergefamilies,
+			   Oid *mergecollations,
+			   int *mergestrategies,
+			   bool *mergenullsfirst,
+			   PlanState *parent)
+{
+	int			nkeys = list_length(mergeclauses);
+	MergeJoinClause keys;
+	ListCell   *lc;
+	int			keyno = 0;
+
+	/* One zero-initialized runtime entry per merge clause */
+	keys = (MergeJoinClause) palloc0(nkeys * sizeof(MergeJoinClauseData));
+
+	foreach(lc, mergeclauses)
+	{
+		OpExpr	   *eqop = (OpExpr *) lfirst(lc);
+		MergeJoinClause key = &keys[keyno];
+		Oid			family = mergefamilies[keyno];
+		Oid			collid = mergecollations[keyno];
+		StrategyNumber sortstrategy = mergestrategies[keyno];
+		bool		nullsfirst = mergenullsfirst[keyno];
+		int			eq_strategy;
+		Oid			lefttype;
+		Oid			righttype;
+		Oid			support_fn;
+
+		if (!IsA(eqop, OpExpr))
+			elog(ERROR, "mergejoin clause is not an OpExpr");
+
+		/* Compile the left (outer) and right (inner) key expressions */
+		key->lexpr = ExecInitExpr((Expr *) linitial(eqop->args), parent);
+		key->rexpr = ExecInitExpr((Expr *) lsecond(eqop->args), parent);
+
+		/* Describe the sort ordering of this key for the comparator */
+		key->ssup.ssup_cxt = CurrentMemoryContext;
+		key->ssup.ssup_collation = collid;
+		switch (sortstrategy)
+		{
+			case BTLessStrategyNumber:
+				key->ssup.ssup_reverse = false;
+				break;
+			case BTGreaterStrategyNumber:
+				key->ssup.ssup_reverse = true;
+				break;
+			default:			/* planner screwed up */
+				elog(ERROR, "unsupported mergejoin strategy %d", sortstrategy);
+				break;
+		}
+		key->ssup.ssup_nulls_first = nullsfirst;
+
+		/* Look up the input datatypes the operator is declared with */
+		get_op_opfamily_properties(eqop->opno, family, false,
+								   &eq_strategy,
+								   &lefttype,
+								   &righttype);
+		if (eq_strategy != BTEqualStrategyNumber)	/* should not happen */
+			elog(ERROR, "cannot merge using non-equality operator %u",
+				 eqop->opno);
+
+		/*
+		 * Prefer the opfamily's sort support function; when there is none,
+		 * wrap the plain btree comparison function in a shim instead.
+		 */
+		support_fn = get_opfamily_proc(family,
+									   lefttype,
+									   righttype,
+									   BTSORTSUPPORT_PROC);
+		if (!OidIsValid(support_fn))
+		{
+			Oid			compare_fn = get_opfamily_proc(family,
+													   lefttype,
+													   righttype,
+													   BTORDER_PROC);
+
+			if (!OidIsValid(compare_fn))	/* should not happen */
+				elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
+					 BTORDER_PROC, lefttype, righttype, family);
+			PrepareSortSupportComparisonShim(compare_fn, &key->ssup);
+		}
+		else
+		{
+			/* the sort support function is expected to install a comparator */
+			OidFunctionCall1(support_fn, PointerGetDatum(&key->ssup));
+			Assert(key->ssup.comparator != NULL);
+		}
+
+		keyno++;
+	}
+
+	return keys;
+}
+
+/*
+ * MJEvalOuterValues
+ *
+ * Compute the values of the mergejoined expressions for the current
+ * outer tuple.
We also detect whether it's impossible for the current + * outer tuple to match anything --- this is true if it yields a NULL + * input, since we assume mergejoin operators are strict. If the NULL + * is in the first join column, and that column sorts nulls last, then + * we can further conclude that no following tuple can match anything + * either, since they must all have nulls in the first column. However, + * that case is only interesting if we're not in FillOuter mode, else + * we have to visit all the tuples anyway. + * + * For the convenience of callers, we also make this routine responsible + * for testing for end-of-input (null outer tuple), and returning + * MJEVAL_ENDOFJOIN when that's seen. This allows the same code to be used + * for both real end-of-input and the effective end-of-input represented by + * a first-column NULL. + * + * We evaluate the values in OuterEContext, which can be reset each + * time we move to a new tuple. + */ +static MJEvalResult +MJEvalOuterValues(CustomMergeJoinState *mergestate) +{ + ExprContext *econtext = mergestate->mj_OuterEContext; + MJEvalResult result = MJEVAL_MATCHABLE; + int i; + MemoryContext oldContext; + + /* Check for end of outer subplan */ + if (TupIsNull(mergestate->mj_OuterTupleSlot)) + return MJEVAL_ENDOFJOIN; + + ResetExprContext(econtext); + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + econtext->ecxt_outertuple = mergestate->mj_OuterTupleSlot; + + for (i = 0; i < mergestate->mj_NumClauses; i++) + { + MergeJoinClause clause = &mergestate->mj_Clauses[i]; + + clause->ldatum = ExecEvalExpr(clause->lexpr, econtext, + &clause->lisnull, NULL); + if (clause->lisnull) + { + /* match is impossible; can we end the join early? 
*/ + if (i == 0 && !clause->ssup.ssup_nulls_first && + !mergestate->mj_FillOuter) + result = MJEVAL_ENDOFJOIN; + else if (result == MJEVAL_MATCHABLE) + result = MJEVAL_NONMATCHABLE; + } + } + + MemoryContextSwitchTo(oldContext); + + return result; +} + +/* + * MJEvalInnerValues + * + * Same as above, but for the inner tuple. Here, we have to be prepared + * to load data from either the true current inner, or the marked inner, + * so caller must tell us which slot to load from. + */ +static MJEvalResult +MJEvalInnerValues(CustomMergeJoinState *mergestate, TupleTableSlot *innerslot) +{ + ExprContext *econtext = mergestate->mj_InnerEContext; + MJEvalResult result = MJEVAL_MATCHABLE; + int i; + MemoryContext oldContext; + + /* Check for end of inner subplan */ + if (TupIsNull(innerslot)) + return MJEVAL_ENDOFJOIN; + + ResetExprContext(econtext); + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + econtext->ecxt_innertuple = innerslot; + + for (i = 0; i < mergestate->mj_NumClauses; i++) + { + MergeJoinClause clause = &mergestate->mj_Clauses[i]; + + clause->rdatum = ExecEvalExpr(clause->rexpr, econtext, + &clause->risnull, NULL); + if (clause->risnull) + { + /* match is impossible; can we end the join early? */ + if (i == 0 && !clause->ssup.ssup_nulls_first && + !mergestate->mj_FillInner) + result = MJEVAL_ENDOFJOIN; + else if (result == MJEVAL_MATCHABLE) + result = MJEVAL_NONMATCHABLE; + } + } + + MemoryContextSwitchTo(oldContext); + + return result; +} + +/* + * MJCompare + * + * Compare the mergejoinable values of the current two input tuples + * and return 0 if they are equal (ie, the mergejoin equalities all + * succeed), >0 if outer > inner, <0 if outer < inner. + * + * MJEvalOuterValues and MJEvalInnerValues must already have been called + * for the current outer and inner tuples, respectively. 
+ */ +static int +MJCompare(CustomMergeJoinState *mergestate) +{ + int result = 0; + bool nulleqnull = false; + ExprContext *econtext = mergestate->cps.ps.ps_ExprContext; + int i; + MemoryContext oldContext; + + /* + * Call the comparison functions in short-lived context, in case they leak + * memory. + */ + ResetExprContext(econtext); + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + for (i = 0; i < mergestate->mj_NumClauses; i++) + { + MergeJoinClause clause = &mergestate->mj_Clauses[i]; + + /* + * Special case for NULL-vs-NULL, else use standard comparison. + */ + if (clause->lisnull && clause->risnull) + { + nulleqnull = true; /* NULL "=" NULL */ + continue; + } + + result = ApplySortComparator(clause->ldatum, clause->lisnull, + clause->rdatum, clause->risnull, + &clause->ssup); + + if (result != 0) + break; + } + + /* + * If we had any NULL-vs-NULL inputs, we do not want to report that the + * tuples are equal. Instead, if result is still 0, change it to +1. This + * will result in advancing the inner side of the join. + * + * Likewise, if there was a constant-false joinqual, do not report + * equality. We have to check this as part of the mergequals, else the + * rescan logic will do the wrong thing. + */ + if (result == 0 && + (nulleqnull || mergestate->mj_ConstFalseJoin)) + result = 1; + + MemoryContextSwitchTo(oldContext); + + return result; +} + + +/* + * Generate a fake join tuple with nulls for the inner tuple, + * and return it if it passes the non-join quals. + */ +static TupleTableSlot * +MJFillOuter(CustomMergeJoinState *node) +{ + ExprContext *econtext = node->cps.ps.ps_ExprContext; + List *otherqual = node->cps.ps.qual; + + ResetExprContext(econtext); + + econtext->ecxt_outertuple = node->mj_OuterTupleSlot; + econtext->ecxt_innertuple = node->mj_NullInnerTupleSlot; + + if (ExecQual(otherqual, econtext, false)) + { + /* + * qualification succeeded. 
now form the desired projection tuple and + * return the slot containing it. + */ + TupleTableSlot *result; + ExprDoneCond isDone; + + MJ_printf("ExecMergeJoin: returning outer fill tuple\n"); + + result = ExecProject(node->cps.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->cps.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered2(node, 1); + + return NULL; +} + +/* + * Generate a fake join tuple with nulls for the outer tuple, + * and return it if it passes the non-join quals. + */ +static TupleTableSlot * +MJFillInner(CustomMergeJoinState *node) +{ + ExprContext *econtext = node->cps.ps.ps_ExprContext; + List *otherqual = node->cps.ps.qual; + + ResetExprContext(econtext); + + econtext->ecxt_outertuple = node->mj_NullOuterTupleSlot; + econtext->ecxt_innertuple = node->mj_InnerTupleSlot; + + if (ExecQual(otherqual, econtext, false)) + { + /* + * qualification succeeded. now form the desired projection tuple and + * return the slot containing it. + */ + TupleTableSlot *result; + ExprDoneCond isDone; + + MJ_printf("ExecMergeJoin: returning inner fill tuple\n"); + + result = ExecProject(node->cps.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->cps.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered2(node, 1); + + return NULL; +} + + +/* + * Check that a qual condition is constant true or constant false. + * If it is constant false (or null), set *is_const_false to TRUE. + * + * Constant true would normally be represented by a NIL list, but we allow an + * actual bool Const as well. We do expect that the planner will have thrown + * away any non-constant terms that have been ANDed with a constant false. 
+ */
+static bool
+check_constant_qual(List *qual, bool *is_const_false)
+{
+	ListCell   *lc;
+
+	foreach(lc, qual)
+	{
+		Const	   *con = (Const *) lfirst(lc);
+
+		/* Anything other than a Const node makes the qual non-constant */
+		if (!con || !IsA(con, Const))
+			return false;
+
+		/*
+		 * A NULL or false constant marks the qual as constant-false.  The
+		 * flag is only ever set here, never cleared, so the caller has to
+		 * initialize *is_const_false before calling.
+		 */
+		if (con->constisnull || !DatumGetBool(con->constvalue))
+			*is_const_false = true;
+	}
+	return true;
+}
+
+
+/* ----------------------------------------------------------------
+ *		ExecMergeTupleDump
+ *
+ *		This function is called through the MJ_dump() macro
+ *		when EXEC_MERGEJOINDEBUG is defined
+ *
+ *		The dump routines take a CustomMergeJoinState, the node type
+ *		that _ExecMergeJoin hands to MJ_dump() and that owns the
+ *		mj_*TupleSlot fields printed here.
+ * ----------------------------------------------------------------
+ */
+#ifdef EXEC_MERGEJOINDEBUG
+
+/* Print the current outer tuple, or "(nil)" if the slot is empty */
+static void
+ExecMergeTupleDumpOuter(CustomMergeJoinState *mergestate)
+{
+	TupleTableSlot *outerSlot = mergestate->mj_OuterTupleSlot;
+
+	printf("==== outer tuple ====\n");
+	if (TupIsNull(outerSlot))
+		printf("(nil)\n");
+	else
+		MJ_debugtup(outerSlot);
+}
+
+/* Print the current inner tuple, or "(nil)" if the slot is empty */
+static void
+ExecMergeTupleDumpInner(CustomMergeJoinState *mergestate)
+{
+	TupleTableSlot *innerSlot = mergestate->mj_InnerTupleSlot;
+
+	printf("==== inner tuple ====\n");
+	if (TupIsNull(innerSlot))
+		printf("(nil)\n");
+	else
+		MJ_debugtup(innerSlot);
+}
+
+/* Print the marked inner tuple, or "(nil)" if the slot is empty */
+static void
+ExecMergeTupleDumpMarked(CustomMergeJoinState *mergestate)
+{
+	TupleTableSlot *markedSlot = mergestate->mj_MarkedTupleSlot;
+
+	printf("==== marked tuple ====\n");
+	if (TupIsNull(markedSlot))
+		printf("(nil)\n");
+	else
+		MJ_debugtup(markedSlot);
+}
+
+/* Print the outer, inner and marked tuples of the node, in that order */
+static void
+ExecMergeTupleDump(CustomMergeJoinState *mergestate)
+{
+	printf("******** ExecMergeTupleDump ********\n");
+
+	ExecMergeTupleDumpOuter(mergestate);
+	ExecMergeTupleDumpInner(mergestate);
+	ExecMergeTupleDumpMarked(mergestate);
+
+	printf("******** \n");
+}
+#endif
+
+/* ----------------------------------------------------------------
+ *		ExecMergeJoin
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+_ExecMergeJoin(CustomMergeJoinState *node)
+{
+	List	   *joinqual;
+	List	   *otherqual;
+	bool		qualResult;
+	int			compareResult;
+	PlanState  *innerPlan;
TupleTableSlot *innerTupleSlot; + PlanState *outerPlan; + TupleTableSlot *outerTupleSlot; + ExprContext *econtext; + bool doFillOuter; + bool doFillInner; + + /* + * get information from node + */ + innerPlan = innerPlanState(node); + outerPlan = outerPlanState(node); + econtext = node->cps.ps.ps_ExprContext; + joinqual = node->joinqual; + otherqual = node->cps.ps.qual; + doFillOuter = node->mj_FillOuter; + doFillInner = node->mj_FillInner; + + /* + * Check to see if we're still projecting out tuples from a previous join + * tuple (because there is a function-returning-set in the projection + * expressions). If so, try to project another one. + */ + if (node->cps.ps.ps_TupFromTlist) + { + TupleTableSlot *result; + ExprDoneCond isDone; + + result = ExecProject(node->cps.ps.ps_ProjInfo, &isDone); + if (isDone == ExprMultipleResult) + return result; + /* Done with that source tuple... */ + node->cps.ps.ps_TupFromTlist = false; + } + + /* + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous tuple cycle. Note this can't happen + * until we're done projecting out tuples from a join tuple. + */ + ResetExprContext(econtext); + + /* + * ok, everything is setup.. let's go to work + */ + for (;;) + { + MJ_dump(node); + + /* + * get the current state of the join and do things accordingly. + */ + switch (node->mj_JoinState) + { + /* + * EXEC_MJ_INITIALIZE_OUTER means that this is the first time + * ExecMergeJoin() has been called and so we have to fetch the + * first matchable tuple for both outer and inner subplans. We + * do the outer side in INITIALIZE_OUTER state, then advance + * to INITIALIZE_INNER state for the inner subplan. 
+ */ + case EXEC_MJ_INITIALIZE_OUTER: + MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_OUTER\n"); + + outerTupleSlot = ExecProcNode(outerPlan); + node->mj_OuterTupleSlot = outerTupleSlot; + + /* Compute join values and check for unmatchability */ + switch (MJEvalOuterValues(node)) + { + case MJEVAL_MATCHABLE: + /* OK to go get the first inner tuple */ + node->mj_JoinState = EXEC_MJ_INITIALIZE_INNER; + break; + case MJEVAL_NONMATCHABLE: + /* Stay in same state to fetch next outer tuple */ + if (doFillOuter) + { + /* + * Generate a fake join tuple with nulls for the + * inner tuple, and return it if it passes the + * non-join quals. + */ + TupleTableSlot *result; + + result = MJFillOuter(node); + if (result) + return result; + } + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: nothing in outer subplan\n"); + if (doFillInner) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. We set MatchedInner = true to + * force the ENDOUTER state to advance inner. + */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + node->mj_MatchedInner = true; + break; + } + /* Otherwise we're done. */ + return NULL; + } + break; + + case EXEC_MJ_INITIALIZE_INNER: + MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_INNER\n"); + + innerTupleSlot = ExecProcNode(innerPlan); + node->mj_InnerTupleSlot = innerTupleSlot; + + /* Compute join values and check for unmatchability */ + switch (MJEvalInnerValues(node, innerTupleSlot)) + { + case MJEVAL_MATCHABLE: + + /* + * OK, we have the initial tuples. Begin by skipping + * non-matching tuples. + */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + /* Mark before advancing, if wanted */ + if (node->mj_ExtraMarks) + ExecMarkPos(innerPlan); + /* Stay in same state to fetch next inner tuple */ + if (doFillInner) + { + /* + * Generate a fake join tuple with nulls for the + * outer tuple, and return it if it passes the + * non-join quals. 
+ */ + TupleTableSlot *result; + + result = MJFillInner(node); + if (result) + return result; + } + break; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + MJ_printf("ExecMergeJoin: nothing in inner subplan\n"); + if (doFillOuter) + { + /* + * Need to emit left-join tuples for all outer + * tuples, including the one we just fetched. We + * set MatchedOuter = false to force the ENDINNER + * state to emit first tuple before advancing + * outer. + */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + node->mj_MatchedOuter = false; + break; + } + /* Otherwise we're done. */ + return NULL; + } + break; + + /* + * EXEC_MJ_JOINTUPLES means we have two tuples which satisfied + * the merge clause so we join them and then proceed to get + * the next inner tuple (EXEC_MJ_NEXTINNER). + */ + case EXEC_MJ_JOINTUPLES: + MJ_printf("ExecMergeJoin: EXEC_MJ_JOINTUPLES\n"); + + /* + * Set the next state machine state. The right things will + * happen whether we return this join tuple or just fall + * through to continue the state machine execution. + */ + node->mj_JoinState = EXEC_MJ_NEXTINNER; + + /* + * Check the extra qual conditions to see if we actually want + * to return this join tuple. If not, can proceed with merge. + * We must distinguish the additional joinquals (which must + * pass to consider the tuples "matched" for outer-join logic) + * from the otherquals (which must pass before we actually + * return the tuple). + * + * We don't bother with a ResetExprContext here, on the + * assumption that we just did one while checking the merge + * qual. One per tuple should be sufficient. We do have to + * set up the econtext links to the tuples for ExecQual to + * use. 
+ */ + outerTupleSlot = node->mj_OuterTupleSlot; + econtext->ecxt_outertuple = outerTupleSlot; + innerTupleSlot = node->mj_InnerTupleSlot; + econtext->ecxt_innertuple = innerTupleSlot; + + qualResult = (joinqual == NIL || + ExecQual(joinqual, econtext, false)); + MJ_DEBUG_QUAL(joinqual, qualResult); + + if (qualResult) + { + node->mj_MatchedOuter = true; + node->mj_MatchedInner = true; + + /* In an antijoin, we never return a matched tuple */ + if (node->jointype == JOIN_ANTI) + { + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + } + + /* + * In a semijoin, we'll consider returning the first + * match, but after that we're done with this outer tuple. + */ + if (node->jointype == JOIN_SEMI) + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + + qualResult = (otherqual == NIL || + ExecQual(otherqual, econtext, false)); + MJ_DEBUG_QUAL(otherqual, qualResult); + + if (qualResult) + { + /* + * qualification succeeded. now form the desired + * projection tuple and return the slot containing it. + */ + TupleTableSlot *result; + ExprDoneCond isDone; + + MJ_printf("ExecMergeJoin: returning tuple\n"); + + result = ExecProject(node->cps.ps.ps_ProjInfo, + &isDone); + + if (isDone != ExprEndResult) + { + node->cps.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + else + InstrCountFiltered2(node, 1); + } + else + InstrCountFiltered1(node, 1); + break; + + /* + * EXEC_MJ_NEXTINNER means advance the inner scan to the next + * tuple. If the tuple is not nil, we then proceed to test it + * against the join qualification. + * + * Before advancing, we check to see if we must emit an + * outer-join fill tuple for this inner tuple. + */ + case EXEC_MJ_NEXTINNER: + MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTINNER\n"); + + if (doFillInner && !node->mj_MatchedInner) + { + /* + * Generate a fake join tuple with nulls for the outer + * tuple, and return it if it passes the non-join quals. 
+ */ + TupleTableSlot *result; + + node->mj_MatchedInner = true; /* do it only once */ + + result = MJFillInner(node); + if (result) + return result; + } + + /* + * now we get the next inner tuple, if any. If there's none, + * advance to next outer tuple (which may be able to join to + * previously marked tuples). + * + * NB: must NOT do "extraMarks" here, since we may need to + * return to previously marked tuples. + */ + innerTupleSlot = ExecProcNode(innerPlan); + node->mj_InnerTupleSlot = innerTupleSlot; + MJ_DEBUG_PROC_NODE(innerTupleSlot); + node->mj_MatchedInner = false; + + /* Compute join values and check for unmatchability */ + switch (MJEvalInnerValues(node, innerTupleSlot)) + { + case MJEVAL_MATCHABLE: + + /* + * Test the new inner tuple to see if it matches + * outer. + * + * If they do match, then we join them and move on to + * the next inner tuple (EXEC_MJ_JOINTUPLES). + * + * If they do not match then advance to next outer + * tuple. + */ + compareResult = MJCompare(node); + MJ_DEBUG_COMPARE(compareResult); + + if (compareResult == 0) + node->mj_JoinState = EXEC_MJ_JOINTUPLES; + else + { + Assert(compareResult < 0); + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + } + break; + case MJEVAL_NONMATCHABLE: + + /* + * It contains a NULL and hence can't match any outer + * tuple, so we can skip the comparison and assume the + * new tuple is greater than current outer. + */ + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + case MJEVAL_ENDOFJOIN: + + /* + * No more inner tuples. However, this might be only + * effective and not physical end of inner plan, so + * force mj_InnerTupleSlot to null to make sure we + * don't fetch more inner tuples. (We need this hack + * because we are not transiting to a state where the + * inner plan is assumed to be exhausted.) 
+ */ + node->mj_InnerTupleSlot = NULL; + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + } + break; + + /*------------------------------------------- + * EXEC_MJ_NEXTOUTER means + * + * outer inner + * outer tuple - 5 5 - marked tuple + * 5 5 + * 6 6 - inner tuple + * 7 7 + * + * we know we just bumped into the + * first inner tuple > current outer tuple (or possibly + * the end of the inner stream) + * so get a new outer tuple and then + * proceed to test it against the marked tuple + * (EXEC_MJ_TESTOUTER) + * + * Before advancing, we check to see if we must emit an + * outer-join fill tuple for this outer tuple. + *------------------------------------------------ + */ + case EXEC_MJ_NEXTOUTER: + MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTOUTER\n"); + + if (doFillOuter && !node->mj_MatchedOuter) + { + /* + * Generate a fake join tuple with nulls for the inner + * tuple, and return it if it passes the non-join quals. + */ + TupleTableSlot *result; + + node->mj_MatchedOuter = true; /* do it only once */ + + result = MJFillOuter(node); + if (result) + return result; + } + + /* + * now we get the next outer tuple, if any + */ + outerTupleSlot = ExecProcNode(outerPlan); + node->mj_OuterTupleSlot = outerTupleSlot; + MJ_DEBUG_PROC_NODE(outerTupleSlot); + node->mj_MatchedOuter = false; + + /* Compute join values and check for unmatchability */ + switch (MJEvalOuterValues(node)) + { + case MJEVAL_MATCHABLE: + /* Go test the new tuple against the marked tuple */ + node->mj_JoinState = EXEC_MJ_TESTOUTER; + break; + case MJEVAL_NONMATCHABLE: + /* Can't match, so fetch next outer tuple */ + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: end of outer subplan\n"); + innerTupleSlot = node->mj_InnerTupleSlot; + if (doFillInner && !TupIsNull(innerTupleSlot)) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. 
+ */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + break; + } + /* Otherwise we're done. */ + return NULL; + } + break; + + /*-------------------------------------------------------- + * EXEC_MJ_TESTOUTER If the new outer tuple and the marked + * tuple satisfy the merge clause then we know we have + * duplicates in the outer scan so we have to restore the + * inner scan to the marked tuple and proceed to join the + * new outer tuple with the inner tuples. + * + * This is the case when + * outer inner + * 4 5 - marked tuple + * outer tuple - 5 5 + * new outer tuple - 5 5 + * 6 8 - inner tuple + * 7 12 + * + * new outer tuple == marked tuple + * + * If the outer tuple fails the test, then we are done + * with the marked tuples, and we have to look for a + * match to the current inner tuple. So we will + * proceed to skip outer tuples until outer >= inner + * (EXEC_MJ_SKIP_TEST). + * + * This is the case when + * + * outer inner + * 5 5 - marked tuple + * outer tuple - 5 5 + * new outer tuple - 6 8 - inner tuple + * 7 12 + * + * new outer tuple > marked tuple + * + *--------------------------------------------------------- + */ + case EXEC_MJ_TESTOUTER: + MJ_printf("ExecMergeJoin: EXEC_MJ_TESTOUTER\n"); + + /* + * Here we must compare the outer tuple with the marked inner + * tuple. (We can ignore the result of MJEvalInnerValues, + * since the marked inner tuple is certainly matchable.) + */ + innerTupleSlot = node->mj_MarkedTupleSlot; + (void) MJEvalInnerValues(node, innerTupleSlot); + + compareResult = MJCompare(node); + MJ_DEBUG_COMPARE(compareResult); + + if (compareResult == 0) + { + /* + * the merge clause matched so now we restore the inner + * scan position to the first mark, and go join that tuple + * (and any following ones) to the new outer. + * + * NOTE: we do not need to worry about the MatchedInner + * state for the rescanned inner tuples. We know all of + * them will match this new outer tuple and therefore + * won't be emitted as fill tuples. 
This works *only* + * because we require the extra joinquals to be constant + * when doing a right or full join --- otherwise some of + * the rescanned tuples might fail the extra joinquals. + * This obviously won't happen for a constant-true extra + * joinqual, while the constant-false case is handled by + * forcing the merge clause to never match, so we never + * get here. + */ + ExecRestrPos(innerPlan); + + /* + * ExecRestrPos probably should give us back a new Slot, + * but since it doesn't, use the marked slot. (The + * previously returned mj_InnerTupleSlot cannot be assumed + * to hold the required tuple.) + */ + node->mj_InnerTupleSlot = innerTupleSlot; + /* we need not do MJEvalInnerValues again */ + + node->mj_JoinState = EXEC_MJ_JOINTUPLES; + } + else + { + /* ---------------- + * if the new outer tuple didn't match the marked inner + * tuple then we have a case like: + * + * outer inner + * 4 4 - marked tuple + * new outer - 5 4 + * 6 5 - inner tuple + * 7 + * + * which means that all subsequent outer tuples will be + * larger than our marked inner tuples. So we need not + * revisit any of the marked tuples but can proceed to + * look for a match to the current inner. If there's + * no more inners, no more matches are possible. + * ---------------- + */ + Assert(compareResult > 0); + innerTupleSlot = node->mj_InnerTupleSlot; + + /* reload comparison data for current inner */ + switch (MJEvalInnerValues(node, innerTupleSlot)) + { + case MJEVAL_MATCHABLE: + /* proceed to compare it to the current outer */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + + /* + * current inner can't possibly match any outer; + * better to advance the inner scan than the + * outer. + */ + node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; + break; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + if (doFillOuter) + { + /* + * Need to emit left-join tuples for remaining + * outer tuples. 
+ */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + break; + } + /* Otherwise we're done. */ + return NULL; + } + } + break; + + /*---------------------------------------------------------- + * EXEC_MJ_SKIP means compare tuples and if they do not + * match, skip whichever is lesser. + * + * For example: + * + * outer inner + * 5 5 + * 5 5 + * outer tuple - 6 8 - inner tuple + * 7 12 + * 8 14 + * + * we have to advance the outer scan + * until we find the outer 8. + * + * On the other hand: + * + * outer inner + * 5 5 + * 5 5 + * outer tuple - 12 8 - inner tuple + * 14 10 + * 17 12 + * + * we have to advance the inner scan + * until we find the inner 12. + *---------------------------------------------------------- + */ + case EXEC_MJ_SKIP_TEST: + MJ_printf("ExecMergeJoin: EXEC_MJ_SKIP_TEST\n"); + + /* + * before we advance, make sure the current tuples do not + * satisfy the mergeclauses. If they do, then we update the + * marked tuple position and go join them. + */ + compareResult = MJCompare(node); + MJ_DEBUG_COMPARE(compareResult); + + if (compareResult == 0) + { + ExecMarkPos(innerPlan); + + MarkInnerTuple(node->mj_InnerTupleSlot, node); + + node->mj_JoinState = EXEC_MJ_JOINTUPLES; + } + else if (compareResult < 0) + node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; + else + /* compareResult > 0 */ + node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; + break; + + /* + * SKIPOUTER_ADVANCE: advance over an outer tuple that is + * known not to join to any inner tuple. + * + * Before advancing, we check to see if we must emit an + * outer-join fill tuple for this outer tuple. + */ + case EXEC_MJ_SKIPOUTER_ADVANCE: + MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPOUTER_ADVANCE\n"); + + if (doFillOuter && !node->mj_MatchedOuter) + { + /* + * Generate a fake join tuple with nulls for the inner + * tuple, and return it if it passes the non-join quals. 
+ */ + TupleTableSlot *result; + + node->mj_MatchedOuter = true; /* do it only once */ + + result = MJFillOuter(node); + if (result) + return result; + } + + /* + * now we get the next outer tuple, if any + */ + outerTupleSlot = ExecProcNode(outerPlan); + node->mj_OuterTupleSlot = outerTupleSlot; + MJ_DEBUG_PROC_NODE(outerTupleSlot); + node->mj_MatchedOuter = false; + + /* Compute join values and check for unmatchability */ + switch (MJEvalOuterValues(node)) + { + case MJEVAL_MATCHABLE: + /* Go test the new tuple against the current inner */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + /* Can't match, so fetch next outer tuple */ + node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: end of outer subplan\n"); + innerTupleSlot = node->mj_InnerTupleSlot; + if (doFillInner && !TupIsNull(innerTupleSlot)) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + break; + } + /* Otherwise we're done. */ + return NULL; + } + break; + + /* + * SKIPINNER_ADVANCE: advance over an inner tuple that is + * known not to join to any outer tuple. + * + * Before advancing, we check to see if we must emit an + * outer-join fill tuple for this inner tuple. + */ + case EXEC_MJ_SKIPINNER_ADVANCE: + MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPINNER_ADVANCE\n"); + + if (doFillInner && !node->mj_MatchedInner) + { + /* + * Generate a fake join tuple with nulls for the outer + * tuple, and return it if it passes the non-join quals. 
+ */ + TupleTableSlot *result; + + node->mj_MatchedInner = true; /* do it only once */ + + result = MJFillInner(node); + if (result) + return result; + } + + /* Mark before advancing, if wanted */ + if (node->mj_ExtraMarks) + ExecMarkPos(innerPlan); + + /* + * now we get the next inner tuple, if any + */ + innerTupleSlot = ExecProcNode(innerPlan); + node->mj_InnerTupleSlot = innerTupleSlot; + MJ_DEBUG_PROC_NODE(innerTupleSlot); + node->mj_MatchedInner = false; + + /* Compute join values and check for unmatchability */ + switch (MJEvalInnerValues(node, innerTupleSlot)) + { + case MJEVAL_MATCHABLE: + /* proceed to compare it to the current outer */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + + /* + * current inner can't possibly match any outer; + * better to advance the inner scan than the outer. + */ + node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; + break; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + MJ_printf("ExecMergeJoin: end of inner subplan\n"); + outerTupleSlot = node->mj_OuterTupleSlot; + if (doFillOuter && !TupIsNull(outerTupleSlot)) + { + /* + * Need to emit left-join tuples for remaining + * outer tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + break; + } + /* Otherwise we're done. */ + return NULL; + } + break; + + /* + * EXEC_MJ_ENDOUTER means we have run out of outer tuples, but + * are doing a right/full join and therefore must null-fill + * any remaining unmatched inner tuples. + */ + case EXEC_MJ_ENDOUTER: + MJ_printf("ExecMergeJoin: EXEC_MJ_ENDOUTER\n"); + + Assert(doFillInner); + + if (!node->mj_MatchedInner) + { + /* + * Generate a fake join tuple with nulls for the outer + * tuple, and return it if it passes the non-join quals. 
+ */ + TupleTableSlot *result; + + node->mj_MatchedInner = true; /* do it only once */ + + result = MJFillInner(node); + if (result) + return result; + } + + /* Mark before advancing, if wanted */ + if (node->mj_ExtraMarks) + ExecMarkPos(innerPlan); + + /* + * now we get the next inner tuple, if any + */ + innerTupleSlot = ExecProcNode(innerPlan); + node->mj_InnerTupleSlot = innerTupleSlot; + MJ_DEBUG_PROC_NODE(innerTupleSlot); + node->mj_MatchedInner = false; + + if (TupIsNull(innerTupleSlot)) + { + MJ_printf("ExecMergeJoin: end of inner subplan\n"); + return NULL; + } + + /* Else remain in ENDOUTER state and process next tuple. */ + break; + + /* + * EXEC_MJ_ENDINNER means we have run out of inner tuples, but + * are doing a left/full join and therefore must null- fill + * any remaining unmatched outer tuples. + */ + case EXEC_MJ_ENDINNER: + MJ_printf("ExecMergeJoin: EXEC_MJ_ENDINNER\n"); + + Assert(doFillOuter); + + if (!node->mj_MatchedOuter) + { + /* + * Generate a fake join tuple with nulls for the inner + * tuple, and return it if it passes the non-join quals. + */ + TupleTableSlot *result; + + node->mj_MatchedOuter = true; /* do it only once */ + + result = MJFillOuter(node); + if (result) + return result; + } + + /* + * now we get the next outer tuple, if any + */ + outerTupleSlot = ExecProcNode(outerPlan); + node->mj_OuterTupleSlot = outerTupleSlot; + MJ_DEBUG_PROC_NODE(outerTupleSlot); + node->mj_MatchedOuter = false; + + if (TupIsNull(outerTupleSlot)) + { + MJ_printf("ExecMergeJoin: end of outer subplan\n"); + return NULL; + } + + /* Else remain in ENDINNER state and process next tuple. */ + break; + + /* + * broken state value? 
+ */ + default: + elog(ERROR, "unrecognized mergejoin state: %d", + (int) node->mj_JoinState); + } + } +} + +/* ---------------------------------------------------------------- + * ExecInitMergeJoin + * ---------------------------------------------------------------- + */ +MergeJoinState * +_ExecInitMergeJoin(CustomMergeJoin *node, EState *estate, int eflags) +{ + MergeJoinState *mergestate; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + MJ1_printf("ExecInitMergeJoin: %s\n", + "initializing node"); + + /* + * create state structure + */ + mergestate = makeNode(MergeJoinState); + mergestate->js.ps.plan = (Plan *) node; + mergestate->js.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &mergestate->js.ps); + + /* + * we need two additional econtexts in which we can compute the join + * expressions from the left and right input tuples. The node's regular + * econtext won't do because it gets reset too often. + */ + mergestate->mj_OuterEContext = CreateExprContext(estate); + mergestate->mj_InnerEContext = CreateExprContext(estate); + + /* + * initialize child expressions + */ + mergestate->js.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->cplan.plan.targetlist, + (PlanState *) mergestate); + mergestate->js.ps.qual = (List *) + ExecInitExpr((Expr *) node->cplan.plan.qual, + (PlanState *) mergestate); + mergestate->js.jointype = node->jointype; + mergestate->js.joinqual = (List *) + ExecInitExpr((Expr *) node->joinqual, + (PlanState *) mergestate); + mergestate->mj_ConstFalseJoin = false; + /* mergeclauses are handled below */ + + /* + * initialize child nodes + * + * inner child must support MARK/RESTORE. 
+ */ + outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags); + innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate, + eflags | EXEC_FLAG_MARK); + + /* + * For certain types of inner child nodes, it is advantageous to issue + * MARK every time we advance past an inner tuple we will never return to. + * For other types, MARK on a tuple we cannot return to is a waste of + * cycles. Detect which case applies and set mj_ExtraMarks if we want to + * issue "unnecessary" MARK calls. + * + * Currently, only Material wants the extra MARKs, and it will be helpful + * only if eflags doesn't specify REWIND. + */ + if (IsA(innerPlan(node), Material) && + (eflags & EXEC_FLAG_REWIND) == 0) + mergestate->mj_ExtraMarks = true; + else + mergestate->mj_ExtraMarks = false; + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &mergestate->js.ps); + + mergestate->mj_MarkedTupleSlot = ExecInitExtraTupleSlot(estate); + ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot, + ExecGetResultType(innerPlanState(mergestate))); + + switch (node->jointype) + { + case JOIN_INNER: + case JOIN_SEMI: + mergestate->mj_FillOuter = false; + mergestate->mj_FillInner = false; + break; + case JOIN_LEFT: + case JOIN_ANTI: + mergestate->mj_FillOuter = true; + mergestate->mj_FillInner = false; + mergestate->mj_NullInnerTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(innerPlanState(mergestate))); + break; + case JOIN_RIGHT: + mergestate->mj_FillOuter = false; + mergestate->mj_FillInner = true; + mergestate->mj_NullOuterTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(outerPlanState(mergestate))); + + /* + * Can't handle right or full join with non-constant extra + * joinclauses. This should have been caught by planner. 
+ */ + if (!check_constant_qual(node->joinqual, + &mergestate->mj_ConstFalseJoin)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("RIGHT JOIN is only supported with merge-joinable join conditions"))); + break; + case JOIN_FULL: + mergestate->mj_FillOuter = true; + mergestate->mj_FillInner = true; + mergestate->mj_NullOuterTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(outerPlanState(mergestate))); + mergestate->mj_NullInnerTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(innerPlanState(mergestate))); + + /* + * Can't handle right or full join with non-constant extra + * joinclauses. This should have been caught by planner. + */ + if (!check_constant_qual(node->joinqual, + &mergestate->mj_ConstFalseJoin)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("FULL JOIN is only supported with merge-joinable join conditions"))); + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) node->jointype); + } + + /* + * initialize tuple type and projection info + */ + ExecAssignResultTypeFromTL(&mergestate->js.ps); + ExecAssignProjectionInfo(&mergestate->js.ps, NULL); + + /* + * preprocess the merge clauses + */ + mergestate->mj_NumClauses = list_length(node->mergeclauses); + mergestate->mj_Clauses = MJExamineQuals(node->mergeclauses, + node->mergeFamilies, + node->mergeCollations, + node->mergeStrategies, + node->mergeNullsFirst, + (PlanState *) mergestate); + + /* + * initialize join state + */ + mergestate->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER; + mergestate->js.ps.ps_TupFromTlist = false; + mergestate->mj_MatchedOuter = false; + mergestate->mj_MatchedInner = false; + mergestate->mj_OuterTupleSlot = NULL; + mergestate->mj_InnerTupleSlot = NULL; + + /* + * initialization successful + */ + MJ1_printf("ExecInitMergeJoin: %s\n", + "node initialized"); + + return mergestate; +} + +/* ---------------------------------------------------------------- + * ExecEndMergeJoin + * + * old comments 
+ *		frees storage allocated through C routines.
+ * ----------------------------------------------------------------
+ */
+void
+_ExecEndMergeJoin(CustomMergeJoinState *node)
+{
+	MJ1_printf("ExecEndMergeJoin: %s\n",
+			   "ending node processing");
+
+	/*
+	 * Release the expression context attached to this node's plan state.
+	 */
+	ExecFreeExprContext(&node->cps.ps);
+
+	/*
+	 * Clean out the tuple table: drop whatever tuples are still held in the
+	 * result slot and in the slot that remembers the marked inner tuple.
+	 */
+	ExecClearTuple(node->cps.ps.ps_ResultTupleSlot);
+	ExecClearTuple(node->mj_MarkedTupleSlot);
+
+	/*
+	 * Shut down the subplans, inner child first and then outer child.
+	 */
+	ExecEndNode(innerPlanState(node));
+	ExecEndNode(outerPlanState(node));
+
+	MJ1_printf("ExecEndMergeJoin: %s\n",
+			   "node processing ended");
+}
+
+void
+_ExecReScanMergeJoin(CustomMergeJoinState *node)
+{
+	ExecClearTuple(node->mj_MarkedTupleSlot);
+
+	node->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER;
+	node->cps.ps.ps_TupFromTlist = false;
+	node->mj_MatchedOuter = false;
+	node->mj_MatchedInner = false;
+	node->mj_OuterTupleSlot = NULL;
+	node->mj_InnerTupleSlot = NULL;
+
+	/*
+	 * The statements above put the join state machine back into its initial
+	 * state, exactly as node initialization leaves it.
+	 *
+	 * Only rescan a child explicitly when its chgParam is NULL.  If a
+	 * subnode's chgParam is not null, that plan will be re-scanned by its
+	 * first ExecProcNode call, so rescanning it here would be redundant.
+	 */
+	if (node->cps.ps.lefttree->chgParam == NULL)
+		ExecReScan(node->cps.ps.lefttree);
+	if (node->cps.ps.righttree->chgParam == NULL)
+		ExecReScan(node->cps.ps.righttree);
+
+}
diff --git a/contrib/custmj/setrefs.c b/contrib/custmj/setrefs.c
new file mode 100644
index 0000000..9eb0b14
--- /dev/null
+++ b/contrib/custmj/setrefs.c
@@ -0,0 +1,326 @@
+/*-------------------------------------------------------------------------
+ *
+ * setrefs.c
+ *	  Post-processing of a completed plan tree: fix references to subplan
+ *	  vars, compute regproc values for operators, etc
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/custmj/setrefs.c
+ *
+ * NOTE(review): the header previously named
+ * src/backend/optimizer/plan/setrefs.c; presumably this file was derived
+ * from that one -- confirm before relying on it.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/transam.h"
+#include "catalog/pg_type.h"
+#include "executor/nodeCustom.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/planmain.h"
+#include "optimizer/planner.h"
+#include "optimizer/tlist.h"
+#include "tcop/utility.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+#include "custmj.h"
+
+typedef struct
+{
+	PlannerInfo *root;
+	indexed_tlist *outer_itlist;	/* indexed tlist of the outer join rel */
+	indexed_tlist *inner_itlist;	/* same for the inner rel, or NULL */
+	Index		acceptable_rel;	/* 0, or RT index whose Vars may stay as-is */
+	int			rtoffset;		/* amount added to varnoold of matched Vars */
+} fix_join_expr_context;
+
+typedef struct
+{
+	PlannerInfo *root;
+	indexed_tlist *subplan_itlist;
+	Index		newvarno;
+	int			rtoffset;
+} fix_upper_expr_context;
+
+static Var *search_indexed_tlist_for_non_var(Node *node,
+								 indexed_tlist *itlist,
+								 Index newvarno);
+static Node *fix_join_expr_mutator(Node *node,
+					  fix_join_expr_context *context);
+/*
+ * copyVar
+ *		Copy a Var node.
+ *
+ * fix_scan_expr and friends do this enough times that it's worth having
+ * a bespoke routine instead of using the generic copyObject() function.
+ */ +static inline Var * +copyVar(Var *var) +{ + Var *newvar = (Var *) palloc(sizeof(Var)); + + *newvar = *var; + return newvar; +} + +/* + * build_tlist_index --- build an index data structure for a child tlist + * + * In most cases, subplan tlists will be "flat" tlists with only Vars, + * so we try to optimize that case by extracting information about Vars + * in advance. Matching a parent tlist to a child is still an O(N^2) + * operation, but at least with a much smaller constant factor than plain + * tlist_member() searches. + * + * The result of this function is an indexed_tlist struct to pass to + * search_indexed_tlist_for_var() or search_indexed_tlist_for_non_var(). + * When done, the indexed_tlist may be freed with a single pfree(). + */ +indexed_tlist * +build_tlist_index(List *tlist) +{ + indexed_tlist *itlist; + tlist_vinfo *vinfo; + ListCell *l; + + /* Create data structure with enough slots for all tlist entries */ + itlist = (indexed_tlist *) + palloc(offsetof(indexed_tlist, vars) + + list_length(tlist) * sizeof(tlist_vinfo)); + + itlist->tlist = tlist; + itlist->has_ph_vars = false; + itlist->has_non_vars = false; + + /* Find the Vars and fill in the index array */ + vinfo = itlist->vars; + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->expr && IsA(tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + + vinfo->varno = var->varno; + vinfo->varattno = var->varattno; + vinfo->resno = tle->resno; + vinfo++; + } + else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) + itlist->has_ph_vars = true; + else + itlist->has_non_vars = true; + } + + itlist->num_vars = (vinfo - itlist->vars); + + return itlist; +} + +/* + * search_indexed_tlist_for_var --- find a Var in an indexed tlist + * + * If a match is found, return a copy of the given Var with suitably + * modified varno/varattno (to wit, newvarno and the resno of the TLE entry). + * Also ensure that varnoold is incremented by rtoffset. + * If no match, return NULL. 
+ */ +static Var * +search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist, + Index newvarno, int rtoffset) +{ + Index varno = var->varno; + AttrNumber varattno = var->varattno; + tlist_vinfo *vinfo; + int i; + + vinfo = itlist->vars; + i = itlist->num_vars; + while (i-- > 0) + { + if (vinfo->varno == varno && vinfo->varattno == varattno) + { + /* Found a match */ + Var *newvar = copyVar(var); + + newvar->varno = newvarno; + newvar->varattno = vinfo->resno; + if (newvar->varnoold > 0) + newvar->varnoold += rtoffset; + return newvar; + } + vinfo++; + } + return NULL; /* no match */ +} + +/* + * search_indexed_tlist_for_non_var --- find a non-Var in an indexed tlist + * + * If a match is found, return a Var constructed to reference the tlist item. + * If no match, return NULL. + * + * NOTE: it is a waste of time to call this unless itlist->has_ph_vars or + * itlist->has_non_vars + */ +static Var * +search_indexed_tlist_for_non_var(Node *node, + indexed_tlist *itlist, Index newvarno) +{ + TargetEntry *tle; + + tle = tlist_member(node, itlist->tlist); + if (tle) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVarFromTargetEntry(newvarno, tle); + newvar->varnoold = 0; /* wasn't ever a plain Var */ + newvar->varoattno = 0; + return newvar; + } + return NULL; /* no match */ +} + +/* + * fix_join_expr + * Create a new set of targetlist entries or join qual clauses by + * changing the varno/varattno values of variables in the clauses + * to reference target list values from the outer and inner join + * relation target lists. Also perform opcode lookup and add + * regclass OIDs to root->glob->relationOids. + * + * This is used in two different scenarios: a normal join clause, where all + * the Vars in the clause *must* be replaced by OUTER_VAR or INNER_VAR + * references; and a RETURNING clause, which may contain both Vars of the + * target relation and Vars of other relations. 
In the latter case we want + * to replace the other-relation Vars by OUTER_VAR references, while leaving + * target Vars alone. + * + * For a normal join, acceptable_rel should be zero so that any failure to + * match a Var will be reported as an error. For the RETURNING case, pass + * inner_itlist = NULL and acceptable_rel = the ID of the target relation. + * + * 'clauses' is the targetlist or list of join clauses + * 'outer_itlist' is the indexed target list of the outer join relation + * 'inner_itlist' is the indexed target list of the inner join relation, + * or NULL + * 'acceptable_rel' is either zero or the rangetable index of a relation + * whose Vars may appear in the clause without provoking an error + * 'rtoffset': how much to increment varnoold by + * + * Returns the new expression tree. The original clause structure is + * not modified. + */ +List * +fix_join_expr(PlannerInfo *root, + List *clauses, + indexed_tlist *outer_itlist, + indexed_tlist *inner_itlist, + Index acceptable_rel, + int rtoffset) +{ + fix_join_expr_context context; + + context.root = root; + context.outer_itlist = outer_itlist; + context.inner_itlist = inner_itlist; + context.acceptable_rel = acceptable_rel; + context.rtoffset = rtoffset; + return (List *) fix_join_expr_mutator((Node *) clauses, &context); +} + +static Node * +fix_join_expr_mutator(Node *node, fix_join_expr_context *context) +{ + Var *newvar; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* First look for the var in the input tlists */ + newvar = search_indexed_tlist_for_var(var, + context->outer_itlist, + OUTER_VAR, + context->rtoffset); + if (newvar) + return (Node *) newvar; + if (context->inner_itlist) + { + newvar = search_indexed_tlist_for_var(var, + context->inner_itlist, + INNER_VAR, + context->rtoffset); + if (newvar) + return (Node *) newvar; + } + + /* If it's for acceptable_rel, adjust and return it */ + if (var->varno == context->acceptable_rel) + { + var = 
copyVar(var); + var->varno += context->rtoffset; + if (var->varnoold > 0) + var->varnoold += context->rtoffset; + return (Node *) var; + } + + /* No referent found for Var */ + elog(ERROR, "variable not found in subplan target lists"); + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* See if the PlaceHolderVar has bubbled up from a lower plan node */ + if (context->outer_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Node *) phv, + context->outer_itlist, + OUTER_VAR); + if (newvar) + return (Node *) newvar; + } + if (context->inner_itlist && context->inner_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Node *) phv, + context->inner_itlist, + INNER_VAR); + if (newvar) + return (Node *) newvar; + } + + /* If not supplied by input plans, evaluate the contained expr */ + return fix_join_expr_mutator((Node *) phv->phexpr, context); + } + /* Try matching more complex expressions too, if tlists have any */ + if (context->outer_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var(node, + context->outer_itlist, + OUTER_VAR); + if (newvar) + return (Node *) newvar; + } + if (context->inner_itlist && context->inner_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var(node, + context->inner_itlist, + INNER_VAR); + if (newvar) + return (Node *) newvar; + } + fix_expr_common(context->root, node); + return expression_tree_mutator(node, + fix_join_expr_mutator, + (void *) context); +} diff --git a/contrib/custmj/sql/custmj.sql b/contrib/custmj/sql/custmj.sql new file mode 100644 index 0000000..ffb6d9d --- /dev/null +++ b/contrib/custmj/sql/custmj.sql @@ -0,0 +1,79 @@ +-- regression test for custmj extension + +-- +-- initial setup +-- +CREATE TABLE t1 (a int, b text); +CREATE TABLE t2 (x int, y text); +CREATE TABLE t3 (n int primary key, m text); +CREATE TABLE t4 (s int references t3(n), t text); + +INSERT INTO t1 (SELECT x, md5(x::text) FROM 
generate_series( 1,600) x); +INSERT INTO t2 (SELECT x, md5(x::text) FROM generate_series(401,800) x); +INSERT INTO t3 (SELECT x, md5(x::text) FROM generate_series( 1,800) x); +INSERT INTO t4 (SELECT x, md5(x::text) FROM generate_series(201,600) x); + +VACUUM ANALYZE t1; +VACUUM ANALYZE t2; +VACUUM ANALYZE t3; +VACUUM ANALYZE t4; +-- LOAD this extension +LOAD 'custmj'; + +-- +-- explain output +-- +EXPLAIN (verbose, costs off) SELECT * FROM t1 JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; + +-- force off hash_join +SET enable_hashjoin = off; +EXPLAIN (verbose, costs off) SELECT * FROM t1 JOIN t2 ON a = x; +SELECT * INTO bmj1 FROM t1 JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x; +SELECT * INTO bmj2 FROM t1 FULL JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; +SELECT * INTO bmj3 FROM t3 JOIN t4 ON n = s; +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; +SELECT * INTO bmj4 FROM t3 FULL JOIN t4 ON n = s; + +-- force off built-in merge_join +SET enable_mergejoin = off; +EXPLAIN (verbose, costs off) SELECT * FROM t1 JOIN t2 ON a = x; +SELECT * INTO cmj1 FROM t1 JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x; +SELECT * INTO cmj2 FROM t1 FULL JOIN t2 ON a = x; +EXPLAIN (verbose, costs off) SELECT * FROM t3 JOIN t4 ON n = s; +SELECT * INTO cmj3 FROM t3 JOIN t4 ON n = s; +EXPLAIN (verbose, costs off) SELECT * FROM t3 FULL JOIN t4 ON n = s; +SELECT * INTO cmj4 FROM t3 FULL JOIN t4 ON n = s; + +-- compare the difference of simple result +SELECT * FROM bmj1 EXCEPT SELECT * FROM cmj1; +SELECT * FROM cmj1 EXCEPT SELECT * FROM bmj1; +SELECT * FROM bmj2 EXCEPT SELECT * FROM cmj2; +SELECT * FROM cmj2 EXCEPT SELECT * FROM bmj2; +SELECT * FROM bmj3 EXCEPT SELECT * FROM cmj3; 
+SELECT * FROM cmj3 EXCEPT SELECT * FROM bmj3; +SELECT * FROM bmj4 EXCEPT SELECT * FROM cmj4; +SELECT * FROM cmj4 EXCEPT SELECT * FROM bmj4; + +-- a little bit complicated +EXPLAIN (verbose, costs off) + SELECT (a + x + n) % s AS c1, md5(b || y || m || t) AS c2 + FROM ((t1 join t2 on a = x) join t3 on y = m) join t4 on n = s + WHERE b like '%ab%' AND y like '%cd%' AND m like t; + +PREPARE p1(int,int) AS +SELECT * FROM t1 JOIN t3 ON a = n WHERE n BETWEEN $1 AND $2; +EXPLAIN (verbose, costs off) EXECUTE p1(100,100); +EXPLAIN (verbose, costs off) EXECUTE p1(100,1000); + +EXPLAIN (verbose, costs off) +SELECT * FROM t1 JOIN t2 ON a = x WHERE x IN (SELECT n % 100 FROM t3); + +-- check GetSpecialCustomVar stuff +SET client_min_messages = debug; +EXPLAIN (verbose, costs off) SELECT * FROM t1 FULL JOIN t2 ON a = x;