diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 96f00fc..1bb36c6 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -50,7 +50,17 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); - +static void generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinType save_jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial); /* * add_paths_to_joinrel @@ -472,6 +482,76 @@ try_mergejoin_path(PlannerInfo *root, } /* + * try_partial_mergejoin_path + * Consider a partial merge join path; if it appears useful, submit it + * for the joinrel via add_partial_path(). + */ +static void +try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * See comments in try_partial_nestloop_path(). + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_subset(inner_paramrels, outer_path->parent->relids)) + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_nestloop_path(). 
+ */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra->sjinfo); + + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* Might be good enough to be worth trying, so let's try it. */ + add_partial_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra->sjinfo, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL, + mergeclauses, + outersortkeys, + innersortkeys)); +} + +/* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into * the joinrel's pathlist via add_path(). @@ -640,6 +720,7 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; Path *outer_path; Path *inner_path; List *all_pathkeys; @@ -773,6 +854,37 @@ sort_inner_and_outer(PlannerInfo *root, innerkeys, jointype, extra); + + /* + * If the joinrel is parallel-safe, we may be able to consider a + * partial merge join. However, we can't handle JOIN_UNIQUE_OUTER, + * because the outer path will be partial, and therefore we won't be + * able to properly guarantee uniqueness. Similarly, we can't handle + * JOIN_FULL and JOIN_RIGHT, because they can produce false null + * extended rows. Also, the resulting path must not be parameterized. 
+ */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + jointype != JOIN_FULL && + jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids) && + inner_path->parallel_safe) + { + Path *cheapest_partial_outer = + (Path *) linitial(outerrel->partial_pathlist); + + try_partial_mergejoin_path(root, + joinrel, + cheapest_partial_outer, + inner_path, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra); + } } } @@ -790,15 +902,7 @@ sort_inner_and_outer(PlannerInfo *root, * cheapest-total inner-indexscan path (if any), and one on the * cheapest-startup inner-indexscan path (if different). * - * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: sort the cheapest - * inner path, or use an inner path that is already suitably ordered for the - * merge. If we have several mergeclauses, it could be that there is no inner - * path (or only a very expensive one) for the full list of mergeclauses, but - * better paths exist if we truncate the mergeclause list (thereby discarding - * some sort key requirements). So, we consider truncations of the - * mergeclause list as well as the full list. (Ideally we'd consider all - * subsets of the mergeclause list, but that seems way too expensive.) + * We also consider mergejoins if mergejoin clauses are available. See detailed comments in generate_mergejoin_paths. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation @@ -894,13 +998,6 @@ match_unsorted_outer(PlannerInfo *root, { Path *outerpath = (Path *) lfirst(lc1); List *merge_pathkeys; - List *mergeclauses; - List *innersortkeys; - List *trialsortkeys; - Path *cheapest_startup_inner; - Path *cheapest_total_inner; - int num_sortkeys; - int sortkeycnt; /* * We cannot use an outer path that is parameterized by the inner rel. 
@@ -986,139 +1083,318 @@ match_unsorted_outer(PlannerInfo *root, if (inner_cheapest_total == NULL) continue; - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - outerpath->pathkeys, - true, - extra->mergeclause_list); + /* Generate merge join paths for the outer path */ + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, + jointype, save_jointype, extra, useallclauses, + inner_cheapest_total, merge_pathkeys, false); + } - /* - * Done with this outer path if no chance for a mergejoin. - * - * Special corner case: for "x FULL JOIN y ON true", there will be no - * join clauses at all. Ordinarily we'd generate a clauseless - * nestloop path, but since mergejoin is our only join type that - * supports FULL JOIN without any join clauses, it's necessary to - * generate a clauseless mergejoin path instead. - */ - if (mergeclauses == NIL) + /* + * Consider partial nestloop and mergejoin plans if the joinrel is + * parallel-safe. However, we can't handle JOIN_UNIQUE_OUTER, because + * the outer path will be partial, and therefore we won't be able to + * properly guarantee uniqueness. Nor can we handle extra_lateral_rels, + * since partial paths must not be parameterized. + * Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, because they + * can produce false null extended rows. 
+ */ + if (!joinrel->consider_parallel || + save_jointype == JOIN_UNIQUE_OUTER || + !bms_is_empty(joinrel->lateral_relids) || + jointype == JOIN_FULL || + jointype == JOIN_RIGHT) + return; + + if (nestjoinOK) + consider_parallel_nestloop(root, joinrel, outerrel, innerrel, + save_jointype, extra); + + /* Can't generate mergejoin path if inner rel is parameterized by outer */ + if (inner_cheapest_total != NULL) + { + ListCell *lc1; + JoinType save_jointype = jointype; + + if (jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + /* Generate a merge join path for each partial outer path. NOTE(review): the save_jointype declared just above shadows the enclosing-scope save_jointype tested before consider_parallel_nestloop() and is copied from the current jointype; confirm JOIN_UNIQUE_INNER has not already been folded into JOIN_INNER by this point. */ + foreach(lc1, outerrel->partial_pathlist) { - if (jointype == JOIN_FULL) - /* okay to try for mergejoin */ ; - else - continue; + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + + /* + * Figure out what useful ordering any paths we create + * will have. + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, + jointype, save_jointype, extra, false, + inner_cheapest_total, merge_pathkeys, + true); } - if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list)) - continue; - /* Compute the required ordering of the inner path */ - innersortkeys = make_inner_pathkeys_for_merge(root, - mergeclauses, - outerpath->pathkeys); + } +} - /* - * Generate a mergejoin on the basis of sorting the cheapest inner. - * Since a sort will be needed, only cheapest total cost matters. (But - * try_mergejoin_path will do the right thing if inner_cheapest_total - * is already correctly sorted.) - */ +/* + * generate_mergejoin_paths + * Creates possible mergejoin paths for input outerpath. + * + * We generate mergejoins if mergejoin clauses are available. We have + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. 
If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) + * + * If is_partial is true then caller will make sure that jointype is neither + * FULL JOIN nor RIGHT JOIN + * + * 'useallclauses' only true in case of JOIN_FULL or JOIN_RIGHT + * 'inner_cheapest_total' cheapest total path of inner relation + * 'is_partial' generate partial path if this flag is set or else normal path + */ +static void +generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinType save_jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial) +{ + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + int num_sortkeys; + int sortkeycnt; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, + true, + extra->mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. + * + * Special corner case: for "x FULL JOIN y ON true", there will be no + * join clauses at all. Ordinarily we'd generate a clauseless + * nestloop path, but since mergejoin is our only join type that + * supports FULL JOIN without any join clauses, it's necessary to + * generate a clauseless mergejoin path instead. 
+ */ + if (mergeclauses == NIL) + { + if (jointype == JOIN_FULL) + /* okay to try for mergejoin */ ; + else + return; + } + if (useallclauses && + list_length(mergeclauses) != list_length(extra->mergeclause_list)) + return; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. + * Since a sort will be needed, only cheapest total cost matters. (But + * try_mergejoin_path will do the right thing if inner_cheapest_total + * is already correctly sorted.) + */ + if (!is_partial) try_mergejoin_path(root, - joinrel, - outerpath, - inner_cheapest_total, - merge_pathkeys, - mergeclauses, - NIL, - innersortkeys, - jointype, - extra); + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra); + /* Generate partial path if inner is parallel safe. */ + else if (inner_cheapest_total->parallel_safe) + try_partial_mergejoin_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra); + + /* Can't do anything else if inner path needs to be unique'd (test save_jointype: a caller may pass jointype with JOIN_UNIQUE_INNER already folded into JOIN_INNER) */ + if (save_jointype == JOIN_UNIQUE_INNER) + return; - /* Can't do anything else if inner path needs to be unique'd */ - if (save_jointype == JOIN_UNIQUE_INNER) - continue; + /* + * Look for presorted inner paths that satisfy the innersortkey list + * --- or any truncation thereof, if we are allowed to build a + * mergejoin using a subset of the merge clauses. Here, we consider + * both cheap startup cost and cheap total cost. + * + * Currently we do not consider parameterized inner paths here. This + * interacts with decisions elsewhere that also discriminate against + * mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. 
+ * + * As we shorten the sortkey list, we should consider only paths that + * are strictly cheaper than (in particular, not the same as) any path + * found in an earlier iteration. Otherwise we'd be intentionally + * using fewer merge keys than a given path allows (treating the rest + * as plain joinquals), which is unlikely to be a good idea. Also, + * eliminating paths here on the basis of compare_path_costs is a lot + * cheaper than building the mergejoin path only to throw it away. + * + * If inner_cheapest_total is well enough sorted to have not required + * a sort in the path made above, we shouldn't make a duplicate path + * with it, either. We handle that case with the same logic that + * handles the previous consideration, by initializing the variables + * that track cheapest-so-far properly. Note that we do NOT reject + * inner_cheapest_total if we find it matches some shorter set of + * pathkeys. That case corresponds to using fewer mergekeys to avoid + * sorting inner_cheapest_total, whereas we did sort it above, so the + * plans being considered are different. 
+ */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1 && !useallclauses) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; /* - * Look for presorted inner paths that satisfy the innersortkey list - * --- or any truncation thereof, if we are allowed to build a - * mergejoin using a subset of the merge clauses. Here, we consider - * both cheap startup cost and cheap total cost. - * - * Currently we do not consider parameterized inner paths here. This - * interacts with decisions elsewhere that also discriminate against - * mergejoins with parameterized inputs; see comments in - * src/backend/optimizer/README. - * - * As we shorten the sortkey list, we should consider only paths that - * are strictly cheaper than (in particular, not the same as) any path - * found in an earlier iteration. Otherwise we'd be intentionally - * using fewer merge keys than a given path allows (treating the rest - * as plain joinquals), which is unlikely to be a good idea. Also, - * eliminating paths here on the basis of compare_path_costs is a lot - * cheaper than building the mergejoin path only to throw it away. - * - * If inner_cheapest_total is well enough sorted to have not required - * a sort in the path made above, we shouldn't make a duplicate path - * with it, either. 
We handle that case with the same logic that - * handles the previous consideration, by initializing the variables - * that track cheapest-so-far properly. Note that we do NOT reject - * inner_cheapest_total if we find it matches some shorter set of - * pathkeys. That case corresponds to using fewer mergekeys to avoid - * sorting inner_cheapest_total, whereas we did sort it above, so the - * plans being considered are different. + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... */ - if (pathkeys_contained_in(innersortkeys, - inner_cheapest_total->pathkeys)) + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) { - /* inner_cheapest_total didn't require a sort */ - cheapest_startup_inner = inner_cheapest_total; - cheapest_total_inner = inner_cheapest_total; - } - else - { - /* it did require a sort, at least for the full set of keys */ - cheapest_startup_inner = NULL; - cheapest_total_inner = NULL; - } - num_sortkeys = list_length(innersortkeys); - if (num_sortkeys > 1 && !useallclauses) - trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ - else - trialsortkeys = innersortkeys; /* won't really truncate */ + /* Found a cheap (or even-cheaper) sorted path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; - for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) - { - Path *innerpath; - List *newclauses = NIL; + if (!is_partial) + { + try_mergejoin_path(root, 
joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); - /* - * Look for an inner path ordered well enough for the first - * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified - * destructively, which is why we made a copy... - */ - trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - TOTAL_COST); - if (innerpath != NULL && - (cheapest_total_inner == NULL || - compare_path_costs(innerpath, cheapest_total_inner, - TOTAL_COST) < 0)) + cheapest_total_inner = innerpath; + } + /* Generate partial path only if innerpath is parallel safe. */ + else if (innerpath->parallel_safe) + { + try_partial_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + cheapest_total_inner = innerpath; + } + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + if (innerpath != cheapest_total_inner) { - /* Found a cheap (or even-cheaper) sorted path */ - /* Select the right mergeclauses, if we didn't already */ - if (sortkeycnt < num_sortkeys) + /* + * Avoid rebuilding clause list if we already made one; + * saves memory in big join trees... 
+ */ + if (newclauses == NIL) { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; } - else - newclauses = mergeclauses; - try_mergejoin_path(root, + + if (!is_partial) + { + try_mergejoin_path(root, joinrel, outerpath, innerpath, @@ -1128,74 +1404,32 @@ match_unsorted_outer(PlannerInfo *root, NIL, jointype, extra); - cheapest_total_inner = innerpath; - } - /* Same on the basis of cheapest startup cost ... */ - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - STARTUP_COST); - if (innerpath != NULL && - (cheapest_startup_inner == NULL || - compare_path_costs(innerpath, cheapest_startup_inner, - STARTUP_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - if (innerpath != cheapest_total_inner) + cheapest_startup_inner = innerpath; + } + /* Generate partial path only if innerpath is parallel safe. */ + else if (innerpath->parallel_safe) { - /* - * Avoid rebuilding clause list if we already made one; - * saves memory in big join trees... - */ - if (newclauses == NIL) - { - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - } - try_mergejoin_path(root, - joinrel, - outerpath, - innerpath, - merge_pathkeys, - newclauses, - NIL, - NIL, - jointype, - extra); + try_partial_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + cheapest_startup_inner = innerpath; } - cheapest_startup_inner = innerpath; } - - /* - * Don't consider truncated sortkeys if we need all clauses. 
- */ - if (useallclauses) - break; } - } - /* - * If the joinrel is parallel-safe and the join type supports nested - * loops, we may be able to consider a partial nestloop plan. However, we - * can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial, - * and therefore we won't be able to properly guarantee uniqueness. Nor - * can we handle extra_lateral_rels, since partial paths must not be - * parameterized. - */ - if (joinrel->consider_parallel && nestjoinOK && - save_jointype != JOIN_UNIQUE_OUTER && - bms_is_empty(joinrel->lateral_relids)) - consider_parallel_nestloop(root, joinrel, outerrel, innerrel, - save_jointype, extra); + /* + * Don't consider truncated sortkeys if we need all clauses. + */ + if (useallclauses) + break; + } } /*