From 4ba8c1d67b323c9c2d9e41a2aad06d95df611bcb Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Wed, 23 Oct 2024 14:26:40 -0500 Subject: [PATCH v1] Remove unnecessary Sort node above Index Scan of btree-compatible AM When determining whether two sort keys are equivalent, they may not belong to the same operator family but could still use the same underlying sort operator. In such cases, a simple pointer comparison of two pathkeys marks them as different, leading to the addition of an unnecessary Sort node, even though the path is already sorted as desired. This patch improves pathkeys_contained_in() and pathkeys_count_contained_in() by comparing the sort operators directly, preventing redundant sorting. --- src/backend/optimizer/path/costsize.c | 4 +-- src/backend/optimizer/path/pathkeys.c | 43 +++++++++++++++++++++++-- src/backend/optimizer/plan/createplan.c | 6 ++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b7f6ab40dec..e95762ac9a4 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3607,9 +3607,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, opathkey = (PathKey *) linitial(opathkeys); ipathkey = (PathKey *) linitial(ipathkeys); /* debugging check */ - if (opathkey->pk_opfamily != ipathkey->pk_opfamily || - opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation || - opathkey->pk_strategy != ipathkey->pk_strategy || + if (opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation || opathkey->pk_cmptype != ipathkey->pk_cmptype || opathkey->pk_nulls_first != ipathkey->pk_nulls_first) elog(ERROR, "left and right pathkeys do not match in mergejoin"); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index a0826ed5008..a709148ea9f 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -39,7 +39,7 @@ static bool matches_boolean_partition_clause(RestrictInfo *rinfo, int partkeycol); static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); - +static bool pathkeys_have_same_sortop(PathKey *pk1, PathKey *pk2); /**************************************************************************** * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING @@ -355,7 +355,7 @@ compare_pathkeys(List *keys1, List *keys2) PathKey *pathkey1 = (PathKey *) lfirst(key1); PathKey *pathkey2 = (PathKey *) lfirst(key2); - if (pathkey1 != pathkey2) + if (!pathkeys_have_same_sortop(pathkey1, pathkey2)) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } @@ -585,6 +585,43 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path) return infos; } +/* + * pathkeys_have_same_sortop + * + * Determines whether two PathKey objects should be considered equivalent + * for sorting purposes. Two PathKeys are treated as the same if: + * - They are identical pointers OR + * - They belong to the same equivalence class. AND + * - They have the same comparison type and null ordering. AND + * - They have the same sorting operator, even if they belong to different + * operator families. + * + * The function retrieves the sort operator (sortop) for both PathKeys using + * their operator family, datatype, and strategy. If both PathKeys resolve to + * the same sort operator, they are considered equivalent. + */ +static bool +pathkeys_have_same_sortop(PathKey *pk1, PathKey *pk2) +{ + Oid pk_op1; + Oid pk_op2; + Oid pk_datatype; + + if (pk1 == pk2) + return true; + + if (pk1->pk_eclass != pk2->pk_eclass || + pk1->pk_cmptype != pk2->pk_cmptype || + pk1->pk_nulls_first != pk2->pk_nulls_first) + return false; + + pk_datatype = ((EquivalenceMember *) linitial(pk1->pk_eclass->ec_members))->em_datatype; + pk_op1 = get_opfamily_member(pk1->pk_opfamily, pk_datatype, pk_datatype, pk1->pk_strategy); + pk_op2 = get_opfamily_member(pk2->pk_opfamily, pk_datatype, pk_datatype, pk2->pk_strategy); + + return pk_op1 == pk_op2; +} + /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -626,7 +663,7 @@ pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) PathKey *pathkey1 = (PathKey *) lfirst(key1); PathKey *pathkey2 = (PathKey *) lfirst(key2); - if (pathkey1 != pathkey2) + if (!pathkeys_have_same_sortop(pathkey1, pathkey2)) { *n_common = n; return false; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 6a7a8d68db5..015f7f05801 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4747,12 +4747,10 @@ create_mergejoin_plan(PlannerInfo *root, * matter which way we imagine this column to be ordered.) But a * non-redundant inner pathkey had better match outer's ordering too. */ - if (opathkey->pk_opfamily != ipathkey->pk_opfamily || - opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation) + if (opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation) elog(ERROR, "left and right pathkeys do not match in mergejoin"); if (first_inner_match && - (opathkey->pk_strategy != ipathkey->pk_strategy || - opathkey->pk_cmptype != ipathkey->pk_cmptype || + (opathkey->pk_cmptype != ipathkey->pk_cmptype || opathkey->pk_nulls_first != ipathkey->pk_nulls_first)) elog(ERROR, "left and right pathkeys do not match in mergejoin"); -- 2.39.5 (Apple Git-154)