From 3446f549f37e13f6793ac60138481ab78ec4436f Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 17 Nov 2023 23:47:26 +0300 Subject: [PATCH] [PATCH 1/2] Introduce indexes for RestrictInfo This change was picked up from the previous patch, v20. --- src/backend/nodes/outfuncs.c | 6 +- src/backend/nodes/read.c | 20 ++ src/backend/nodes/readfuncs.c | 22 ++ src/backend/optimizer/path/costsize.c | 3 +- src/backend/optimizer/path/equivclass.c | 322 +++++++++++++++++++--- src/backend/optimizer/plan/analyzejoins.c | 65 +++-- src/backend/optimizer/plan/planner.c | 2 + src/backend/optimizer/prep/prepjointree.c | 2 + src/backend/optimizer/util/inherit.c | 7 + src/include/nodes/parsenodes.h | 6 + src/include/nodes/pathnodes.h | 47 +++- src/include/nodes/readfuncs.h | 2 + src/include/optimizer/paths.h | 18 +- 13 files changed, 454 insertions(+), 68 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e66a99247e4..0bc0228ab2c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -463,8 +463,8 @@ _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) WRITE_NODE_FIELD(ec_opfamilies); WRITE_OID_FIELD(ec_collation); WRITE_NODE_FIELD(ec_members); - WRITE_NODE_FIELD(ec_sources); - WRITE_NODE_FIELD(ec_derives); + WRITE_BITMAPSET_FIELD(ec_source_indexes); + WRITE_BITMAPSET_FIELD(ec_derive_indexes); WRITE_BITMAPSET_FIELD(ec_relids); WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_volatile); @@ -567,6 +567,8 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) WRITE_BOOL_FIELD(inh); WRITE_BOOL_FIELD(inFromCl); WRITE_NODE_FIELD(securityQuals); + WRITE_BITMAPSET_FIELD(eclass_source_indexes); + WRITE_BITMAPSET_FIELD(eclass_derive_indexes); } static void diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index 813eda3e739..314736d989b 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -514,3 +514,23 @@ nodeRead(const char *token, int tok_len) return (void *) result; } + +/* + * pg_strtok_save_context - + * Save context initialized by stringToNode. + */ +void +pg_strtok_save_context(const char **pcontext) +{ + *pcontext = pg_strtok_ptr; +} + +/* + * pg_strtok_restore_context - + * Resore saved context. + */ +void +pg_strtok_restore_context(const char *context) +{ + pg_strtok_ptr = context; +} diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index cc2021c1f7b..0915c0e661d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -191,6 +191,26 @@ nullable_string(const char *token, int length) return debackslash(token, length); } +/* Read an equivalence field (anything written as ":fldname %u") and check it */ +#define READ_EQ_BITMAPSET_FIELD_CHECK(fldname) \ +{ \ + int save_length = length; \ + const char *context; \ + pg_strtok_save_context(&context); \ + token = pg_strtok(&length); \ + if (length > 0 && strncmp(token, ":"#fldname, strlen(":"#fldname))) \ + { \ + /* "fldname" field was not found - fill it and restore context. */ \ + local_node->fldname = NULL; \ + pg_strtok_restore_context(context); \ + length = save_length; \ + } \ + else \ + { \ + local_node->fldname = _readBitmapset(); \ + } \ +} + /* * _readBitmapset @@ -574,6 +594,8 @@ _readRangeTblEntry(void) READ_BOOL_FIELD(inh); READ_BOOL_FIELD(inFromCl); READ_NODE_FIELD(securityQuals); + READ_EQ_BITMAPSET_FIELD_CHECK(eclass_source_indexes); + READ_EQ_BITMAPSET_FIELD_CHECK(eclass_derive_indexes); READ_DONE(); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d6ceafd51c5..fcda83ad2c4 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -5755,7 +5755,8 @@ get_foreign_key_join_selectivity(PlannerInfo *root, if (ec && ec->ec_has_const) { EquivalenceMember *em = fkinfo->fk_eclass_member[i]; - RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec, + RestrictInfo *rinfo = find_derived_clause_for_ec_member(root, + ec, em); if (rinfo) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7fa502d6e25..329419c38d9 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -38,6 +38,8 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, JoinDomain *jdomain, EquivalenceMember *parent, Oid datatype); +static void add_eq_derive(PlannerInfo *root, EquivalenceClass *ec, + RestrictInfo *rinfo); static bool is_exprlist_member(Expr *node, List *exprs); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); @@ -74,6 +76,49 @@ static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root, static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2); +/* + * add_eq_source - add 'rinfo' in eq_sources for this 'ec' + */ +void +add_eq_source(PlannerInfo *root, EquivalenceClass *ec, RestrictInfo *rinfo) +{ + int source_idx = list_length(root->eq_sources); + int i; + + ec->ec_source_indexes = bms_add_member(ec->ec_source_indexes, source_idx); + root->eq_sources = lappend(root->eq_sources, rinfo); + + i = -1; + while ((i = bms_next_member(rinfo->clause_relids, i)) >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + rte->eclass_source_indexes = bms_add_member(rte->eclass_source_indexes, + source_idx); + } +} + +/* + * add_eq_derive - add 'rinfo' in eq_derives for this 'ec' + */ +static void +add_eq_derive(PlannerInfo *root, EquivalenceClass *ec, RestrictInfo *rinfo) +{ + int derive_idx = list_length(root->eq_derives); + int i; + + ec->ec_derive_indexes = bms_add_member(ec->ec_derive_indexes, derive_idx); + root->eq_derives = lappend(root->eq_derives, rinfo); + + i = -1; + while ((i = bms_next_member(rinfo->clause_relids, i)) >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + rte->eclass_derive_indexes = bms_add_member(rte->eclass_derive_indexes, + derive_idx); + } +} /* * process_equivalence @@ -311,7 +356,6 @@ process_equivalence(PlannerInfo *root, /* If case 1, nothing to do, except add to sources */ if (ec1 == ec2) { - ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -322,6 +366,8 @@ process_equivalence(PlannerInfo *root, /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; + + add_eq_source(root, ec1, restrictinfo); return true; } @@ -342,8 +388,10 @@ process_equivalence(PlannerInfo *root, * be found. */ ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); - ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); - ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives); + ec1->ec_source_indexes = bms_join(ec1->ec_source_indexes, + ec2->ec_source_indexes); + ec1->ec_derive_indexes = bms_join(ec1->ec_derive_indexes, + ec2->ec_derive_indexes); ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ @@ -355,10 +403,9 @@ process_equivalence(PlannerInfo *root, root->eq_classes = list_delete_nth_cell(root->eq_classes, ec2_idx); /* just to avoid debugging confusion w/ dangling pointers: */ ec2->ec_members = NIL; - ec2->ec_sources = NIL; - ec2->ec_derives = NIL; + ec2->ec_source_indexes = NULL; + ec2->ec_derive_indexes = NULL; ec2->ec_relids = NULL; - ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -369,13 +416,13 @@ process_equivalence(PlannerInfo *root, /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; + add_eq_source(root, ec1, restrictinfo); } else if (ec1) { /* Case 3: add item2 to ec1 */ em2 = add_eq_member(ec1, item2, item2_relids, jdomain, NULL, item2_type); - ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -386,13 +433,13 @@ process_equivalence(PlannerInfo *root, /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; + add_eq_source(root, ec1, restrictinfo); } else if (ec2) { /* Case 3: add item1 to ec2 */ em1 = add_eq_member(ec2, item1, item1_relids, jdomain, NULL, item1_type); - ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_min_security = Min(ec2->ec_min_security, restrictinfo->security_level); ec2->ec_max_security = Max(ec2->ec_max_security, @@ -403,6 +450,8 @@ process_equivalence(PlannerInfo *root, /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; + + add_eq_source(root, ec2, restrictinfo); } else { @@ -412,8 +461,8 @@ process_equivalence(PlannerInfo *root, ec->ec_opfamilies = opfamilies; ec->ec_collation = collation; ec->ec_members = NIL; - ec->ec_sources = list_make1(restrictinfo); - ec->ec_derives = NIL; + ec->ec_source_indexes = NULL; + ec->ec_derive_indexes = NULL; ec->ec_relids = NULL; ec->ec_has_const = false; ec->ec_has_volatile = false; @@ -435,6 +484,8 @@ process_equivalence(PlannerInfo *root, /* mark the RI as usable with this pair of EMs */ restrictinfo->left_em = em1; restrictinfo->right_em = em2; + + add_eq_source(root, ec, restrictinfo); } return true; @@ -551,7 +602,6 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, return em; } - /* * get_eclass_for_sort_expr * Given an expression and opfamily/collation info, find an existing @@ -671,8 +721,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_opfamilies = list_copy(opfamilies); newec->ec_collation = collation; newec->ec_members = NIL; - newec->ec_sources = NIL; - newec->ec_derives = NIL; + newec->ec_source_indexes = NULL; + newec->ec_derive_indexes = NULL; newec->ec_relids = NULL; newec->ec_has_const = false; newec->ec_has_volatile = contain_volatile_functions((Node *) expr); @@ -1026,7 +1076,7 @@ relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, * scanning of the quals and before Path construction begins. * * We make no attempt to avoid generating duplicate RestrictInfos here: we - * don't search ec_sources or ec_derives for matches. It doesn't really + * don't search eq_sources or eq_derives for matches. It doesn't really * seem worth the trouble to do so. */ void @@ -1115,6 +1165,7 @@ generate_base_implied_equalities_const(PlannerInfo *root, { EquivalenceMember *const_em = NULL; ListCell *lc; + int i; /* * In the trivial case where we just had one "var = const" clause, push @@ -1124,9 +1175,9 @@ generate_base_implied_equalities_const(PlannerInfo *root, * equivalent to the old one. */ if (list_length(ec->ec_members) == 2 && - list_length(ec->ec_sources) == 1) + bms_get_singleton_member(ec->ec_source_indexes, &i)) { - RestrictInfo *restrictinfo = (RestrictInfo *) linitial(ec->ec_sources); + RestrictInfo *restrictinfo = list_nth_node(RestrictInfo, root->eq_sources, i); distribute_restrictinfo_to_rels(root, restrictinfo); return; @@ -1184,9 +1235,9 @@ generate_base_implied_equalities_const(PlannerInfo *root, /* * If the clause didn't degenerate to a constant, fill in the correct - * markings for a mergejoinable clause, and save it in ec_derives. (We + * markings for a mergejoinable clause, and save it in eq_derives. (We * will not re-use such clauses directly, but selectivity estimation - * may consult the list later. Note that this use of ec_derives does + * may consult the list later. Note that this use of eq_derives does * not overlap with its use for join clauses, since we never generate * join clauses from an ec_has_const eclass.) */ @@ -1196,7 +1247,8 @@ generate_base_implied_equalities_const(PlannerInfo *root, rinfo->left_ec = rinfo->right_ec = ec; rinfo->left_em = cur_em; rinfo->right_em = const_em; - ec->ec_derives = lappend(ec->ec_derives, rinfo); + + add_eq_derive(root, ec, rinfo); } } } @@ -1262,7 +1314,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, /* * If the clause didn't degenerate to a constant, fill in the * correct markings for a mergejoinable clause. We don't put it - * in ec_derives however; we don't currently need to re-find such + * in eq_derives however; we don't currently need to re-find such * clauses, and we don't want to clutter that list with non-join * clauses. */ @@ -1318,11 +1370,12 @@ static void generate_base_implied_equalities_broken(PlannerInfo *root, EquivalenceClass *ec) { - ListCell *lc; + int i = -1; - foreach(lc, ec->ec_sources) + while ((i = bms_next_member(ec->ec_source_indexes, i)) >= 0) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *restrictinfo = list_nth_node(RestrictInfo, + root->eq_sources, i); if (ec->ec_has_const || bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE) @@ -1363,11 +1416,11 @@ generate_base_implied_equalities_broken(PlannerInfo *root, * Because the same join clauses are likely to be needed multiple times as * we consider different join paths, we avoid generating multiple copies: * whenever we select a particular pair of EquivalenceMembers to join, - * we check to see if the pair matches any original clause (in ec_sources) - * or previously-built clause (in ec_derives). This saves memory and allows - * re-use of information cached in RestrictInfos. We also avoid generating - * commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but - * we already have "b.y = a.x", we return the existing clause. + * we check to see if the pair matches any original clause in root's + * eq_sources or previously-built clause (in root's eq_derives). This saves + * memory and allows re-use of information cached in RestrictInfos. We also + * avoid generating commutative duplicates, i.e. if the algorithm selects + * "a.x = b.y" but we already have "b.y = a.x", we return the existing clause. * * If we are considering an outer join, sjinfo is the associated OJ info, * otherwise it can be NULL. @@ -1732,12 +1785,16 @@ generate_join_implied_equalities_broken(PlannerInfo *root, Relids nominal_inner_relids, RelOptInfo *inner_rel) { + Bitmapset *matching_es; List *result = NIL; - ListCell *lc; + int i; - foreach(lc, ec->ec_sources) + matching_es = get_ec_source_indexes(root, ec, nominal_join_relids); + i = -1; + while ((i = bms_next_member(matching_es, i)) >= 0) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *restrictinfo = list_nth_node(RestrictInfo, + root->eq_sources, i); Relids clause_relids = restrictinfo->required_relids; if (bms_is_subset(clause_relids, nominal_join_relids) && @@ -1749,12 +1806,12 @@ generate_join_implied_equalities_broken(PlannerInfo *root, /* * If we have to translate, just brute-force apply adjust_appendrel_attrs * to all the RestrictInfos at once. This will result in returning - * RestrictInfos that are not listed in ec_derives, but there shouldn't be + * RestrictInfos that are not listed in eq_derives, but there shouldn't be * any duplication, and it's a sufficiently narrow corner case that we * shouldn't sweat too much over it anyway. * * Since inner_rel might be an indirect descendant of the baserel - * mentioned in the ec_sources clauses, we have to be prepared to apply + * mentioned in the eq_sources clauses, we have to be prepared to apply * multiple levels of Var translation. */ if (IS_OTHER_REL(inner_rel) && result != NIL) @@ -1816,10 +1873,11 @@ create_join_clause(PlannerInfo *root, EquivalenceMember *rightem, EquivalenceClass *parent_ec) { + Bitmapset *matches; RestrictInfo *rinfo; RestrictInfo *parent_rinfo = NULL; - ListCell *lc; MemoryContext oldcontext; + int i; /* * Search to see if we already built a RestrictInfo for this pair of @@ -1830,9 +1888,12 @@ create_join_clause(PlannerInfo *root, * it's not identical, it'd better have the same effects, or the operator * families we're using are broken. */ - foreach(lc, ec->ec_sources) + matches = bms_int_members(get_ec_source_indexes_strict(root, ec, leftem->em_relids), + get_ec_source_indexes_strict(root, ec, rightem->em_relids)); + i = -1; + while ((i = bms_next_member(matches, i)) >= 0) { - rinfo = (RestrictInfo *) lfirst(lc); + rinfo = list_nth_node(RestrictInfo, root->eq_sources, i); if (rinfo->left_em == leftem && rinfo->right_em == rightem && rinfo->parent_ec == parent_ec) @@ -1843,9 +1904,13 @@ create_join_clause(PlannerInfo *root, return rinfo; } - foreach(lc, ec->ec_derives) + matches = bms_int_members(get_ec_derive_indexes_strict(root, ec, leftem->em_relids), + get_ec_derive_indexes_strict(root, ec, rightem->em_relids)); + + i = -1; + while ((i = bms_next_member(matches, i)) >= 0) { - rinfo = (RestrictInfo *) lfirst(lc); + rinfo = list_nth_node(RestrictInfo, root->eq_derives, i); if (rinfo->left_em == leftem && rinfo->right_em == rightem && rinfo->parent_ec == parent_ec) @@ -1903,7 +1968,7 @@ create_join_clause(PlannerInfo *root, rinfo->left_em = leftem; rinfo->right_em = rightem; /* and save it for possible re-use */ - ec->ec_derives = lappend(ec->ec_derives, rinfo); + add_eq_derive(root, ec, rinfo); MemoryContextSwitchTo(oldcontext); @@ -2578,16 +2643,19 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, * Returns NULL if no such clause can be found. */ RestrictInfo * -find_derived_clause_for_ec_member(EquivalenceClass *ec, +find_derived_clause_for_ec_member(PlannerInfo *root, EquivalenceClass *ec, EquivalenceMember *em) { - ListCell *lc; + int i; Assert(ec->ec_has_const); Assert(!em->em_is_const); - foreach(lc, ec->ec_derives) + + i = -1; + while ((i = bms_next_member(ec->ec_derive_indexes, i)) >= 0) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = list_nth_node(RestrictInfo, root->eq_derives, + i); /* * generate_base_implied_equalities_const will have put non-const @@ -3063,7 +3131,7 @@ have_relevant_eclass_joinclause(PlannerInfo *root, * surely be true if both of them overlap ec_relids.) * * Note we don't test ec_broken; if we did, we'd need a separate code - * path to look through ec_sources. Checking the membership anyway is + * path to look through eq_sources. Checking the membership anyway is * OK as a possibly-overoptimistic heuristic. * * We don't test ec_has_const either, even though a const eclass won't @@ -3151,7 +3219,7 @@ eclass_useful_for_merging(PlannerInfo *root, /* * Note we don't test ec_broken; if we did, we'd need a separate code path - * to look through ec_sources. Checking the members anyway is OK as a + * to look through eq_sources. Checking the members anyway is OK as a * possibly-overoptimistic heuristic. */ @@ -3304,3 +3372,167 @@ get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2) /* Calculate and return the common EC indexes, recycling the left input. */ return bms_int_members(rel1ecs, rel2ecs); } + +/* + * get_ec_source_indexes + * Returns a Bitmapset with indexes into root->eq_sources for all + * RestrictInfos in 'ec' that have + * bms_overlap(relids, rinfo->clause_relids) as true. + * + * Returns a newly allocated Bitmapset which the caller is free to modify. + */ +Bitmapset * +get_ec_source_indexes(PlannerInfo *root, EquivalenceClass *ec, Relids relids) +{ + Bitmapset *rel_esis = NULL; + int i = -1; + + while ((i = bms_next_member(relids, i)) >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + rel_esis = bms_add_members(rel_esis, rte->eclass_source_indexes); + } + + /* bitwise-AND to leave only the ones for this EquivalenceClass */ + return bms_int_members(rel_esis, ec->ec_source_indexes); +} + +/* + * get_ec_source_indexes_strict + * Returns a Bitmapset with indexes into root->eq_sources for all + * RestrictInfos in 'ec' that have + * bms_is_subset(relids, rinfo->clause_relids) as true. + * + * Returns a newly allocated Bitmapset which the caller is free to modify. + */ +Bitmapset * +get_ec_source_indexes_strict(PlannerInfo *root, EquivalenceClass *ec, + Relids relids) +{ + Bitmapset *esis = NULL; + int i = bms_next_member(relids, -1); + + if (i >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + /* + * bms_intersect to the first relation to try to keep the resulting + * Bitmapset as small as possible. This saves having to make a + * complete bms_copy() of one of them. One may contain significantly + * more words than the other. + */ + esis = bms_intersect(ec->ec_source_indexes, + rte->eclass_source_indexes); + + while ((i = bms_next_member(relids, i)) >= 0) + { + rte = root->simple_rte_array[i]; + esis = bms_int_members(esis, rte->eclass_source_indexes); + } + } + +#ifdef USE_ASSERT_CHECKING + /* verify the results look sane */ + i = -1; + while ((i = bms_next_member(esis, i)) >= 0) + { + RestrictInfo *rinfo = list_nth_node(RestrictInfo, root->eq_sources, + i); + + Assert(bms_is_subset(relids, rinfo->clause_relids)); + } +#endif + + return esis; +} + +/* + * get_ec_derive_indexes + * Returns a Bitmapset with indexes into root->eq_derives for all + * RestrictInfos in 'ec' that have + * bms_overlap(relids, rinfo->clause_relids) as true. + * + * Returns a newly allocated Bitmapset which the caller is free to modify. + * + * XXX is this function even needed? + */ +Bitmapset * +get_ec_derive_indexes(PlannerInfo *root, EquivalenceClass *ec, Relids relids) +{ + Bitmapset *rel_edis = NULL; + int i = -1; + + while ((i = bms_next_member(relids, i)) >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + rel_edis = bms_add_members(rel_edis, rte->eclass_derive_indexes); + } + +#ifdef USE_ASSERT_CHECKING + /* verify the results look sane */ + i = -1; + while ((i = bms_next_member(rel_edis, i)) >= 0) + { + RestrictInfo *rinfo = list_nth_node(RestrictInfo, root->eq_derives, + i); + + Assert(bms_overlap(relids, rinfo->clause_relids)); + } +#endif + + /* bitwise-AND to leave only the ones for this EquivalenceClass */ + return bms_int_members(rel_edis, ec->ec_derive_indexes); +} + +/* + * get_ec_derive_indexes_strict + * Returns a Bitmapset with indexes into root->eq_derives for all + * RestrictInfos in 'ec' that have + * bms_is_subset(relids, rinfo->clause_relids) as true. + * + * Returns a newly allocated Bitmapset which the caller is free to modify. + */ +Bitmapset * +get_ec_derive_indexes_strict(PlannerInfo *root, EquivalenceClass *ec, + Relids relids) +{ + Bitmapset *edis = NULL; + int i = bms_next_member(relids, -1); + + if (i >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + + /* + * bms_intersect to the first relation to try to keep the resulting + * Bitmapset as small as possible. This saves having to make a + * complete bms_copy() of one of them. One may contain significantly + * more words than the other. + */ + edis = bms_intersect(ec->ec_derive_indexes, + rte->eclass_derive_indexes); + + while ((i = bms_next_member(relids, i)) >= 0) + { + rte = root->simple_rte_array[i]; + edis = bms_int_members(edis, rte->eclass_derive_indexes); + } + } + +#ifdef USE_ASSERT_CHECKING + /* verify the results look sane */ + i = -1; + while ((i = bms_next_member(edis, i)) >= 0) + { + RestrictInfo *rinfo = list_nth_node(RestrictInfo, root->eq_derives, + i); + + Assert(bms_is_subset(relids, rinfo->clause_relids)); + } +#endif + + return edis; +} diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index b9be19a687f..92dcbbdf2e0 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -61,7 +61,7 @@ static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo); static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid); -static void remove_rel_from_eclass(EquivalenceClass *ec, +static void remove_rel_from_eclass(PlannerInfo *root, EquivalenceClass *ec, int relid, int ojrelid); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); @@ -578,7 +578,7 @@ remove_leftjoinrel_from_query(PlannerInfo *root, int relid, if (bms_is_member(relid, ec->ec_relids) || bms_is_member(ojrelid, ec->ec_relids)) - remove_rel_from_eclass(ec, relid, ojrelid); + remove_rel_from_eclass(root, ec, relid, ojrelid); } /* @@ -663,9 +663,10 @@ remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid) * level(s). */ static void -remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) +remove_rel_from_eclass(PlannerInfo *root, EquivalenceClass *ec, int relid, int ojrelid) { ListCell *lc; + int i; /* Fix up the EC's overall relids */ ec->ec_relids = bms_del_member(ec->ec_relids, relid); @@ -692,9 +693,10 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) } /* Fix up the source clauses, in case we can re-use them later */ - foreach(lc, ec->ec_sources) + i = -1; + while ((i = bms_next_member(ec->ec_source_indexes, i)) >= 0) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = list_nth_node(RestrictInfo, root->eq_sources, i); remove_rel_from_restrictinfo(rinfo, relid, ojrelid); } @@ -704,7 +706,7 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) * drop them. (At this point, any such clauses would be base restriction * clauses, which we'd not need anymore anyway.) */ - ec->ec_derives = NIL; + ec->ec_derive_indexes = NULL; } /* @@ -1528,6 +1530,24 @@ replace_relid(Relids relids, int oldId, int newId) return relids; } +void +del_eq_derives(PlannerInfo *root, EquivalenceClass *ec) +{ + int i = -1; + + while ((i = bms_next_member(ec->ec_derive_indexes, i)) >= 0) + { + /* + * Can't delete the element because we would need to rebuild all + * the eq_derives indexes. But add a nuke to detect potential problems. + */ + list_nth_cell(root->eq_derives, i)->ptr_value = NULL; + } + bms_free(ec->ec_derive_indexes); + ec->ec_derive_indexes = NULL; +} + + /* * Update EC members to point to the remaining relation instead of the removed * one, removing duplicates. @@ -1551,10 +1571,11 @@ replace_relid(Relids relids, int oldId, int newId) * delete them. */ static void -update_eclasses(EquivalenceClass *ec, int from, int to) +update_eclass(PlannerInfo *root, EquivalenceClass *ec, int from, int to) { List *new_members = NIL; List *new_sources = NIL; + int i = -1; ListCell *lc; ListCell *lc1; @@ -1593,16 +1614,13 @@ update_eclasses(EquivalenceClass *ec, int from, int to) new_members = lappend(new_members, em); } - list_free(ec->ec_members); - ec->ec_members = new_members; - - list_free(ec->ec_derives); - ec->ec_derives = NULL; + del_eq_derives(root, ec); /* Update EC source expressions */ - foreach(lc, ec->ec_sources) + while ((i = bms_next_member(ec->ec_source_indexes, i)) > 0) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + RestrictInfo *rinfo = list_nth_node(RestrictInfo, + root->eq_sources, i); bool is_redundant = false; if (!bms_is_member(from, rinfo->required_relids)) @@ -1636,8 +1654,21 @@ update_eclasses(EquivalenceClass *ec, int from, int to) new_sources = lappend(new_sources, rinfo); } - list_free(ec->ec_sources); - ec->ec_sources = new_sources; + i=-1; + + while ((i = bms_next_member(ec->ec_source_indexes, i)) > 0) + { + list_nth_cell(root->eq_sources, i)->ptr_value = NULL; + } + bms_free(ec->ec_source_indexes); + ec->ec_source_indexes = NULL; + + foreach(lc, new_sources) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + add_eq_source(root, ec, rinfo); + } + ec->ec_relids = replace_relid(ec->ec_relids, from, to); } @@ -1815,7 +1846,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, { EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); - update_eclasses(ec, toRemove->relid, toKeep->relid); + update_eclass(root, ec, toRemove->relid, toKeep->relid); toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i); } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a8cea5efe14..773d703c881 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -646,6 +646,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->multiexpr_params = NIL; root->join_domains = NIL; root->eq_classes = NIL; + root->eq_sources = NIL; + root->eq_derives = NIL; root->ec_merging_done = false; root->last_rinfo_serial = 0; root->all_result_relids = diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 73ff40721c9..1cce43a94e0 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -993,6 +993,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->multiexpr_params = NIL; subroot->join_domains = NIL; subroot->eq_classes = NIL; + subroot->eq_sources = NIL; + subroot->eq_derives = NIL; subroot->ec_merging_done = false; subroot->last_rinfo_serial = 0; subroot->all_result_relids = NULL; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index f9d3ff1e7ac..3d7671e2e85 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -482,6 +482,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, */ childrte = makeNode(RangeTblEntry); memcpy(childrte, parentrte, sizeof(RangeTblEntry)); + /* + * We do not want to inherit the EquivalenceMember indexes of the parent + * to its child + */ + childrte->eclass_source_indexes = NULL; + childrte->eclass_derive_indexes = NULL; + Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */ childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index e494309da8d..475a65475e6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1194,6 +1194,12 @@ typedef struct RangeTblEntry bool inh; /* inheritance requested? */ bool inFromCl; /* present in FROM clause? */ List *securityQuals; /* security barrier quals to apply, if any */ + Bitmapset *eclass_source_indexes; /* Indexes in PlannerInfo's eq_sources + * list for RestrictInfos that mention + * this relation */ + Bitmapset *eclass_derive_indexes; /* Indexes in PlannerInfo's eq_derives + * list for RestrictInfos that mention + * this relation */ } RangeTblEntry; /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index ed85dc7414b..339e09e6abf 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -310,6 +310,12 @@ struct PlannerInfo /* list of active EquivalenceClasses */ List *eq_classes; + /* list of source RestrictInfos used to build EquivalenceClasses */ + List *eq_sources; + + /* list of RestrictInfos derived from EquivalenceClasses */ + List *eq_derives; + /* set true once ECs are canonical */ bool ec_merging_done; @@ -1350,6 +1356,41 @@ typedef struct JoinDomain * entry: consider SELECT random() AS a, random() AS b ... ORDER BY b,a. * So we record the SortGroupRef of the originating sort clause. * + * TODO: We should update the following comments. + * + * At various locations in the query planner, we must search for + * EquivalenceMembers within a given EquivalenceClass. For the common case, + * an EquivalenceClass does not have a large number of EquivalenceMembers, + * however, in cases such as planning queries to partitioned tables, the number + * of members can become large. To maintain planning performance, we make use + * of a bitmap index to allow us to quickly find EquivalenceMembers in a given + * EquivalenceClass belonging to a given relation or set of relations. This + * is done by storing a list of EquivalenceMembers belonging to all + * EquivalenceClasses in PlannerInfo and storing a Bitmapset for each + * RelOptInfo which has a bit set for each EquivalenceMember in that list + * which relates to the given relation. We also store a Bitmapset to mark all + * of the indexes in the PlannerInfo's list of EquivalenceMembers in the + * EquivalenceClass. We can quickly find the interesting indexes into the + * PlannerInfo's list by performing a bit-wise AND on the RelOptInfo's + * Bitmapset and the EquivalenceClasses. + * + * To further speed up the lookup of EquivalenceMembers we also record the + * non-child indexes. This allows us to deal with fewer bits for searches + * that don't require "is_child" EquivalenceMembers. We must also store the + * indexes into EquivalenceMembers which have no relids mentioned as some + * searches require that. + * + * Additionally, we also store the EquivalenceMembers of a given + * EquivalenceClass in the ec_members list. Technically, we could obtain this + * from looking at the bits in ec_member_indexes, however, finding all members + * of a given EquivalenceClass is common enough that it pays to have fast + * access to a dense list containing all members. + * + * The source and derived RestrictInfos are indexed in a similar method, + * although we don't maintain a List in the EquivalenceClass for these. These + * must be looked up in PlannerInfo using the ec_source_indexes and + * ec_derive_indexes Bitmapsets. + * * NB: if ec_merged isn't NULL, this class has been merged into another, and * should be ignored in favor of using the pointed-to class. * @@ -1368,8 +1409,10 @@ typedef struct EquivalenceClass List *ec_opfamilies; /* btree operator family OIDs */ Oid ec_collation; /* collation, if datatypes are collatable */ List *ec_members; /* list of EquivalenceMembers */ - List *ec_sources; /* list of generating RestrictInfos */ - List *ec_derives; /* list of derived RestrictInfos */ + Bitmapset *ec_source_indexes; /* indexes into PlannerInfo's eq_sources + * list of generating RestrictInfos */ + Bitmapset *ec_derive_indexes; /* indexes into PlannerInfo's eq_derives + * list of derived RestrictInfos */ Relids ec_relids; /* all relids appearing in ec_members, except * for child members (see below) */ bool ec_has_const; /* any pseudoconstants in ec_members? */ diff --git a/src/include/nodes/readfuncs.h b/src/include/nodes/readfuncs.h index cba6f0be75a..2fa68e59cdf 100644 --- a/src/include/nodes/readfuncs.h +++ b/src/include/nodes/readfuncs.h @@ -29,6 +29,8 @@ extern PGDLLIMPORT bool restore_location_fields; extern const char *pg_strtok(int *length); extern char *debackslash(const char *token, int length); extern void *nodeRead(const char *token, int tok_len); +extern void pg_strtok_save_context(const char **pcontext); +extern void pg_strtok_restore_context(const char *context); /* * prototypes for functions in readfuncs.c diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9e7408c7ecf..83695e2f66f 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -111,6 +111,9 @@ extern bool have_join_order_restriction(PlannerInfo *root, extern bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params); extern void mark_dummy_rel(RelOptInfo *rel); +extern void del_eq_derives(PlannerInfo *root, EquivalenceClass *ec); +extern void add_eq_source(PlannerInfo *root, EquivalenceClass *ec, RestrictInfo *rinfo); + /* * equivclass.c @@ -162,7 +165,8 @@ extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root, ForeignKeyOptInfo *fkinfo, int colno); -extern RestrictInfo *find_derived_clause_for_ec_member(EquivalenceClass *ec, +extern RestrictInfo *find_derived_clause_for_ec_member(PlannerInfo *root, + EquivalenceClass *ec, EquivalenceMember *em); extern void add_child_rel_equivalences(PlannerInfo *root, AppendRelInfo *appinfo, @@ -188,6 +192,18 @@ extern bool eclass_useful_for_merging(PlannerInfo *root, extern bool is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist); extern bool is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses); +extern Bitmapset *get_ec_source_indexes(PlannerInfo *root, + EquivalenceClass *ec, + Relids relids); +extern Bitmapset *get_ec_source_indexes_strict(PlannerInfo *root, + EquivalenceClass *ec, + Relids relids); +extern Bitmapset *get_ec_derive_indexes(PlannerInfo *root, + EquivalenceClass *ec, + Relids relids); +extern Bitmapset *get_ec_derive_indexes_strict(PlannerInfo *root, + EquivalenceClass *ec, + Relids relids); /* * pathkeys.c -- 2.34.1