From 95cefd94e2130bf7802c5d1a95fd9e212a78f06b Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Mon, 31 Mar 2025 13:50:01 +0530 Subject: [PATCH 4/4] Canonical keys for ec_derives_hash The derived clauses are looked up by the EMs that they contain by providing the EMs in the same order as they appear in the clause or in commuted order. We use canonical key to store and search the clauses either way. The lower of the EM pointers is used as the first part of the key and other EM is used as the second part of the key. parent_ec is the third part of the key. This avoids storing each derived clause twice with keys corresponding to either order of EMs. Similary it avoids looking up a clause twice using either order of given EMs. The derived clauses containing constant EM are looked with a key with NULL as the first part, non-constant EM pointer as the second part and parent_ec as the third part. Since this reduces the number of entries in the hash table at the time of its creation by half, the patches reduces the specified initial size of hash table by half. NOTE: This patch relies on the C pointer comparison operators to be stable in the sense that they will return the same value every time the same operands are passed to them in the same order. While according to the C standard, the result of comparison between pointers within the same array or a struct is specified, that between pointers from two different objects is unspecified. The existing code relies on the EM pointers being stable and also relies on equality between them to be stable. It has withstood the test of time and a variety of compilers. Hence it should be safe to rely on pointer comparisons being stable. Further, if we rely on NULL pointer being always lower than any of non-NULL pointers, we could avoid one condition each while inserting the clause and looking up. According to C standard, a NULL pointer has value 0 but it doesn't specify its order with respect to non-NULL pointers. Author: Ashutosh Bapat, per suggestion by David Rowley --- src/backend/optimizer/path/equivclass.c | 86 +++++++++++++------------ 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 76b8179e285..51a333469c5 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -3450,7 +3450,7 @@ get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2) /* * ec_build_derives_hash - * Construct the auxiliary hash table for derived clauses. + * Construct the auxiliary hash table for derived clause lookups. */ static void ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec) @@ -3460,15 +3460,12 @@ ec_build_derives_hash(PlannerInfo *root, EquivalenceClass *ec) /* * Create the hash table. * - * ec_add_clause_to_derives_hash() adds two entries for every clause which - * does not have a constant EM in it. Hence initially the hash table may - * contain twice the number of derived clauses. Specify the initial hash - * table size as four times the number of existing derived clauses in the - * list so that we don't require to expand the hash table immediately when - * inserting the next clause. + * Specify the initial hash table size as twice the number of existing + * derived clauses in the list so that we don't require to expand the hash + * table immediately when inserting the next clause. */ ec->ec_derives_hash = derives_create(root->planner_cxt, - list_length(ec->ec_derives_list) * 4, + list_length(ec->ec_derives_list) * 2, NULL); foreach_node(RestrictInfo, rinfo, ec->ec_derives_list) @@ -3533,14 +3530,13 @@ ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo) Assert(!rinfo->parent_ec); /* - * find_derived_clause_for_ec_member() performs lookup of a clause - * involving a constant using only the non-constant EM and NULL for - * the RHS. Since there's only one constant EM per EC, we don't need - * to store or match it during lookup. We set key.em2 = NULL to - * reflect this. + * find_derived_clause_for_ec_member() looks up a clause involving a + * constant EM using only the non-constant EM. Since there's only one + * constant EM per EC, we don't need to store or match it during + * lookup. We set key.em1 = NULL to reflect this. */ - key.em1 = rinfo->left_em; - key.em2 = NULL; + key.em1 = NULL; + key.em2 = rinfo->left_em;; key.parent_ec = rinfo->parent_ec; entry = derives_insert(ec->ec_derives_hash, key, &found); Assert(!found); @@ -3548,20 +3544,23 @@ ec_add_clause_to_derives_hash(EquivalenceClass *ec, RestrictInfo *rinfo) } else { - key.em1 = rinfo->left_em; - key.em2 = rinfo->right_em; - key.parent_ec = rinfo->parent_ec; - entry = derives_insert(ec->ec_derives_hash, key, &found); - Assert(!found); - entry->rinfo = rinfo; - /* - * Insert the clause under the given EM pair key, and also under the - * reverse order. This ensures we can find the clause regardless of - * the order in which EMs are passed to the lookup function. + * A clause may be looked up by EMs in the same order in which they + * appear in the clause or in commuted order. To serve both the + * requests, store the clause using a canonical key with the lower of + * the EM pointers as first part of the key and the other as the + * second part of the key. */ - key.em1 = rinfo->right_em; - key.em2 = rinfo->left_em; + if (rinfo->left_em < rinfo->right_em) + { + key.em1 = rinfo->left_em; + key.em2 = rinfo->right_em; + } + else + { + key.em1 = rinfo->right_em; + key.em2 = rinfo->left_em; + } key.parent_ec = rinfo->parent_ec; entry = derives_insert(ec->ec_derives_hash, key, &found); Assert(!found); @@ -3629,7 +3628,7 @@ ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, /* * ec_search_derived_clause_for_ems - * Search for an existing derived clause between two EquivalenceMembers. + * Search an existing derived clause containing given EquivalenceMembers. * * If the number of derived clauses exceeds a threshold, switch to hash table * lookup; otherwise, scan ec_derives_list linearly. @@ -3638,9 +3637,6 @@ ec_search_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, * (leftem) and a NULL rightem. In that case, we expect to find a clause with * a constant on the RHS. * - * We do not attempt a second lookup with EMs swapped when using the hash - * table; such clauses are inserted under both orderings at the time of - * insertion. */ static RestrictInfo * ec_search_derived_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, @@ -3663,23 +3659,30 @@ ec_search_derived_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, ECDerivesEntry *entry; /* - * See ec_add_clause_to_derives_hash() for rationale: derived clauses - * are inserted into the hash table under both (em1, em2) and (em2, - * em1), so a single lookup with the original order is sufficient. + * Construct a canonical key with given EMs. See + * ec_add_clause_to_derives_hash() for rationale. */ - key.em1 = leftem; - key.em2 = rightem; + if (!rightem) + { + key.em1 = NULL; + key.em2 = leftem; + } + if (leftem < rightem) + { + key.em1 = leftem; + key.em2 = rightem; + } + else + { + key.em1 = rightem; + key.em2 = leftem; + } key.parent_ec = parent_ec; entry = derives_lookup(ec->ec_derives_hash, key); if (entry) { rinfo = entry->rinfo; Assert(rinfo); - - /* - * If this is a lookup in a const-containing EC, the RHS must be a - * constant. The caller signals this by passing NULL for rightem. - */ Assert(rightem || rinfo->right_em->em_is_const); return rinfo; } @@ -3693,7 +3696,6 @@ ec_search_derived_clause_for_ems(PlannerInfo *root, EquivalenceClass *ec, if (!rightem && rinfo->left_em == leftem) { - /* See the comment above in hash path for rationale. */ Assert(rinfo->right_em->em_is_const); return rinfo; } -- 2.34.1