From 44aca769b993d8d2e0882e6494fd8fd5e583b3de Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 19 Feb 2024 15:16:51 +0800 Subject: [PATCH v3 1/9] Introduce RelInfoList structure This commit introduces the RelInfoList structure, which encapsulates both a list and a hash table, so that we can leverage the hash table for faster lookups not only for join relations but also for upper relations. --- contrib/postgres_fdw/postgres_fdw.c | 3 +- src/backend/optimizer/geqo/geqo_eval.c | 20 +-- src/backend/optimizer/path/allpaths.c | 7 +- src/backend/optimizer/plan/planmain.c | 5 +- src/backend/optimizer/util/relnode.c | 164 ++++++++++++++----------- src/include/nodes/pathnodes.h | 31 +++-- 6 files changed, 133 insertions(+), 97 deletions(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 142dcfc995..f46fc604b4 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -6069,7 +6069,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, */ Assert(fpinfo->relation_index == 0); /* shouldn't be set yet */ fpinfo->relation_index = - list_length(root->parse->rtable) + list_length(root->join_rel_list); + list_length(root->parse->rtable) + + list_length(root->join_rel_list->items); return true; } diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index d2f7f4e5f3..1141156899 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -85,18 +85,18 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) * truncating the list to its original length. NOTE this assumes that any * added entries are appended at the end! * - * We also must take care not to mess up the outer join_rel_hash, if there - * is one. We can do this by just temporarily setting the link to NULL. - * (If we are dealing with enough join rels, which we very likely are, a - * new hash table will get built and used locally.) 
+ * We also must take care not to mess up the outer join_rel_list->hash, if + * there is one. We can do this by just temporarily setting the link to + * NULL. (If we are dealing with enough join rels, which we very likely + * are, a new hash table will get built and used locally.) * * join_rel_level[] shouldn't be in use, so just Assert it isn't. */ - savelength = list_length(root->join_rel_list); - savehash = root->join_rel_hash; + savelength = list_length(root->join_rel_list->items); + savehash = root->join_rel_list->hash; Assert(root->join_rel_level == NULL); - root->join_rel_hash = NULL; + root->join_rel_list->hash = NULL; /* construct the best path for the given combination of relations */ joinrel = gimme_tree(root, tour, num_gene); @@ -121,9 +121,9 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) * Restore join_rel_list to its former state, and put back original * hashtable if any. */ - root->join_rel_list = list_truncate(root->join_rel_list, - savelength); - root->join_rel_hash = savehash; + root->join_rel_list->items = list_truncate(root->join_rel_list->items, + savelength); + root->join_rel_list->hash = savehash; /* release all the memory acquired within gimme_tree */ MemoryContextSwitchTo(oldcxt); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0b98f0856e..f8a5fbcb0a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3410,9 +3410,10 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) * needed for these paths need have been instantiated. * * Note to plugin authors: the functions invoked during standard_join_search() - * modify root->join_rel_list and root->join_rel_hash. If you want to do more - * than one join-order search, you'll probably need to save and restore the - * original states of those data structures. See geqo_eval() for an example. + * modify root->join_rel_list->items and root->join_rel_list->hash. 
If you + * want to do more than one join-order search, you'll probably need to save and + * restore the original states of those data structures. See geqo_eval() for + * an example. */ RelOptInfo * standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 075d36c7ec..eb78e37317 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -64,8 +64,9 @@ query_planner(PlannerInfo *root, * NOTE: append_rel_list was set up by subquery_planner, so do not touch * here. */ - root->join_rel_list = NIL; - root->join_rel_hash = NULL; + root->join_rel_list = makeNode(RelInfoList); + root->join_rel_list->items = NIL; + root->join_rel_list->hash = NULL; root->join_rel_level = NULL; root->join_cur_level = 0; root->canon_pathkeys = NIL; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index e5f4062bfb..9e25750acd 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -35,11 +35,15 @@ #include "utils/lsyscache.h" -typedef struct JoinHashEntry +/* + * An entry of a hash table that we use to make lookup for RelOptInfo + * structures more efficient. + */ +typedef struct RelInfoEntry { - Relids join_relids; /* hash key --- MUST BE FIRST */ - RelOptInfo *join_rel; -} JoinHashEntry; + Relids relids; /* hash key --- MUST BE FIRST */ + RelOptInfo *rel; +} RelInfoEntry; static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel, @@ -472,11 +476,11 @@ find_base_rel_ignore_join(PlannerInfo *root, int relid) } /* - * build_join_rel_hash - * Construct the auxiliary hash table for join relations. + * build_rel_hash + * Construct the auxiliary hash table for relations. 
*/ static void -build_join_rel_hash(PlannerInfo *root) +build_rel_hash(RelInfoList *list) { HTAB *hashtab; HASHCTL hash_ctl; @@ -484,47 +488,49 @@ build_join_rel_hash(PlannerInfo *root) /* Create the hash table */ hash_ctl.keysize = sizeof(Relids); - hash_ctl.entrysize = sizeof(JoinHashEntry); + hash_ctl.entrysize = sizeof(RelInfoEntry); hash_ctl.hash = bitmap_hash; hash_ctl.match = bitmap_match; hash_ctl.hcxt = CurrentMemoryContext; - hashtab = hash_create("JoinRelHashTable", + hashtab = hash_create("RelHashTable", 256L, &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); - /* Insert all the already-existing joinrels */ - foreach(l, root->join_rel_list) + /* Insert all the already-existing relations */ + foreach(l, list->items) { RelOptInfo *rel = (RelOptInfo *) lfirst(l); - JoinHashEntry *hentry; + RelInfoEntry *hentry; bool found; - hentry = (JoinHashEntry *) hash_search(hashtab, - &(rel->relids), - HASH_ENTER, - &found); + hentry = (RelInfoEntry *) hash_search(hashtab, + &(rel->relids), + HASH_ENTER, + &found); Assert(!found); - hentry->join_rel = rel; + hentry->rel = rel; } - root->join_rel_hash = hashtab; + list->hash = hashtab; } /* - * find_join_rel - * Returns relation entry corresponding to 'relids' (a set of RT indexes), - * or NULL if none exists. This is for join relations. + * find_rel_info + * Returns the RelOptInfo in 'list' matching 'relids', or NULL if none. */ -RelOptInfo * -find_join_rel(PlannerInfo *root, Relids relids) +static RelOptInfo * +find_rel_info(RelInfoList *list, Relids relids) { + if (list == NULL) + return NULL; + /* * Switch to using hash lookup when list grows "too long". The threshold * is arbitrary and is known only here. */ - if (!root->join_rel_hash && list_length(root->join_rel_list) > 32) - build_join_rel_hash(root); + if (!list->hash && list_length(list->items) > 32) + build_rel_hash(list); /* * Use either hashtable lookup or linear search, as appropriate. 
@@ -534,23 +540,23 @@ find_join_rel(PlannerInfo *root, Relids relids) * so would force relids out of a register and thus probably slow down the * list-search case. */ - if (root->join_rel_hash) + if (list->hash) { Relids hashkey = relids; - JoinHashEntry *hentry; + RelInfoEntry *hentry; - hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, - &hashkey, - HASH_FIND, - NULL); + hentry = (RelInfoEntry *) hash_search(list->hash, + &hashkey, + HASH_FIND, + NULL); if (hentry) - return hentry->join_rel; + return hentry->rel; } else { ListCell *l; - foreach(l, root->join_rel_list) + foreach(l, list->items) { RelOptInfo *rel = (RelOptInfo *) lfirst(l); @@ -562,6 +568,54 @@ find_join_rel(PlannerInfo *root, Relids relids) return NULL; } +/* + * find_join_rel + * Returns relation entry corresponding to 'relids' (a set of RT indexes), + * or NULL if none exists. This is for join relations. + */ +RelOptInfo * +find_join_rel(PlannerInfo *root, Relids relids) +{ + return find_rel_info(root->join_rel_list, relids); +} + +/* + * add_rel_info + * Add given relation to the given list. Also add it to the auxiliary + * hashtable if there is one. + */ +static void +add_rel_info(RelInfoList *list, RelOptInfo *rel) +{ + /* GEQO requires us to append the new relation to the end of the list! */ + list->items = lappend(list->items, rel); + + /* store it into the auxiliary hashtable if there is one. */ + if (list->hash) + { + RelInfoEntry *hentry; + bool found; + + hentry = (RelInfoEntry *) hash_search(list->hash, + &(rel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->rel = rel; + } +} + +/* + * add_join_rel + * Add given join relation to the list of join relations in the given + * PlannerInfo. 
+ */ +static void +add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) +{ + add_rel_info(root->join_rel_list, joinrel); +} + /* * set_foreign_rel_properties * Set up foreign-join fields if outer and inner relation are foreign @@ -611,32 +665,6 @@ set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, } } -/* - * add_join_rel - * Add given join relation to the list of join relations in the given - * PlannerInfo. Also add it to the auxiliary hashtable if there is one. - */ -static void -add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) -{ - /* GEQO requires us to append the new joinrel to the end of the list! */ - root->join_rel_list = lappend(root->join_rel_list, joinrel); - - /* store it into the auxiliary hashtable if there is one. */ - if (root->join_rel_hash) - { - JoinHashEntry *hentry; - bool found; - - hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, - &(joinrel->relids), - HASH_ENTER, - &found); - Assert(!found); - hentry->join_rel = joinrel; - } -} - /* * build_join_rel * Returns relation entry corresponding to the union of two given rels, @@ -1462,22 +1490,14 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo * fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids) { + RelInfoList *list = &root->upper_rels[kind]; RelOptInfo *upperrel; - ListCell *lc; - - /* - * For the moment, our indexing data structure is just a List for each - * relation kind. If we ever get so many of one kind that this stops - * working well, we can improve it. No code outside this function should - * assume anything about how to find a particular upperrel. 
- */ /* If we already made this upperrel for the query, return it */ - foreach(lc, root->upper_rels[kind]) + if (list) { - upperrel = (RelOptInfo *) lfirst(lc); - - if (bms_equal(upperrel->relids, relids)) + upperrel = find_rel_info(list, relids); + if (upperrel) return upperrel; } @@ -1496,7 +1516,7 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids) upperrel->cheapest_unique_path = NULL; upperrel->cheapest_parameterized_paths = NIL; - root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel); + add_rel_info(&root->upper_rels[kind], upperrel); return upperrel; } diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 534692bee1..a003433178 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -80,6 +80,25 @@ typedef enum UpperRelationKind /* NB: UPPERREL_FINAL must be last enum entry; it's used to size arrays */ } UpperRelationKind; +/* + * Hashed list to store relation specific info and to retrieve it by relids. + * + * For small problems we just scan the list to do lookups, but when there are + * many relations we build a hash table for faster lookups. The hash table is + * present and valid when 'hash' is not NULL. Note that we still maintain the + * list even when using the hash table for lookups; this simplifies life for + * GEQO. + */ +typedef struct RelInfoList +{ + pg_node_attr(no_copy_equal, no_read) + + NodeTag type; + + List *items; + struct HTAB *hash pg_node_attr(read_write_ignore); +} RelInfoList; + /*---------- * PlannerGlobal * Global information for planning/optimization @@ -267,15 +286,9 @@ struct PlannerInfo /* * join_rel_list is a list of all join-relation RelOptInfos we have - * considered in this planning run. For small problems we just scan the - * list to do lookups, but when there are many join relations we build a - * hash table for faster lookups. The hash table is present and valid - * when join_rel_hash is not NULL. 
Note that we still maintain the list - * even when using the hash table for lookups; this simplifies life for - * GEQO. + * considered in this planning run. */ - List *join_rel_list; - struct HTAB *join_rel_hash pg_node_attr(read_write_ignore); + RelInfoList *join_rel_list; /* list of join-relation RelOptInfos */ /* * When doing a dynamic-programming-style join search, join_rel_level[k] @@ -408,7 +421,7 @@ struct PlannerInfo * Upper-rel RelOptInfos. Use fetch_upper_rel() to get any particular * upper rel. */ - List *upper_rels[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore); + RelInfoList upper_rels[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore); /* Result tlists chosen by grouping_planner for upper-stage processing */ struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore); -- 2.31.0