From 7081c00c4e8ca0c386d9a594d130492876f18ad5 Mon Sep 17 00:00:00 2001 From: amit Date: Thu, 18 Jul 2019 10:22:31 +0900 Subject: [PATCH v5 1/3] Some cosmetic improvements to partitionwise join code --- src/backend/optimizer/path/joinrels.c | 18 ++++-- src/backend/optimizer/util/plancat.c | 20 +++--- src/backend/optimizer/util/relnode.c | 92 +++++++++++++++++---------- src/include/nodes/pathnodes.h | 36 ++++++++--- 4 files changed, 109 insertions(+), 57 deletions(-) diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a21c295b99..b896e3e474 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1575,8 +1575,11 @@ build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo, } /* - * Returns true if there exists an equi-join condition for each pair of - * partition keys from given relations being joined. + * have_partkey_equi_join + * + * Returns true if there exist equi-join conditions involving pairs + * of matching partition keys of the relations being joined for all + * partition keys. */ bool have_partkey_equi_join(RelOptInfo *joinrel, @@ -1692,8 +1695,15 @@ have_partkey_equi_join(RelOptInfo *joinrel, } /* - * Find the partition key from the given relation matching the given - * expression. If found, return the index of the partition key, else return -1. + * match_expr_to_partition_keys + * + * Tries to match an expression to one of the nullable or non-nullable + * partition keys and if a match is found, returns the matched key's + * ordinal position or -1 if the expression could not be matched to any + * of the keys. + * + * strict_op must be true if the expression will be compared with the + * partition key using a strict operator. 
*/ static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d82fc5ab8b..980dc6499b 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2247,9 +2247,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation) /* * set_baserel_partition_key_exprs * - * Builds partition key expressions for the given base relation and sets them - * in given RelOptInfo. Any single column partition keys are converted to Var - * nodes. All Var nodes are restamped with the relid of given relation. + * Builds partition key expressions for the given base relation and sets + * rel->partexprs. */ static void set_baserel_partition_key_exprs(Relation relation, @@ -2297,17 +2296,20 @@ set_baserel_partition_key_exprs(Relation relation, lc = lnext(partkey->partexprs, lc); } + /* Base relations have a single expression per key. */ partexprs[cnt] = list_make1(partexpr); } + /* + * For base relations, we assume that the partition keys are non-nullable, + * although they are nullable in principle; list and hash partitioned + * tables may contain nulls in the partition key(s), for example. + * Assuming non-nullability is okay for the considerations of partition + * pruning, because pruning is never performed with non-strict operators. + */ rel->partexprs = partexprs; - /* - * A base relation can not have nullable partition key expressions. We - * still allocate array of empty expressions lists to keep partition key - * expression handling code simple. See build_joinrel_partition_info() and - * match_expr_to_partition_keys(). - */ + /* Assigning NIL for each key means there are no nullable keys. 
*/ rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts); } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 374f93890b..81ec600ecb 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -29,6 +29,7 @@ #include "optimizer/tlist.h" #include "partitioning/partbounds.h" #include "utils/hsearch.h" +#include "utils/lsyscache.h" typedef struct JoinHashEntry @@ -58,6 +59,9 @@ static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel); static void build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, List *restrictlist, JoinType jointype); +static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + JoinType jointype); static void build_child_join_reltarget(PlannerInfo *root, RelOptInfo *parentrel, RelOptInfo *childrel, @@ -1607,18 +1611,18 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer) /* * build_joinrel_partition_info - * If the two relations have same partitioning scheme, their join may be - * partitioned and will follow the same partitioning scheme as the joining - * relations. Set the partition scheme and partition key expressions in - * the join relation. + * Checks if the two relations being joined can use partitionwise join + * and if yes, initializes partitioning information of the resulting + * partitioned relation. + * + * This will set part_scheme and partition key expressions (partexprs and + * nullable_partexprs) if required. */ static void build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, List *restrictlist, JoinType jointype) { - int partnatts; - int cnt; PartitionScheme part_scheme; /* Nothing to do if partitionwise join technique is disabled. 
*/ @@ -1685,11 +1689,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, */ joinrel->part_scheme = part_scheme; joinrel->boundinfo = outer_rel->boundinfo; - partnatts = joinrel->part_scheme->partnatts; - joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts); - joinrel->nullable_partexprs = - (List **) palloc0(sizeof(List *) * partnatts); joinrel->nparts = outer_rel->nparts; + set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, jointype); joinrel->part_rels = (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * joinrel->nparts); @@ -1699,32 +1700,31 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, Assert(outer_rel->consider_partitionwise_join); Assert(inner_rel->consider_partitionwise_join); joinrel->consider_partitionwise_join = true; +} + +/* + * set_joinrel_partition_key_exprs + * Initialize partition key expressions + */ +static void +set_joinrel_partition_key_exprs(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + JoinType jointype) +{ + int partnatts; + int cnt; + + Assert(joinrel->part_scheme != NULL); + + partnatts = joinrel->part_scheme->partnatts; + joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts); + joinrel->nullable_partexprs = + (List **) palloc0(sizeof(List *) * partnatts); /* - * Construct partition keys for the join. - * - * An INNER join between two partitioned relations can be regarded as - * partitioned by either key expression. For example, A INNER JOIN B ON - * A.a = B.b can be regarded as partitioned on A.a or on B.b; they are - * equivalent. - * - * For a SEMI or ANTI join, the result can only be regarded as being - * partitioned in the same manner as the outer side, since the inner - * columns are not retained. - * - * An OUTER join like (A LEFT JOIN B ON A.a = B.b) may produce rows with - * B.b NULL. These rows may not fit the partitioning conditions imposed on - * B.b. 
Hence, strictly speaking, the join is not partitioned by B.b and - * thus partition keys of an OUTER join should include partition key - * expressions from the OUTER side only. However, because all - * commonly-used comparison operators are strict, the presence of nulls on - * the outer side doesn't cause any problem; they can't match anything at - * future join levels anyway. Therefore, we track two sets of - * expressions: those that authentically partition the relation - * (partexprs) and those that partition the relation with the exception - * that extra nulls may be present (nullable_partexprs). When the - * comparison operator is strict, the latter is just as good as the - * former. + * Join type determines which partition keys are assumed by the resulting + * join relation. Note that these keys are to be considered when checking + * if any further joins involving this joinrel may be partitioned. */ for (cnt = 0; cnt < partnatts; cnt++) { @@ -1738,18 +1738,36 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, switch (jointype) { + /* + * Join relation resulting from an INNER join may be regarded as + * partitioned by either of inner and outer relation keys. For + * example, A INNER JOIN B ON A.a = B.b can be regarded as + * partitioned on either A.a or B.b. + */ case JOIN_INNER: partexpr = list_concat_copy(outer_expr, inner_expr); nullable_partexpr = list_concat_copy(outer_null_expr, inner_null_expr); break; + /* + * Join relation resulting from a SEMI or ANTI join may be + * regarded as partitioned on the outer relation keys, since the + * inner columns are omitted from the output. + */ case JOIN_SEMI: case JOIN_ANTI: partexpr = list_copy(outer_expr); nullable_partexpr = list_copy(outer_null_expr); break; + /* + * Join relation resulting from a LEFT OUTER JOIN likewise may be + * regarded as partitioned on the (non-nullable) outer relation + * keys. 
The inner (nullable) relation keys are okay as partition + * keys for further joins as long as they involve strict join + * operators. + */ case JOIN_LEFT: partexpr = list_copy(outer_expr); nullable_partexpr = list_concat_copy(inner_expr, @@ -1758,6 +1776,12 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, inner_null_expr); break; + /* + * For FULL OUTER JOINs, both relations are nullable, so the + * resulting join relation may be regarded as partitioned on + * either of inner and outer relation keys, but only for joins + * that involve strict join operators. + */ case JOIN_FULL: nullable_partexpr = list_concat_copy(outer_expr, inner_expr); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0ceb809644..213bc41420 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -586,16 +586,32 @@ typedef struct PartitionSchemeData *PartitionScheme; * this relation that are partitioned tables * themselves, in hierarchical order * - * Note: A base relation always has only one set of partition keys, but a join - * relation may have as many sets of partition keys as the number of relations - * being joined. partexprs and nullable_partexprs are arrays containing - * part_scheme->partnatts elements each. Each of these elements is a list of - * partition key expressions. For a base relation each list in partexprs - * contains only one expression and nullable_partexprs is not populated. For a - * join relation, partexprs and nullable_partexprs contain partition key - * expressions from non-nullable and nullable relations resp. Lists at any - * given position in those arrays together contain as many elements as the - * number of joining relations. 
+ * Notes on partition key expressions (partexprs and nullable_partexprs): + * + * Partition key expressions will be used to spot references to the partition + * keys of the relation in the expressions of a given query so as to apply + * various partitioning-based optimizations to certain query constructs. For + * example, pruning unnecessary partitions of a table using baserestrictinfo + * clauses that contain partition keys, or converting a join between two + * partitioned relations into a series of joins between pairs of their + * constituent partitions if the joined rows follow the same partitioning + * as the relations being joined. + * + * The partexprs and nullable_partexprs arrays each contain + * part_scheme->partnatts elements. Each of the elements is a list of + * partition key expressions. For partitioned *base* relations, there is one + * expression in every list, whereas for partitioned *join* relations, there + * can be as many as the number of component relations. + * + * nullable_partexprs are populated only in partitioned *join* relations, + * that is, if any of their component relations are nullable due to OUTER JOIN + * considerations. They contain only the expressions of the nullable component + * relations, while those of the non-nullable relations are present in the + * partexprs. For the considerations of partitionwise join, nullable partition + * keys can be considered to partition the underlying relation in the same + * manner as the non-nullable partition keys do, as long as the join operator + * is strict, because those null-valued keys can't be joined further, thus + * preserving the partitioning. *---------- */ typedef enum RelOptKind -- 2.20.1 (Apple Git-117)