diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e1b49d5..8dbfeba 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1907,6 +1907,16 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) } static void +_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) +{ + WRITE_NODE_TYPE("FOREIGNKEYOPTINFO"); + + WRITE_OID_FIELD(conrelid); + WRITE_OID_FIELD(confrelid); + WRITE_INT_FIELD(nkeys); +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -3297,6 +3307,9 @@ _outNode(StringInfo str, const void *obj) case T_IndexOptInfo: _outIndexOptInfo(str, obj); break; + case T_ForeignKeyOptInfo: + _outForeignKeyOptInfo(str, obj); + break; case T_EquivalenceClass: _outEquivalenceClass(str, obj); break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d107d76..d9083b2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3732,6 +3732,253 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, } /* + * find_best_match_foreign_key + * Returns a Bitmapset with bits set for each 0 based index of 'joinquals' + * which to a foreign key which is defined on fkrel. + * + * Note: in the event that there's more than 1 foreign key between these two + * relations, the largest match, i.e. the bitmask which was generated from the + * foreign key with the highest number of keys is returned. + */ +static Bitmapset * +find_best_match_foreign_key(PlannerInfo *root, RelOptInfo *fkrel, + RelOptInfo *foreignrel, List *joinquals) +{ + Bitmapset *qualbestmatch; + Bitmapset *usefulquals; + ListCell *lc; + int quallstidx; + int bestmatchnkeys; + Oid frelid; + + /* fast path out when there's no foreign keys on fkrel */ + if (fkrel->fkeylist == NIL) + return NULL; + + qualbestmatch = NULL; + quallstidx = -1; + usefulquals = NULL; + bestmatchnkeys = 0; + + /* + * First build a bitmapset with all possibly useful quals. This will save + * from having to do this for each foreign key later. The only quals that + * are useful to us are in the form "var op var", so here we'll ignore + * things like "function(var1, var2)". + */ + foreach(lc, joinquals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + OpExpr *clause; + Var *leftvar, *rightvar; + + /* + * Increment this at the start of the loop to save doing it before each + * continue statement. + */ + quallstidx++; + + if (!IsA(rinfo, RestrictInfo)) + continue; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + clause = (OpExpr *) rinfo->clause; + + if (list_length(clause->args) != 2) + continue; + + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Foreign keys only support Vars, so ignore anything more complex */ + if (!IsA(leftvar, Var) || !IsA(rightvar, Var)) + continue; + + usefulquals = bms_add_member(usefulquals, quallstidx); + } + + frelid = root->simple_rte_array[foreignrel->relid]->relid; + + /* now check the matches for each foreign key defined on the fkrel */ + foreach(lc, fkrel->fkeylist) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + Bitmapset *qualmatches; + Bitmapset *fkmatches; + int i; + int nkeys; + + /* skip any foreign keys which don't reference frelid */ + if (fkinfo->confrelid != frelid) + continue; + + qualmatches = NULL; + fkmatches = NULL; + nkeys = fkinfo->nkeys; + + /* + * Loop over each column of the foreign key and build a bitmap index + * of each joinqual which matches. Note that we don't stop when we find + * the first match, as the expression could be duplicated in the + * joinquals, and we want to match as many as possible. + */ + for (i = 0; i < nkeys; i++) + { + ListCell *lc2; + + quallstidx = -1; + foreach(lc2, joinquals) + { + RestrictInfo *rinfo; + OpExpr *clause; + Var *leftvar; + Var *rightvar; + + quallstidx++; + + /* skip anything we didn't mark as useful above. */ + if (!bms_is_member(quallstidx, usefulquals)) + continue; + + /* + * Here we can safely assume that we have an OpExpr, in the + * from of "var op var" + */ + rinfo = (RestrictInfo *) lfirst(lc2); + clause = (OpExpr *) rinfo->clause; + + /* skip if the operator does not match */ + if (clause->opno != fkinfo->conpfeqop[i]) + continue; + + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* + * Check if the OpExpr matches the foreign key. Remember that + * this could be written with the Vars in either order, so we + * test both permutations of the expression. + */ + if ((fkinfo->confkeys[i] == leftvar->varattno && + fkinfo->conkeys[i] == rightvar->varattno) || + (fkinfo->confkeys[i] == rightvar->varattno && + fkinfo->conkeys[i] == leftvar->varattno)) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + } + } + } + + /* did we get a complete match to the foreign key? */ + if (bms_num_members(fkmatches) == nkeys) + { + /* does this match match more keys than any previous match? */ + if (nkeys > bestmatchnkeys) + { + bms_free(qualbestmatch); + qualbestmatch = qualmatches; + bestmatchnkeys = nkeys; + } + } + + bms_free(fkmatches); + } + + return qualbestmatch; +} + +/* + * clauselist_join_selectivity + * Estimate selectivity of join clauses either by using foreign key info + * or by using the regular clauselist_selectivity(). + * + * Since selectivity estimates for each joinqual are multiplied together, this + * can cause significant underestimates on the number of join tuples in cases + * where there's more than 1 clause in the join condition. To help ease the + * pain here we make use of foreign keys, and we assume that 1 row will match + * when *all* of the foreign key columns are present in the join condition, any + * additional clauses are estimated using clauselist_selectivity(). + */ +static Selectivity +clauselist_join_selectivity(PlannerInfo *root, List *joinquals, + JoinType jointype, SpecialJoinInfo *sjinfo) +{ + int outerrelid; + int innerrelid; + + if (bms_get_singleton_member(sjinfo->min_righthand, &innerrelid) && + bms_get_singleton_member(sjinfo->min_lefthand, &outerrelid)) + { + Bitmapset *outer2inner, *inner2outer; + RelOptInfo *innerrel = find_base_rel(root, innerrelid); + RelOptInfo *outerrel = find_base_rel(root, outerrelid); + + /* check which quals are matched by a foreign key referencing the innerrel */ + outer2inner = find_best_match_foreign_key(root, outerrel, innerrel, joinquals); + + /* do the same again, but with relations swapped */ + inner2outer = find_best_match_foreign_key(root, innerrel, outerrel, joinquals); + + /* + * did we find any matches at all? If so we need to see which one is + * the best/longest match + */ + if (outer2inner || inner2outer) + { + Selectivity sel; + ListCell *lc; + int lstidx; + List *nonfkeyclauses; + + /* either could be NULL, but bms_num_members will handle that */ + if (bms_num_members(outer2inner) < bms_num_members(inner2outer)) + { + /* + * We can now disgard the lesser of the two and save the best + * match into one of the bitmaps. Let's just use outer2inner + */ + bms_free(outer2inner); + outer2inner = inner2outer; + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + sel = 1.0; + else + sel = 1.0 / Max(outerrel->tuples, 1.0); + } + else + { + bms_free(inner2outer); + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + sel = 1.0; + else + sel = 1.0 / Max(innerrel->tuples, 1.0); + } + + /* + * build a list of all non-fkey joinquals, we'll need to enlist + * clauselist_selectivity() to estimate these + */ + lstidx = 0; + nonfkeyclauses = NIL; + foreach (lc, joinquals) + { + if (!bms_is_member(lstidx, outer2inner)) + nonfkeyclauses = lappend(nonfkeyclauses, lfirst(lc)); + lstidx++; + } + return sel * clauselist_selectivity(root, nonfkeyclauses, 0, jointype, sjinfo); + } + } + + /* perform normal estimation without the help of foreign keys */ + return clauselist_selectivity(root, joinquals, 0, jointype, sjinfo); +} + +/* * calc_joinrel_size_estimate * Workhorse for set_joinrel_size_estimates and * get_parameterized_joinrel_size. @@ -3777,11 +4024,11 @@ calc_joinrel_size_estimate(PlannerInfo *root, } /* Get the separate selectivities */ - jselec = clauselist_selectivity(root, - joinquals, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + joinquals, + jointype, + sjinfo); + pselec = clauselist_selectivity(root, pushedquals, 0, @@ -3794,11 +4041,10 @@ calc_joinrel_size_estimate(PlannerInfo *root, } else { - jselec = clauselist_selectivity(root, - restrictlist, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + restrictlist, + jointype, + sjinfo); pselec = 0.0; /* not used, keep compiler quiet */ } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9442e5f..dbe6038 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -27,6 +27,7 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" +#include "catalog/pg_constraint.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,6 +41,7 @@ #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -93,6 +95,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + List *fkinfos = NIL; + List *fkoidlist; + ListCell *l; /* * We need not lock the relation since it was already locked, either by @@ -140,7 +145,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (hasindex) { List *indexoidlist; - ListCell *l; LOCKMODE lmode; indexoidlist = RelationGetIndexList(relation); @@ -380,6 +384,77 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } rel->indexlist = indexinfos; + + /* load foreign keys */ + fkoidlist = RelationGetFKeyList(relation); + + foreach(l, fkoidlist) + { + int i; + ArrayType *arr; + Datum adatum; + bool isnull; + int numkeys; + Oid fkoid = lfirst_oid(l); + + HeapTuple htup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fkoid)); + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + + ForeignKeyOptInfo *info; + + Assert(constraint->contype == CONSTRAINT_FOREIGN); + + info = makeNode(ForeignKeyOptInfo); + + info->conrelid = constraint->conrelid; + info->confrelid = constraint->confrelid; + + /* conkey */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->conkeys = (int*)palloc0(numkeys * sizeof(int)); + + for (i = 0; i < numkeys; i++) + info->conkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* confkey */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_confkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->confkeys = (int*)palloc0(numkeys * sizeof(int)); + + for (i = 0; i < numkeys; i++) + info->confkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* conpfeqop */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conpfeqop, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->conpfeqop = (Oid*)palloc0(numkeys * sizeof(Oid)); + + for (i = 0; i < numkeys; i++) + info->conpfeqop[i] = ((Oid *) ARR_DATA_PTR(arr))[i]; + + info->nkeys = numkeys; + + ReleaseSysCache(htup); + + fkinfos = lcons(info, fkinfos); + } + + list_free(fkoidlist); + + rel->fkeylist = fkinfos; /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9c3d096..3ae9022 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3923,6 +3923,73 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetFKeyList -- get a list of foreign key oids + * + * TODO blah blah blah + */ +List * +RelationGetFKeyList(Relation relation) +{ + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_fkeyvalid) + return list_copy(relation->rd_fkeylist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_constraint for entries having conrelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + conrel = heap_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + + /* return only foreign keys */ + if (constraint->contype != CONSTRAINT_FOREIGN) + continue; + + /* Add index's OID to result list in the proper order */ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + } + + systable_endscan(conscan); + + heap_close(conrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_fkeylist; + relation->rd_fkeylist = list_copy(result); + relation->rd_fkeyvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -4891,6 +4958,8 @@ load_relcache_init_file(bool shared) rel->rd_indexattr = NULL; rel->rd_keyattr = NULL; rel->rd_idattr = NULL; + rel->rd_fkeylist = NIL; + rel->rd_fkeyvalid = false; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 274480e..5c40b83 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -221,6 +221,7 @@ typedef enum NodeTag T_PlannerGlobal, T_RelOptInfo, T_IndexOptInfo, + T_ForeignKeyOptInfo, T_ParamPathInfo, T_Path, T_IndexPath, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 79bed33..8ce35e7 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -472,6 +472,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fkeylist; /* list of ForeignKeyOptInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -566,6 +567,20 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +/* TODO add info*/ +typedef struct ForeignKeyOptInfo +{ + NodeTag type; + + Oid conrelid; + Oid confrelid; + + int nkeys; + int *conkeys; + int *confkeys; + Oid *conpfeqop; + +} ForeignKeyOptInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 87123a5..da1fd58 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -73,6 +73,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause, int indexcol, List **indexcolnos, bool *var_on_left_p); +extern bool has_matching_fkey(RelOptInfo *rel, RelOptInfo *frel, List *clauses, + bool reverse); /* * tidpath.h diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 8a55a09..c56cf8a 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -79,6 +79,7 @@ typedef struct RelationData bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = * valid, 2 = temporarily forced */ + bool rd_fkeyvalid; /* state of rd_fkeylist: 0 = not valid, 1 = valid */ /* * rd_createSubid is the ID of the highest subtransaction the rel has @@ -112,6 +113,9 @@ typedef struct RelationData Oid rd_oidindex; /* OID of unique index on OID, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetFKList: */ + List *rd_fkeylist; /* OIDs of foreign keys */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 6953281..8878478 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -38,6 +38,7 @@ extern void RelationClose(Relation relation); * Routines to compute/retrieve additional cached information */ extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetFKeyList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation);