Index: src/backend/executor/nodeHash.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v
retrieving revision 1.116
diff -c -r1.116 nodeHash.c
*** src/backend/executor/nodeHash.c	1 Jan 2008 19:45:49 -0000	1.116
--- src/backend/executor/nodeHash.c	17 Oct 2008 23:47:20 -0000
***************
*** 54,59 ****
--- 54,105 ----
  }
  
  /* ----------------------------------------------------------------
+  *		isAMostCommonValue
+  *
+  *		Look up a hash value in the open-addressing table of most common
+  *		values (MCVs) attached to the hash join table.
+  *
+  *		Returns true and sets *partitionNumber to the matching slot index
+  *		when hashvalue belongs to an MCV; otherwise returns false and sets
+  *		*partitionNumber to MCV_INVALID_PARTITION.
+  *
+  *		A stored hashvalue of 0 marks an unused slot, so a probe value of 0
+  *		cannot be told apart from "empty".  It is always reported as not
+  *		being an MCV instead of being matched against an unused slot.
+  * ----------------------------------------------------------------
+  */
+ bool
+ isAMostCommonValue(HashJoinTable hashtable, uint32 hashvalue,
+ 				   int *partitionNumber)
+ {
+ 	int			nbuckets = hashtable->nMostCommonTuplePartitionHashBuckets;
+ 	int			bucket;
+ 
+ 	*partitionNumber = MCV_INVALID_PARTITION;
+ 
+ 	/* no table built, or the probe value collides with the empty marker */
+ 	if (nbuckets <= 0 || hashvalue == 0)
+ 		return false;
+ 
+ 	/* the table size is a power of two, so masking wraps the slot index */
+ 	bucket = hashvalue & (nbuckets - 1);
+ 
+ 	/* linear probing: stop at the matching slot or at the first unused one */
+ 	while (hashtable->mostCommonTuplePartition[bucket].hashvalue != 0)
+ 	{
+ 		if (hashtable->mostCommonTuplePartition[bucket].hashvalue == hashvalue)
+ 		{
+ 			*partitionNumber = bucket;
+ 			return true;
+ 		}
+ 		bucket = (bucket + 1) & (nbuckets - 1);
+ 	}
+ 
+ 	/* ran into an unused slot, so this is not an MCV */
+ 	return false;
+ }
+ 
+ /* ----------------------------------------------------------------
   *		MultiExecHash
   *
   *		build hash table for hashjoin, doing partitioning if more
***************
*** 69,74 ****
--- 115,122 ----
  	TupleTableSlot *slot;
  	ExprContext *econtext;
  	uint32		hashvalue;
+ 	MinimalTuple mintuple;
+ 	int			partitionNumber;
  
  	/* must provide our own instrumentation support */
  	if (node->ps.instrument)
***************
*** 99,106 ****
  			if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
  									 &hashvalue))
  			{
! 				ExecHashTableInsert(hashtable, slot, hashvalue);
! 				hashtable->totalTuples += 1;
  			}
  		}
  
--- 147,182 ----
  			if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
  									 &hashvalue))
  			{
!
partitionNumber = MCV_INVALID_PARTITION;
! 
! 				if (hashtable->usingMostCommonValues && isAMostCommonValue(hashtable, hashvalue, &partitionNumber))
! 				{
! 					HashJoinTuple hashTuple;
! 					int			hashTupleSize;
! 
! 					mintuple = ExecFetchSlotMinimalTuple(slot);
! 					hashTupleSize = HJTUPLE_OVERHEAD + mintuple->t_len;
! 					hashTuple = (HashJoinTuple) palloc(hashTupleSize);
! 					hashTuple->hashvalue = hashvalue;
! 					memcpy(HJTUPLE_MINTUPLE(hashTuple), mintuple, mintuple->t_len);
! 
! 					hashTuple->next = hashtable->mostCommonTuplePartition[partitionNumber].tuples;
! 					hashtable->mostCommonTuplePartition[partitionNumber].tuples = hashTuple;
! 
! 					hashtable->spaceUsed += hashTupleSize;
! 
! 					if (hashtable->spaceUsed > hashtable->spaceAllowed) {
! 						ExecHashIncreaseNumBatches(hashtable);
! 					}
! 
! 					hashtable->mostCommonTuplesStored++;
! 				}
! 
! 				if (partitionNumber == MCV_INVALID_PARTITION)
! 				{
! 					ExecHashTableInsert(hashtable, slot, hashvalue);
! 					hashtable->totalTuples += 1;
! 				}
  			}
  		}
  
***************
*** 798,803 ****
--- 855,932 ----
  }
  
  /*
+  * ExecScanHashMostCommonTuples
+  *		scan an MCV partition for matches to the current outer tuple
+  *
+  * The current outer tuple must be stored in econtext->ecxt_outertuple, and
+  * hjstate->hj_OuterTupleMostCommonValuePartition must hold the slot index
+  * that isAMostCommonValue() reported for it.
+  *
+  * Returns the next matching inner tuple and remembers it in hj_CurTuple, or
+  * NULL when the partition holds no further matches.
+  */
+ HashJoinTuple
+ ExecScanHashMostCommonTuples(HashJoinState *hjstate,
+ 							 ExprContext *econtext)
+ {
+ 	List	   *hjclauses = hjstate->hashclauses;
+ 	HashJoinTable hashtable = hjstate->hj_HashTable;
+ 	HashJoinTuple hashTuple = hjstate->hj_CurTuple;
+ 	uint32		hashvalue = hjstate->hj_CurHashValue;
+ 
+ 	/*
+ 	 * hj_CurTuple is NULL to start scanning a new partition, or the address
+ 	 * of the last tuple returned from the current partition.
+ 	 */
+ 	if (hashTuple == NULL)
+ 	{
+ 		int			partition = hjstate->hj_OuterTupleMostCommonValuePartition;
+ 
+ 		/*
+ 		 * The partition number is a slot index into the open-addressing
+ 		 * table, so it is bounded by the number of slots, not by the number
+ 		 * of partitions actually in use.
+ 		 */
+ 		Assert(partition > MCV_INVALID_PARTITION);
+ 		Assert(partition < hashtable->nMostCommonTuplePartitionHashBuckets);
+ 
+ 		hashTuple = hashtable->mostCommonTuplePartition[partition].tuples;
+ 	}
+ 	else
+ 		hashTuple = hashTuple->next;
+ 
+ 	while (hashTuple != NULL)
+ 	{
+ 		if (hashTuple->hashvalue == hashvalue)
+ 		{
+ 			TupleTableSlot *inntuple;
+ 
+ 			/* insert hashtable's tuple into exec slot so ExecQual sees it */
+ 			inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+ 											 hjstate->hj_HashTupleSlot,
+ 											 false);	/* do not pfree */
+ 			econtext->ecxt_innertuple = inntuple;
+ 
+ 			/* reset temp memory each time to avoid leaks from qual expr */
+ 			ResetExprContext(econtext);
+ 
+ 			if (ExecQual(hjclauses, econtext, false))
+ 			{
+ 				hjstate->hj_CurTuple = hashTuple;
+ 				return hashTuple;
+ 			}
+ 		}
+ 
+ 		hashTuple = hashTuple->next;
+ 	}
+ 
+ 	/*
+ 	 * no match
+ 	 */
+ 	return NULL;
+ }
+ 
+ /*
   * ExecScanHashBucket
   *		scan a hash bucket for matches to the current outer tuple
   *
Index: src/backend/executor/nodeHashjoin.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v
retrieving revision 1.95
diff -c -r1.95 nodeHashjoin.c
*** src/backend/executor/nodeHashjoin.c	15 Aug 2008 19:20:42 -0000	1.95
--- src/backend/executor/nodeHashjoin.c	18 Oct 2008 01:47:57 -0000
***************
*** 20,25 ****
--- 20,30 ----
  #include "executor/nodeHash.h"
  #include "executor/nodeHashjoin.h"
  #include "utils/memutils.h"
+ #include "optimizer/cost.h"
+ #include "utils/syscache.h"
+ #include "utils/lsyscache.h"
+ #include "parser/parsetree.h"
+ #include "catalog/pg_statistic.h"
  
  
  /* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
***************
*** 34,39 ****
--- 39,146 ----
  						  TupleTableSlot *tupleSlot);
  static int	ExecHashJoinNewBatch(HashJoinState
*hjstate);
  
+ /*
+  * getMostCommonValues
+  *		Seed the hash table's MCV partition table from pg_statistic.
+  *
+  * Does nothing unless the join has exactly one hash clause, the outer plan
+  * is a sequential scan, the clause's first argument is a plain Var, and an
+  * MCV statistics slot exists for that column.
+  */
+ void
+ getMostCommonValues(EState *estate, HashJoinState *hjstate)
+ {
+ 	HashJoinTable hashtable = hjstate->hj_HashTable;
+ 	PlanState  *outerNode = outerPlanState(hjstate);
+ 	FuncExprState *clause;
+ 	ExprState  *argstate;
+ 	Var		   *variable;
+ 	HeapTuple	statsTuple;
+ 	Datum	   *values;
+ 	int			nvalues;
+ 	float4	   *numbers;
+ 	int			nnumbers;
+ 	Oid			relid;
+ 	int			nbuckets;
+ 	int			i;
+ 
+ 	/* histojoin is not defined for more than one join key */
+ 	if (list_length(hjstate->hashclauses) != 1)
+ 		return;
+ 
+ 	/* MCVs can only be looked up for a seq scan on a base relation */
+ 	if (!IsA(outerNode, SeqScanState))
+ 		return;
+ 
+ 	/* the join key must be a bare column before it may be read as a Var */
+ 	clause = (FuncExprState *) linitial(hjstate->hashclauses);
+ 	argstate = (ExprState *) linitial(clause->args);
+ 	if (!IsA(argstate->expr, Var))
+ 		return;
+ 	variable = (Var *) argstate->expr;
+ 
+ 	relid = getrelid(((SeqScan *) outerNode->plan)->scanrelid,
+ 					 estate->es_range_table);
+ 	statsTuple = SearchSysCache(STATRELATT,
+ 								ObjectIdGetDatum(relid),
+ 								Int16GetDatum(variable->varattno),
+ 								0, 0);
+ 	if (!HeapTupleIsValid(statsTuple))
+ 		return;
+ 
+ 	if (!get_attstatsslot(statsTuple,
+ 						  variable->vartype, variable->vartypmod,
+ 						  STATISTIC_KIND_MCV, InvalidOid,
+ 						  &values, &nvalues,
+ 						  &numbers, &nnumbers))
+ 	{
+ 		ReleaseSysCache(statsTuple);
+ 		return;
+ 	}
+ 
+ 	/* open-addressing table: smallest power of two above nvalues, times 4 */
+ 	nbuckets = 2;
+ 	while (nbuckets <= nvalues)
+ 		nbuckets <<= 1;
+ 	nbuckets <<= 2;
+ 
+ 	/* keep the table in hashCxt, the context for whole-hash-join storage */
+ 	hashtable->mostCommonTuplePartition =
+ 		MemoryContextAllocZero(hashtable->hashCxt,
+ 							   nbuckets * sizeof(HashJoinMostCommonValueTuplePartition));
+ 	hashtable->nMostCommonTuplePartitionHashBuckets = nbuckets;
+ 	hashtable->usingMostCommonValues = true;
+ 
+ 	for (i = 0; i < nvalues; i++)
+ 	{
+ 		uint32		hashvalue;
+ 		int			bucket;
+ 
+ 		hashvalue = DatumGetUInt32(FunctionCall1(&hashtable->outer_hashfunctions[0],
+ 												 values[i]));
+ 
+ 		/* zero marks an unused slot, so such a value cannot be registered */
+ 		if (hashvalue == 0)
+ 			continue;
+ 
+ 		/* linear probing until this value or an unused slot is found */
+ 		bucket = hashvalue & (nbuckets - 1);
+ 		while (hashtable->mostCommonTuplePartition[bucket].hashvalue != 0 &&
+ 			   hashtable->mostCommonTuplePartition[bucket].hashvalue != hashvalue)
+ 			bucket = (bucket + 1) & (nbuckets - 1);
+ 
+ 		/* we only want one partition per hash value */
+ 		if (hashtable->mostCommonTuplePartition[bucket].hashvalue != hashvalue)
+ 		{
+ 			hashtable->mostCommonTuplePartition[bucket].tuples = NULL;
+ 			hashtable->mostCommonTuplePartition[bucket].hashvalue = hashvalue;
+ 			hashtable->nMostCommonTuplePartitions++;
+ 		}
+ 	}
+ 
+ 	free_attstatsslot(variable->vartype, values, nvalues, numbers, nnumbers);
+ 	ReleaseSysCache(statsTuple);
+ }
  
  /* ----------------------------------------------------------------
   *		ExecHashJoin
***************
*** 146,151 ****
--- 253,267 ----
  		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
  										node->hj_HashOperators);
  		node->hj_HashTable = hashtable;
+ 
+ 		hashtable->usingMostCommonValues = false;
+ 		hashtable->nMostCommonTuplePartitions = 0;
+ 		hashtable->nMostCommonTuplePartitionHashBuckets = 0;
+ 		hashtable->mostCommonTuplesStored = 0;
+ 		hashtable->mostCommonTuplePartition = NULL;
+ 
+ 		if (enable_hashjoin_usestatmcvs)
+ 			getMostCommonValues(estate, node);
  
  		/*
  		 * execute the Hash node, to build the hash table
***************
*** 157,163 ****
  		 * If the inner relation is completely empty, and we're not doing an
  		 * outer join, we can quit without scanning the outer relation.
  		 */
!
if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node)) return NULL; /* --- 273,279 ---- * If the inner relation is completely empty, and we're not doing an * outer join, we can quit without scanning the outer relation. */ ! if (hashtable->totalTuples == 0 && hashtable->mostCommonTuplesStored == 0 && !HASHJOIN_IS_OUTER(node)) return NULL; /* *************** *** 206,228 **** ExecHashGetBucketAndBatch(hashtable, hashvalue, &node->hj_CurBucketNo, &batchno); node->hj_CurTuple = NULL; ! ! /* ! * Now we've got an outer tuple and the corresponding hash bucket, ! * but this tuple may not belong to the current batch. ! */ ! if (batchno != hashtable->curbatch) { /* ! * Need to postpone this outer tuple to a later batch. Save it ! * in the corresponding outer-batch file. */ ! Assert(batchno > hashtable->curbatch); ! ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), ! hashvalue, ! &hashtable->outerBatchFile[batchno]); ! node->hj_NeedNewOuter = true; ! continue; /* loop around for a new outer tuple */ } } --- 322,350 ---- ExecHashGetBucketAndBatch(hashtable, hashvalue, &node->hj_CurBucketNo, &batchno); node->hj_CurTuple = NULL; ! ! node->hj_OuterTupleMostCommonValuePartition = MCV_INVALID_PARTITION; ! ! ! if (!(hashtable->usingMostCommonValues && isAMostCommonValue(hashtable, hashvalue, &node->hj_OuterTupleMostCommonValuePartition))) { /* ! * Now we've got an outer tuple and the corresponding hash bucket, ! * but this tuple may not belong to the current batch. */ ! if (batchno != hashtable->curbatch) ! { ! /* ! * Need to postpone this outer tuple to a later batch. Save it ! * in the corresponding outer-batch file. ! */ ! Assert(batchno > hashtable->curbatch); ! ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), ! hashvalue, ! &hashtable->outerBatchFile[batchno]); ! node->hj_NeedNewOuter = true; ! continue; /* loop around for a new outer tuple */ ! } } } *************** *** 231,237 **** */ for (;;) { ! 
curtuple = ExecScanHashBucket(node, econtext); if (curtuple == NULL) break; /* out of matches */ --- 353,366 ---- */ for (;;) { ! if (node->hj_OuterTupleMostCommonValuePartition != MCV_INVALID_PARTITION) ! { ! curtuple = ExecScanHashMostCommonTuples(node, econtext); ! } ! else ! { ! curtuple = ExecScanHashBucket(node, econtext); ! } if (curtuple == NULL) break; /* out of matches */ Index: src/backend/optimizer/path/costsize.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v retrieving revision 1.199 diff -c -r1.199 costsize.c *** src/backend/optimizer/path/costsize.c 17 Oct 2008 20:27:24 -0000 1.199 --- src/backend/optimizer/path/costsize.c 17 Oct 2008 23:07:05 -0000 *************** *** 108,113 **** --- 108,115 ---- bool enable_mergejoin = true; bool enable_hashjoin = true; + bool enable_hashjoin_usestatmcvs = true; + typedef struct { PlannerInfo *root; Index: src/backend/utils/misc/guc.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/misc/guc.c,v retrieving revision 1.475 diff -c -r1.475 guc.c *** src/backend/utils/misc/guc.c 6 Oct 2008 13:05:36 -0000 1.475 --- src/backend/utils/misc/guc.c 9 Oct 2008 19:56:17 -0000 *************** *** 625,630 **** --- 625,638 ---- true, NULL, NULL }, { + {"enable_hashjoin_usestatmcvs", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the hash join's use of the MCVs stored in pg_statistic."), + NULL + }, + &enable_hashjoin_usestatmcvs, + true, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), gettext_noop("Child table scans will be skipped if their " Index: src/include/executor/hashjoin.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/executor/hashjoin.h,v retrieving revision 1.48 diff -c 
-r1.48 hashjoin.h
*** src/include/executor/hashjoin.h	1 Jan 2008 19:45:57 -0000	1.48
--- src/include/executor/hashjoin.h	17 Oct 2008 23:48:46 -0000
***************
*** 72,77 ****
--- 72,89 ----
  #define HJTUPLE_MINTUPLE(hjtup)  \
  	((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
  
+ /*
+  * One slot of the open-addressing table of most-common-value partitions.
+  * A hashvalue of zero marks a slot that is not in use.
+  */
+ typedef struct HashJoinMostCommonValueTuplePartition
+ {
+ 	uint32		hashvalue;		/* hash value this partition stands for */
+ 	HashJoinTuple tuples;		/* chain of inner tuples with that hash value */
+ } HashJoinMostCommonValueTuplePartition;
+ 
+ /* "no partition" marker; parenthesized so it is safe in any expression */
+ #define MCV_INVALID_PARTITION	(-1)
  
  typedef struct HashJoinTableData
  {
***************
*** 116,121 ****
--- 128,140 ----
  
  	MemoryContext hashCxt;		/* context for whole-hash-join storage */
  	MemoryContext batchCxt;		/* context for this-batch-only storage */
+ 
+ 	/* state for the most-common-value (MCV) partitions */
+ 	bool		usingMostCommonValues;	/* is the MCV table in use? */
+ 	HashJoinMostCommonValueTuplePartition *mostCommonTuplePartition;	/* slot array */
+ 	int			nMostCommonTuplePartitionHashBuckets;	/* # slots in the array */
+ 	int			nMostCommonTuplePartitions;	/* # slots actually occupied */
+ 	uint32		mostCommonTuplesStored;	/* # inner tuples held in partitions */
  } HashJoinTableData;
  
  #endif   /* HASHJOIN_H */
Index: src/include/executor/nodeHash.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/executor/nodeHash.h,v
retrieving revision 1.45
diff -c -r1.45 nodeHash.h
*** src/include/executor/nodeHash.h	1 Jan 2008 19:45:57 -0000	1.45
--- src/include/executor/nodeHash.h	30 Sep 2008 20:31:35 -0000
***************
*** 45,48 ****
--- 45,51 ----
  						int *numbuckets,
  						int *numbatches);
  
+ extern HashJoinTuple ExecScanHashMostCommonTuples(HashJoinState *hjstate, ExprContext *econtext);
+ extern bool isAMostCommonValue(HashJoinTable hashtable, uint32 hashvalue, int *partitionNumber);
+ 
  #endif   /* NODEHASH_H */
Index: src/include/executor/nodeHashjoin.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/executor/nodeHashjoin.h,v
retrieving revision 1.37
diff -c -r1.37 nodeHashjoin.h
*** src/include/executor/nodeHashjoin.h	1 Jan 2008 19:45:57 -0000	1.37
--- src/include/executor/nodeHashjoin.h	30 Sep 2008 20:32:05 -0000
***************
***
26,29 **** --- 26,31 ---- extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr); + extern void getMostCommonValues(EState *estate, HashJoinState *hjstate); + #endif /* NODEHASHJOIN_H */ Index: src/include/nodes/execnodes.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/nodes/execnodes.h,v retrieving revision 1.190 diff -c -r1.190 execnodes.h *** src/include/nodes/execnodes.h 7 Oct 2008 19:27:04 -0000 1.190 --- src/include/nodes/execnodes.h 17 Oct 2008 23:07:14 -0000 *************** *** 1365,1370 **** --- 1365,1371 ---- bool hj_NeedNewOuter; bool hj_MatchedOuter; bool hj_OuterNotEmpty; + int hj_OuterTupleMostCommonValuePartition; } HashJoinState; Index: src/include/optimizer/cost.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/optimizer/cost.h,v retrieving revision 1.93 diff -c -r1.93 cost.h *** src/include/optimizer/cost.h 4 Oct 2008 21:56:55 -0000 1.93 --- src/include/optimizer/cost.h 7 Oct 2008 18:31:42 -0000 *************** *** 52,57 **** --- 52,58 ---- extern bool enable_nestloop; extern bool enable_mergejoin; extern bool enable_hashjoin; + extern bool enable_hashjoin_usestatmcvs; extern bool constraint_exclusion; extern double clamp_row_est(double nrows);