From 03c8334c4504afe60fcd2a7f1230aaa750900ec3 Mon Sep 17 00:00:00 2001 From: Justin Pryzby Date: Tue, 31 Dec 2019 18:49:41 -0600 Subject: [PATCH v9 2/8] explain to show tuplehash bucket and memory stats. Note that hashed SubPlan and RecursiveUnion aren't affected in explain output, probably since hashtables aren't allocated at that point. Discussion: https://www.postgresql.org/message-id/flat/20200103161925.GM12066@telsasoft.com --- src/backend/commands/explain.c | 173 +++++++++++++++++----- src/backend/executor/execGrouping.c | 33 +++++ src/backend/executor/nodeAgg.c | 17 +-- src/backend/executor/nodeRecursiveunion.c | 3 + src/backend/executor/nodeSetOp.c | 1 + src/backend/executor/nodeSubplan.c | 3 + src/include/executor/executor.h | 1 + src/include/nodes/execnodes.h | 11 +- 8 files changed, 194 insertions(+), 48 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 455f54ef83..ecc0469d35 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -18,6 +18,7 @@ #include "commands/createas.h" #include "commands/defrem.h" #include "commands/prepare.h" +#include "executor/nodeAgg.h" #include "executor/nodeHash.h" #include "foreign/fdwapi.h" #include "jit/jit.h" @@ -88,12 +89,14 @@ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, ExplainState *es); static void show_agg_keys(AggState *astate, List *ancestors, ExplainState *es); -static void show_grouping_sets(PlanState *planstate, Agg *agg, +static void show_grouping_sets(AggState *aggstate, Agg *agg, List *ancestors, ExplainState *es); -static void show_grouping_set_keys(PlanState *planstate, +static void show_grouping_set_info(AggState *aggstate, Agg *aggnode, Sort *sortnode, List *context, bool useprefix, - List *ancestors, ExplainState *es); + List *ancestors, + HashTableInstrumentation *inst, + ExplainState *es); static void show_group_keys(GroupState *gstate, List *ancestors, ExplainState *es); static void 
show_sort_group_keys(PlanState *planstate, const char *qlabel, @@ -108,7 +111,8 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); -static void show_hashagg_info(AggState *hashstate, ExplainState *es); +static void show_tuplehash_info(HashTableInstrumentation *inst, AggState *as, + ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, @@ -1535,6 +1539,7 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, " %s", setopcmd); else ExplainPropertyText("Command", setopcmd, es); + // show strategy in text mode ? } break; default: @@ -1928,11 +1933,24 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_Agg: show_agg_keys(castNode(AggState, planstate), ancestors, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - show_hashagg_info((AggState *) planstate, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); break; + case T_SetOp: + { + SetOpState *sos = castNode(SetOpState, planstate); + if (sos->hashtable) + show_tuplehash_info(&sos->hashtable->instrument, NULL, es); + } + break; + case T_RecursiveUnion: + { + RecursiveUnionState *rus = (RecursiveUnionState *)planstate; + if (rus->hashtable) + show_tuplehash_info(&rus->hashtable->instrument, NULL, es); + } + break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); @@ -2337,24 +2355,31 @@ show_agg_keys(AggState *astate, List *ancestors, ancestors = lcons(plan, ancestors); if (plan->groupingSets) - show_grouping_sets(outerPlanState(astate), plan, ancestors, es); + show_grouping_sets(astate, plan, ancestors, es); else + { 
show_sort_group_keys(outerPlanState(astate), "Group Key", plan->numCols, 0, plan->grpColIdx, NULL, NULL, NULL, ancestors, es); + Assert(astate->num_hashes <= 1); + if (astate->num_hashes) + show_tuplehash_info(&astate->perhash[0].hashtable->instrument, astate, es); + } ancestors = list_delete_first(ancestors); } } static void -show_grouping_sets(PlanState *planstate, Agg *agg, +show_grouping_sets(AggState *aggstate, Agg *agg, List *ancestors, ExplainState *es) { + PlanState *planstate = outerPlanState(aggstate); List *context; bool useprefix; ListCell *lc; + int setno = 0; /* Set up deparsing context */ context = set_deparse_context_plan(es->deparse_cxt, @@ -2364,27 +2389,41 @@ show_grouping_sets(PlanState *planstate, Agg *agg, ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es); - show_grouping_set_keys(planstate, agg, NULL, - context, useprefix, ancestors, es); + show_grouping_set_info(aggstate, agg, NULL, context, useprefix, ancestors, + aggstate->num_hashes ? + &aggstate->perhash[setno++].hashtable->instrument : NULL, + es); foreach(lc, agg->chain) { Agg *aggnode = lfirst(lc); Sort *sortnode = (Sort *) aggnode->plan.lefttree; + HashTableInstrumentation *inst = NULL; - show_grouping_set_keys(planstate, aggnode, sortnode, - context, useprefix, ancestors, es); + if (aggnode->aggstrategy == AGG_HASHED || + aggnode->aggstrategy == AGG_MIXED) + { + Assert(setno < aggstate->num_hashes); + inst = &aggstate->perhash[setno++].hashtable->instrument; + } + + show_grouping_set_info(aggstate, aggnode, sortnode, + context, useprefix, ancestors, + inst, es); } ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es); } +/* Show keys and any hash instrumentation for a grouping set */ static void -show_grouping_set_keys(PlanState *planstate, +show_grouping_set_info(AggState *aggstate, Agg *aggnode, Sort *sortnode, List *context, bool useprefix, - List *ancestors, ExplainState *es) + List *ancestors, HashTableInstrumentation *inst, + ExplainState *es) { + 
PlanState *planstate = outerPlanState(aggstate); Plan *plan = planstate->plan; char *exprstr; ListCell *lc; @@ -2448,6 +2487,10 @@ show_grouping_set_keys(PlanState *planstate, ExplainCloseGroup(keysetname, keysetname, false, es); + if (aggnode->aggstrategy == AGG_HASHED || + aggnode->aggstrategy == AGG_MIXED) + show_tuplehash_info(inst, NULL, es); + if (sortnode && es->format == EXPLAIN_FORMAT_TEXT) es->indent--; @@ -3059,37 +3102,78 @@ show_hash_info(HashState *hashstate, ExplainState *es) } /* - * Show information on hash aggregate memory usage and batches. + * Show hash bucket stats and (optionally) memory. */ static void -show_hashagg_info(AggState *aggstate, ExplainState *es) +show_tuplehash_info(HashTableInstrumentation *inst, AggState *aggstate, ExplainState *es) { - Agg *agg = (Agg *)aggstate->ss.ps.plan; - int64 memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024; - - Assert(IsA(aggstate, AggState)); - - if (agg->aggstrategy != AGG_HASHED && - agg->aggstrategy != AGG_MIXED) - return; + int64 spacePeakKb_tuples = (inst->space_peak_tuples + 1023) / 1024, + spacePeakKb_hash = (inst->space_peak_hash + 1023) / 1024; - if (es->costs && aggstate->hash_planned_partitions > 0) - { + if (es->costs && aggstate!=NULL && aggstate->hash_planned_partitions > 0) ExplainPropertyInteger("Planned Partitions", NULL, aggstate->hash_planned_partitions, es); - } if (!es->analyze) return; - /* EXPLAIN ANALYZE */ - ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es); - if (aggstate->hash_batches_used > 0) + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyInteger("Hash Buckets", NULL, + inst->nbuckets, es); + ExplainPropertyInteger("Original Hash Buckets", NULL, + inst->nbuckets_original, es); + ExplainPropertyInteger("Peak Memory Usage (hashtable)", "kB", + spacePeakKb_hash, es); + ExplainPropertyInteger("Peak Memory Usage (tuples)", "kB", + spacePeakKb_tuples, es); + if (aggstate != NULL) + { + Agg *agg = (Agg *)aggstate->ss.ps.plan; + if 
(agg->aggstrategy == AGG_HASHED || + agg->aggstrategy == AGG_MIXED) + { + ExplainPropertyInteger("Disk Usage", "kB", + aggstate->hash_disk_used, es); + ExplainPropertyInteger("HashAgg Batches", NULL, + aggstate->hash_batches_used, es); + } + } + } + else if (!inst->nbuckets) + ; /* Do nothing */ + else { - ExplainPropertyInteger("Disk Usage", "kB", - aggstate->hash_disk_used, es); - ExplainPropertyInteger("HashAgg Batches", NULL, - aggstate->hash_batches_used, es); + if (inst->nbuckets_original != inst->nbuckets) + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Buckets: %lld (originally %lld)", + (long long)inst->nbuckets, + (long long)inst->nbuckets_original); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Buckets: %lld", + (long long)inst->nbuckets); + } + + appendStringInfoChar(es->str, '\n'); + ExplainIndentText(es); + appendStringInfo(es->str, + "Peak Memory Usage: hashtable: %lldkB, tuples: %lldkB", + (long long)spacePeakKb_hash, (long long)spacePeakKb_tuples); + appendStringInfoChar(es->str, '\n'); + + if (aggstate != NULL && aggstate->hash_batches_used > 0) + { + ExplainPropertyInteger("Disk Usage", "kB", + aggstate->hash_disk_used, es); + ExplainPropertyInteger("HashAgg Batches", NULL, + aggstate->hash_batches_used, es); + } } } @@ -3798,6 +3882,29 @@ ExplainSubPlans(List *plans, List *ancestors, ExplainNode(sps->planstate, ancestors, relationship, sp->plan_name, es); + if (sps->hashtable) + { + ExplainOpenGroup("Hashtable", "Hashtable", true, es); + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfoString(es->str, "Hashtable: "); + } + show_tuplehash_info(&sps->hashtable->instrument, NULL, es); + ExplainCloseGroup("Hashtable", "Hashtable", true, es); + } + + if (sps->hashnulls) + { + ExplainOpenGroup("Null Hashtable", "Null Hashtable", true, es); + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfoString(es->str, "Null Hashtable: "); + } + 
show_tuplehash_info(&sps->hashnulls->instrument, NULL, es); + ExplainCloseGroup("Null Hashtable", "Null Hashtable", true, es); + } ancestors = list_delete_first(ancestors); } diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 009d27b9a8..10276d3f58 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -188,6 +188,7 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->inputslot = NULL; hashtable->in_hash_funcs = NULL; hashtable->cur_eq_func = NULL; + memset(&hashtable->instrument, 0, sizeof(hashtable->instrument)); /* * If parallelism is in use, even if the master backend is performing the @@ -203,6 +204,7 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->hash_iv = 0; hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); + UpdateTupleHashTableStats(hashtable, true); /* * We copy the input tuple descriptor just for safety --- we assume all @@ -281,9 +283,40 @@ BuildTupleHashTable(PlanState *parent, void ResetTupleHashTable(TupleHashTable hashtable) { + UpdateTupleHashTableStats(hashtable, false); tuplehash_reset(hashtable->hashtab); } +/* Update instrumentation stats */ +void +UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial) +{ + hashtable->instrument.nbuckets = hashtable->hashtab->size; + if (initial) + { + hashtable->instrument.nbuckets_original = hashtable->hashtab->size; + // hashtable->instrument.space_peak_hash = hashtable->hashtab->size * + // sizeof(TupleHashEntryData); + hashtable->instrument.space_peak_hash = + MemoryContextMemAllocated(hashtable->hashtab->ctx, true); + hashtable->instrument.space_peak_tuples = 0; + } + else + { + /* hashtable->entrysize includes additionalsize */ + size_t hash_size = MemoryContextMemAllocated(hashtable->hashtab->ctx, true); + size_t tuple_size = MemoryContextMemAllocated(hashtable->tablecxt, true); + + hashtable->instrument.space_peak_hash = Max( + hashtable->instrument.space_peak_hash, + hash_size); + 
+ hashtable->instrument.space_peak_tuples = Max( + hashtable->instrument.space_peak_tuples, tuple_size); + // hashtable->hashtab->members * hashtable->entrysize); + } +} + /* * Find or create a hashtable entry for the tuple group containing the * given tuple. The tuple must be the same type as the hashtable entries. diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 48b0274b2e..2d6783843a 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1867,36 +1867,25 @@ hash_agg_enter_spill_mode(AggState *aggstate) static void hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions) { - Size meta_mem = 0; Size hash_mem = 0; Size buffer_mem; - Size total_mem; if (aggstate->aggstrategy != AGG_MIXED && aggstate->aggstrategy != AGG_HASHED) return; - for (int i = 0; i < aggstate->num_hashes; ++i) { - /* memory for the hash table itself */ - meta_mem += MemoryContextMemAllocated( - aggstate->perhash[i].hash_metacxt, true); - /* memory for the group keys and transition states */ hash_mem += MemoryContextMemAllocated( aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true); + UpdateTupleHashTableStats(aggstate->perhash[i].hashtable, false); } - /* memory for read/write tape buffers, if spilled */ + /* memory for read/write tape buffers, if spilled XXX */ buffer_mem = npartitions * HASHAGG_WRITE_BUFFER_SIZE; if (from_tape) buffer_mem += HASHAGG_READ_BUFFER_SIZE; - /* update peak mem */ - total_mem = meta_mem + hash_mem + buffer_mem; - if (total_mem > aggstate->hash_mem_peak) - aggstate->hash_mem_peak = total_mem; - /* update disk usage */ if (aggstate->hash_tapeinfo != NULL) { @@ -3269,7 +3258,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* * Create expression contexts. We need three or more, one for * per-input-tuple processing, one for per-output-tuple processing, one - * for all the hashtables, and one for each grouping set. 
The per-tuple + * for each hashtable, and one for each grouping set. The per-tuple * memory context of the per-grouping-set ExprContexts (aggcontexts) * replaces the standalone memory context formerly used to hold transition * values. We cheat a little by using ExecAssignExprContext() to build diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index 620414a1ed..93272c28b1 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -156,6 +156,9 @@ ExecRecursiveUnion(PlanState *pstate) return slot; } + if (node->hashtable) + UpdateTupleHashTableStats(node->hashtable, false); + return NULL; } diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index bfd148a41a..9c0e0ab96e 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -415,6 +415,7 @@ setop_fill_hash_table(SetOpState *setopstate) setopstate->table_filled = true; /* Initialize to walk the hash table */ + UpdateTupleHashTableStats(setopstate->hashtable, false); ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter); } diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 298b7757f5..22c32612ba 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -621,6 +621,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) ExecClearTuple(node->projRight->pi_state.resultslot); MemoryContextSwitchTo(oldcontext); + UpdateTupleHashTableStats(node->hashtable, false); + if (node->hashnulls) + UpdateTupleHashTableStats(node->hashnulls, false); } /* diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index c7deeac662..f71cc03ad5 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -150,6 +150,7 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, ExprState *eqcomp, FmgrInfo *hashfunctions); extern void 
ResetTupleHashTable(TupleHashTable hashtable); +extern void UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial); /* * prototypes from functions in execJunk.c diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3fc5989bf7..cdcd825c1e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -693,6 +693,14 @@ typedef struct TupleHashEntryData #define SH_DECLARE #include "lib/simplehash.h" +typedef struct HashTableInstrumentation +{ + size_t nbuckets; /* number of buckets at end of execution */ + size_t nbuckets_original; /* planned number of buckets */ + size_t space_peak_hash; /* peak memory usage in bytes */ + size_t space_peak_tuples; /* peak memory usage in bytes */ +} HashTableInstrumentation; + typedef struct TupleHashTableData { tuplehash_hash *hashtab; /* underlying hash table */ @@ -711,6 +719,7 @@ typedef struct TupleHashTableData ExprState *cur_eq_func; /* comparator for input vs. table */ uint32 hash_iv; /* hash-function IV */ ExprContext *exprcontext; /* expression context */ + HashTableInstrumentation instrument; } TupleHashTableData; typedef tuplehash_iterator TupleHashIterator; @@ -2173,9 +2182,9 @@ typedef struct AggState int hash_planned_partitions; /* number of partitions planned for first pass */ double hashentrysize; /* estimate revised during execution */ - Size hash_mem_peak; /* peak hash table memory usage */ uint64 hash_ngroups_current; /* number of groups currently in memory in all hash tables */ +// Move these to instrumentation ? uint64 hash_disk_used; /* kB of disk space used */ int hash_batches_used; /* batches used during entire execution */ -- 2.17.0