From cadcca528fb25986fd995b71e859cd69d6f483e4 Mon Sep 17 00:00:00 2001 From: David Geier Date: Tue, 8 Nov 2022 19:40:31 +0100 Subject: [PATCH v4] Parallel Bitmap Heap Scan reports per-worker stats Similarly to other nodes (e.g. hash join, sort, memoize), Bitmap Heap Scan now reports per-worker stats in the EXPLAIN ANALYZE output. Previously only the heap blocks stats for the leader were reported which was incomplete in parallel scans. Discussion: https://www.postgresql.org/message-id/flat/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com --- src/backend/commands/explain.c | 45 +++++++++-- src/backend/executor/execParallel.c | 3 + src/backend/executor/nodeBitmapHeapscan.c | 93 ++++++++++++++++++++--- src/include/executor/nodeBitmapHeapscan.h | 1 + src/include/nodes/execnodes.h | 33 +++++++- 5 files changed, 157 insertions(+), 18 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 926d70afaf..02251994c6 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3467,26 +3467,57 @@ show_hashagg_info(AggState *aggstate, ExplainState *es) static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) { + Assert(es->analyze); + if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyInteger("Exact Heap Blocks", NULL, - planstate->exact_pages, es); + planstate->stats.exact_pages, es); ExplainPropertyInteger("Lossy Heap Blocks", NULL, - planstate->lossy_pages, es); + planstate->stats.lossy_pages, es); } else { - if (planstate->exact_pages > 0 || planstate->lossy_pages > 0) + if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0) { ExplainIndentText(es); appendStringInfoString(es->str, "Heap Blocks:"); - if (planstate->exact_pages > 0) - appendStringInfo(es->str, " exact=%ld", planstate->exact_pages); - if (planstate->lossy_pages > 0) - appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages); + if (planstate->stats.exact_pages > 0) + appendStringInfo(es->str, " exact=%ld", planstate->stats.exact_pages); + if (planstate->stats.lossy_pages > 0) + appendStringInfo(es->str, " lossy=%ld", planstate->stats.lossy_pages); appendStringInfoChar(es->str, '\n'); } } + + if (planstate->pstate != NULL) + { + for (int n = 0; n < planstate->sinstrument->num_workers; n++) + { + BitmapHeapScanInstrumentation *si = &planstate->sinstrument->sinstrument[n]; + + if (si->exact_pages == 0 && si->lossy_pages == 0) + continue; + + if (es->workers_state) + ExplainOpenWorker(n, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Heap Blocks: exact=%ld lossy=%ld\n", + si->exact_pages, si->lossy_pages); + } + else + { + ExplainPropertyInteger("Exact Heap Blocks", NULL, si->exact_pages, es); + ExplainPropertyInteger("Lossy Heap Blocks", NULL, si->lossy_pages, es); + } + + if (es->workers_state) + ExplainCloseWorker(n, es); + } + } } /* diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 8c53d1834e..bfb3419efb 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1076,6 +1076,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_MemoizeState: ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate); break; + case T_BitmapHeapScanState: + ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate); + break; default: break; } diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index cee7f45aab..0d7dd1f165 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -205,9 +205,9 @@ BitmapHeapNext(BitmapHeapScanState *node) BitmapAdjustPrefetchIterator(node, tbmres); if (tbmres->ntuples >= 0) - node->exact_pages++; + node->stats.exact_pages++; else - node->lossy_pages++; + node->stats.lossy_pages++; /* * We can skip fetching the heap page if we don't need any fields @@ -647,6 +647,17 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) { TableScanDesc scanDesc; + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE. + */ + if (node->sinstrument != NULL && IsParallelWorker()) + { + Assert(ParallelWorkerNumber <= node->sinstrument->num_workers); + node->sinstrument->sinstrument[ParallelWorkerNumber] = node->stats; + } + /* * extract information from the node */ @@ -716,8 +727,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->return_empty_tuples = 0; scanstate->vmbuffer = InvalidBuffer; scanstate->pvmbuffer = InvalidBuffer; - scanstate->exact_pages = 0; - scanstate->lossy_pages = 0; + scanstate->stats.exact_pages = 0; + scanstate->stats.lossy_pages = 0; scanstate->prefetch_iterator = NULL; scanstate->prefetch_pages = 0; scanstate->prefetch_target = 0; @@ -840,7 +851,22 @@ void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt) { - shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState)); + Size size; + + /* + * We store ParallelBitmapHeapState followed by + * SharedBitmapHeapInstrumentationInfo in the same shmem chunk. + */ + size = MAXALIGN(sizeof(ParallelBitmapHeapState)); + + /* don't need this if not instrumenting or no workers */ + if (node->ss.ps.instrument && pcxt->nworkers > 0) + { + size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument)); + size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation))); + } + + shm_toc_estimate_chunk(&pcxt->estimator, size); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -855,13 +881,27 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt) { ParallelBitmapHeapState *pstate; + SharedBitmapHeapInstrumentation *sinstrument = NULL; dsa_area *dsa = node->ss.ps.state->es_query_dsa; + char *ptr; + Size size; /* If there's no DSA, there are no workers; initialize nothing. */ if (dsa == NULL) return; - pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState)); + size = MAXALIGN(sizeof(ParallelBitmapHeapState)); + if (node->ss.ps.instrument && pcxt->nworkers > 0) + { + size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument)); + size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation))); + } + + ptr = shm_toc_allocate(pcxt->toc, size); + pstate = (ParallelBitmapHeapState *) ptr; + ptr += MAXALIGN(sizeof(ParallelBitmapHeapState)); + if (node->ss.ps.instrument && pcxt->nworkers > 0) + sinstrument = (SharedBitmapHeapInstrumentation *) ptr; pstate->tbmiterator = 0; pstate->prefetch_iterator = 0; @@ -874,8 +914,16 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ConditionVariableInit(&pstate->cv); + /* ensure any unfilled slots will contain zeroes */ + if (sinstrument) + { + sinstrument->num_workers = pcxt->nworkers; + memset(sinstrument->sinstrument, 0, pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation)); + } + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate); node->pstate = pstate; + node->sinstrument = sinstrument; } /* ---------------------------------------------------------------- @@ -917,10 +965,37 @@ void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt) { - ParallelBitmapHeapState *pstate; + char *ptr; Assert(node->ss.ps.state->es_query_dsa != NULL); - pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); - node->pstate = pstate; + ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); + + node->pstate = (ParallelBitmapHeapState *) ptr; + ptr += MAXALIGN(sizeof(ParallelBitmapHeapState)); + + if (node->ss.ps.instrument) + node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr; +} + +/* ---------------------------------------------------------------- + * ExecBitmapHeapRetrieveInstrumentation + * + * Transfer bitmap heap scan statistics from DSM to private memory. + * ---------------------------------------------------------------- + */ +void +ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node) +{ + SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument; + Size size; + + if (sinstrument == NULL) + return; + + size = offsetof(SharedBitmapHeapInstrumentation, sinstrument) + + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation); + + node->sinstrument = palloc(size); + memcpy(node->sinstrument, sinstrument, size); } diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h index ea003a9caa..446a664590 100644 --- a/src/include/executor/nodeBitmapHeapscan.h +++ b/src/include/executor/nodeBitmapHeapscan.h @@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt); extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt); +extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node); #endif /* NODEBITMAPHEAPSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 1774c56ae3..8306e0c47b 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1735,6 +1735,12 @@ typedef struct BitmapIndexScanState struct IndexScanDescData *biss_ScanDesc; } BitmapIndexScanState; +typedef struct BitmapHeapScanInstrumentation +{ + long lossy_pages; + long exact_pages; +} BitmapHeapScanInstrumentation; + /* ---------------- * SharedBitmapState information * @@ -1778,6 +1784,20 @@ typedef struct ParallelBitmapHeapState ConditionVariable cv; } ParallelBitmapHeapState; +/* ---------------- + * Instrumentation data for a parallel bitmap heap scan. + * + * During a parallel bitmap heap scan, this lives in shared memory. At the + * end, each worker copies their own stats to the right slot. Finally, the + * leader copies the data to its local memory + * ---------------- + */ +typedef struct SharedBitmapHeapInstrumentation +{ + int num_workers; + BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedBitmapHeapInstrumentation; + /* ---------------- * BitmapHeapScanState information * @@ -1812,8 +1832,6 @@ typedef struct BitmapHeapScanState int return_empty_tuples; Buffer vmbuffer; Buffer pvmbuffer; - long exact_pages; - long lossy_pages; TBMIterator *prefetch_iterator; int prefetch_pages; int prefetch_target; @@ -1822,6 +1840,17 @@ typedef struct BitmapHeapScanState TBMSharedIterator *shared_tbmiterator; TBMSharedIterator *shared_prefetch_iterator; ParallelBitmapHeapState *pstate; + + /* + * In a parallelized bitmap heap scan, each worker sets their + * instrumentaton data in pstate->sinstrument at the end. The leader + * copies the shared-memory info back to local storage before DSM + * shutdown, and stores it here in 'instrument'. It remains NULL in + * workers and in non-parallel scans. + */ + SharedBitmapHeapInstrumentation *sinstrument; + + BitmapHeapScanInstrumentation stats; } BitmapHeapScanState; /* ---------------- -- 2.40.1