From 8df37528e0ccab135fad3a9182fb448183a413ee Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Tue, 25 Feb 2025 15:40:59 +0100
Subject: [PATCH v20250225 3/7] progress

Report progress of GIN index builds through the CREATE INDEX progress
subphase and tuple counters, and register ginbuildphasename() as the
ambuildphasename callback so the GIN phase numbers map to names.

---
 src/backend/access/gin/gininsert.c | 61 ++++++++++++++++++++++++++++--
 src/backend/access/gin/ginutil.c   | 28 +++++++++++++-
 src/include/access/gin.h           | 11 ++++++
 src/include/access/gin_private.h   |  1 +
 4 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 7c2f46b9541..7286432698e 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -21,6 +21,7 @@
 #include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "catalog/pg_collation.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "nodes/execnodes.h"
@@ -644,6 +645,10 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	buildstate.accum.ginstate = &buildstate.ginstate;
 	ginInitBA(&buildstate.accum);
 
+	/* Report table scan phase started */
+	pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
+								 PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
+
 	/*
 	 * Attempt to launch parallel worker scan when required
 	 *
@@ -1481,15 +1486,42 @@ _gin_parallel_merge(GinBuildState *state)
 	double		reltuples = 0;
 	GinBuffer  *buffer;
 
+	/* GIN tuples from workers, merged by leader */
+	double		numtuples = 0;
+
 	/* wait for workers to scan table and produce partial results */
 	reltuples = _gin_parallel_heapscan(state);
 
+	/* Execute the sort */
+	pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
+								 PROGRESS_GIN_PHASE_PERFORMSORT_2);
+
 	/* do the actual sort in the leader */
 	tuplesort_performsort(state->bs_sortstate);
 
 	/* initialize buffer to combine entries for the same key */
 	buffer = GinBufferInit(state->ginstate.index);
 
+	/*
+	 * Set the progress target for the next phase. Reset the block number
+	 * values set by table_index_build_scan
+	 */
+	{
+		const int	progress_index[] = {
+			PROGRESS_CREATEIDX_SUBPHASE,
+			PROGRESS_CREATEIDX_TUPLES_TOTAL,
+			PROGRESS_SCAN_BLOCKS_TOTAL,
+			PROGRESS_SCAN_BLOCKS_DONE
+		};
+		const int64 progress_vals[] = {
+			PROGRESS_GIN_PHASE_MERGE_2,
+			state->bs_numtuples,
+			0, 0
+		};
+
+		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+	}
+
 	/*
 	 * Read the GIN tuples from the shared tuplesort, sorted by category and
 	 * key. That probably gives us order matching how data is organized in the
@@ -1530,6 +1562,10 @@ _gin_parallel_merge(GinBuildState *state)
 		 * or append if to the existing data).
 		 */
 		GinBufferStoreTuple(buffer, tup);
+
+		/* Report progress */
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
+									 ++numtuples);
 	}
 
 	/* flush data remaining in the buffer (for the last key) */
@@ -1543,6 +1579,10 @@ _gin_parallel_merge(GinBuildState *state)
 
 		/* discard the existing data */
 		GinBufferReset(buffer);
+
+		/* Report final progress; every tuple was already counted above */
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
+									 numtuples);
 	}
 
 	/* relase all the memory */
@@ -1583,7 +1623,8 @@ _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Rela
 
 	/* Perform work common to all participants */
 	_gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
-								 ginleader->sharedsort, heap, index, sortmem, true);
+								 ginleader->sharedsort, heap, index,
+								 sortmem, true);
 }
 
 /*
@@ -1601,7 +1642,8 @@ _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Rela
  * do a very limited number of mergesorts, which is good.
  */
 static void
-_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort)
+_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
+						 bool progress)
 {
 	GinTuple   *tup;
 	Size		tuplen;
@@ -1612,8 +1654,19 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort)
 	buffer = GinBufferInit(state->ginstate.index);
 
 	/* sort the raw per-worker data */
+	if (progress)
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
+									 PROGRESS_GIN_PHASE_PERFORMSORT_1);
+
 	tuplesort_performsort(state->bs_worker_sort);
 
+	/* reset the number of GIN tuples produced by this worker */
+	state->bs_numtuples = 0;
+
+	if (progress)
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
+									 PROGRESS_GIN_PHASE_MERGE_1);
+
 	/*
 	 * Read the GIN tuples from the shared tuplesort, sorted by the key, and
 	 * merge them into larger chunks for the leader to combine.
@@ -1645,6 +1698,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort)
 									buffer->items, buffer->nitems, &ntuplen);
 
 			tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
+			state->bs_numtuples++;
 
 			pfree(ntup);
 
@@ -1672,6 +1726,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort)
 								buffer->items, buffer->nitems, &ntuplen);
 
 		tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
+		state->bs_numtuples++;
 
 		pfree(ntup);
 
@@ -1775,7 +1830,7 @@ _gin_parallel_scan_and_build(GinBuildState *state,
 	 * the callback, and combine them into much larger chunks and place that
 	 * into the shared tuplestore for leader to process.
 	 */
-	_gin_process_worker_data(state, state->bs_worker_sort);
+	_gin_process_worker_data(state, state->bs_worker_sort, progress);
 
 	/* sort the GIN tuples built by this worker */
 	tuplesort_performsort(state->bs_sortstate);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 6b2dd40fa0f..a61532538c0 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -20,6 +20,7 @@
 #include "access/xloginsert.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "storage/indexfsm.h"
@@ -72,7 +73,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->amgettreeheight = NULL;
 	amroutine->amoptions = ginoptions;
 	amroutine->amproperty = NULL;
-	amroutine->ambuildphasename = NULL;
+	amroutine->ambuildphasename = ginbuildphasename;
 	amroutine->amvalidate = ginvalidate;
 	amroutine->amadjustmembers = ginadjustmembers;
 	amroutine->ambeginscan = ginbeginscan;
@@ -700,3 +701,28 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
 
 	END_CRIT_SECTION();
 }
+
+/*
+ * ginbuildphasename() -- Return name of index build phase.
+ */
+char *
+ginbuildphasename(int64 phasenum)
+{
+	switch (phasenum)
+	{
+		case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
+			return "initializing";
+		case PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN:
+			return "scanning table";
+		case PROGRESS_GIN_PHASE_PERFORMSORT_1:
+			return "sorting tuples (workers)";
+		case PROGRESS_GIN_PHASE_MERGE_1:
+			return "merging tuples (workers)";
+		case PROGRESS_GIN_PHASE_PERFORMSORT_2:
+			return "sorting tuples";
+		case PROGRESS_GIN_PHASE_MERGE_2:
+			return "merging tuples";
+		default:
+			return NULL;
+	}
+}
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 2debdac0f43..2e1076a0499 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -38,6 +38,17 @@
 #define GIN_SEARCH_MODE_ALL				2
 #define GIN_SEARCH_MODE_EVERYTHING		3	/* for internal use only */
 
+/*
+ * Constant definition for progress reporting. Phase numbers must match
+ * ginbuildphasename.
+ */
+/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
+#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN	2
+#define PROGRESS_GIN_PHASE_PERFORMSORT_1		3
+#define PROGRESS_GIN_PHASE_MERGE_1				4
+#define PROGRESS_GIN_PHASE_PERFORMSORT_2		5
+#define PROGRESS_GIN_PHASE_MERGE_2				6
+
 /*
  * GinStatsData represents stats data for planner use
  */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 50478db9820..95d8805b66f 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -109,6 +109,7 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
 							  GinNullCategory *category);
+extern char *ginbuildphasename(int64 phasenum);
 
 /* gininsert.c */
 extern IndexBuildResult *ginbuild(Relation heap, Relation index,
-- 
2.48.1