From 394e28f8fe7f83e2c0ac6922b67f9df57bd7eb08 Mon Sep 17 00:00:00 2001
From: Corey Huinker
Date: Sat, 16 Mar 2024 17:21:10 -0400
Subject: [PATCH 3/4] Enable dumping of table/index stats in pg_dump.

For each table/matview/index dumped, pg_dump will also generate a
statement that makes all of the pg_set_relation_stats() and
pg_set_attribute_stats() calls necessary to restore the statistics of
the source system onto the destination system.

As is the pattern with pg_dump options, this can be disabled with
--no-statistics.
---
 src/bin/pg_dump/pg_backup.h          |   2 +
 src/bin/pg_dump/pg_backup_archiver.c |   5 +
 src/bin/pg_dump/pg_dump.c            | 100 ++++++++++++-
 src/bin/pg_dump/pg_dump.h            |   1 +
 src/bin/pg_dump/pg_dumpall.c         |   5 +
 src/bin/pg_dump/pg_restore.c         |   3 +
 src/fe_utils/Makefile                |   1 +
 src/fe_utils/meson.build             |   1 +
 src/fe_utils/stats_export.c          | 201 +++++++++++++++++++++++++++
 src/include/fe_utils/stats_export.h  |  36 +++++
 10 files changed, 353 insertions(+), 2 deletions(-)
 create mode 100644 src/fe_utils/stats_export.c
 create mode 100644 src/include/fe_utils/stats_export.h

diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 9ef2f2017ef..1db5cf52eb8 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -112,6 +112,7 @@ typedef struct _restoreOptions
 	int			no_publications;	/* Skip publication entries */
 	int			no_security_labels; /* Skip security label entries */
 	int			no_subscriptions;	/* Skip subscription entries */
+	int			no_statistics;		/* Skip statistics import */
 	int			strict_names;
 
 	const char *filename;
@@ -179,6 +180,7 @@ typedef struct _dumpOptions
 	int			no_security_labels;
 	int			no_publications;
 	int			no_subscriptions;
+	int			no_statistics;
 	int			no_toast_compression;
 	int			no_unlogged_table_data;
 	int			serializable_deferrable;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d97ebaff5b8..d5f61399d91 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2833,6 
+2833,10 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) if (ropt->no_subscriptions && strcmp(te->desc, "SUBSCRIPTION") == 0) return 0; + /* If it's a stats dump, maybe ignore it */ + if (ropt->no_statistics && strcmp(te->desc, "STATISTICS") == 0) + return 0; + /* Ignore it if section is not to be dumped/restored */ switch (curSection) { @@ -2862,6 +2866,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) */ if (strcmp(te->desc, "ACL") == 0 || strcmp(te->desc, "COMMENT") == 0 || + strcmp(te->desc, "STATISTICS") == 0 || strcmp(te->desc, "SECURITY LABEL") == 0) { /* Database properties react to createDB, not selectivity options. */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b1c4c3ec7f0..621bfa12337 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -59,6 +59,7 @@ #include "compress_io.h" #include "dumputils.h" #include "fe_utils/option_utils.h" +#include "fe_utils/stats_export.h" #include "fe_utils/string_utils.h" #include "filter.h" #include "getopt_long.h" @@ -428,6 +429,7 @@ main(int argc, char **argv) {"no-comments", no_argument, &dopt.no_comments, 1}, {"no-publications", no_argument, &dopt.no_publications, 1}, {"no-security-labels", no_argument, &dopt.no_security_labels, 1}, + {"no-statistics", no_argument, &dopt.no_statistics, 1}, {"no-subscriptions", no_argument, &dopt.no_subscriptions, 1}, {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, @@ -1144,6 +1146,7 @@ help(const char *progname) printf(_(" --no-comments do not dump comments\n")); printf(_(" --no-publications do not dump publications\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-table-access-method do not dump table access 
methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); @@ -7001,6 +7004,7 @@ getTables(Archive *fout, int *numTables) /* Tables have data */ tblinfo[i].dobj.components |= DUMP_COMPONENT_DATA; + tblinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS; /* Mark whether table has an ACL */ if (!PQgetisnull(res, i, i_relacl)) @@ -7498,6 +7502,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); indxinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); AssignDumpId(&indxinfo[j].dobj); + indxinfo[j].dobj.components |= DUMP_COMPONENT_STATISTICS; indxinfo[j].dobj.dump = tbinfo->dobj.dump; indxinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_indexname)); indxinfo[j].dobj.namespace = tbinfo->dobj.namespace; @@ -10247,6 +10252,82 @@ dumpComment(Archive *fout, const char *type, catalogId, subid, dumpId, NULL); } +/* + * dumpRelationStats -- + * + * Dump command to import stats into the relation on the new database. 
+ */
+static void
+dumpRelationStats(Archive *fout, const DumpableObject *dobj,
+				  const char *reltypename, DumpId dumpid)
+{
+	const char *stmtname = "relstats";
+	static bool prepared = false;	/* has the export statement been prepared? */
+	const char *values[2];
+	PGconn	   *conn;
+	PGresult   *res;
+	PQExpBuffer query;
+	PQExpBuffer tag;
+
+	/* do nothing, if --no-statistics is supplied */
+	if (fout->dopt->no_statistics)
+		return;
+
+	conn = GetConnection(fout);
+
+	/* Prepare the export query on first use; later calls just execute it */
+	if (!prepared)
+	{
+		int			ver = PQserverVersion(conn);
+		char	   *sql = exportRelationStatsSQL(ver);
+
+		if (sql == NULL)
+			pg_fatal("could not prepare stats export query for server version %d",
+					 ver);
+
+		res = PQprepare(conn, stmtname, sql, 2, NULL);
+		if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
+			pg_fatal("prepared statement failed: %s", PQerrorMessage(conn));
+
+		PQclear(res);			/* don't leak the PQprepare result */
+		free(sql);
+		prepared = true;
+	}
+
+	values[0] = fmtQualifiedId(dobj->namespace->dobj.name, dobj->name);
+	values[1] = NULL;			/* NULL: keep the source relation's name */
+	res = PQexecPrepared(conn, stmtname, 2, values, NULL, NULL, 0);
+
+	/* Result set must be 1x1 */
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		pg_fatal("error in statistics extraction: %s", PQerrorMessage(conn));
+
+	if (PQntuples(res) != 1)
+		pg_fatal("statistics extraction expected one row, but got %d rows",
+				 PQntuples(res));
+
+	query = createPQExpBuffer();
+	appendPQExpBufferStr(query, PQgetvalue(res, 0, 0)); /* copied, no strdup */
+	appendPQExpBufferStr(query, ";\n");
+
+	tag = createPQExpBuffer();
+	appendPQExpBuffer(tag, "%s %s", reltypename, fmtId(dobj->name));
+
+	/* The description must match the string _tocEntryRequired() tests for */
+	ArchiveEntry(fout, nilCatalogId, createDumpId(),
+				 ARCHIVE_OPTS(.tag = tag->data,
+							  .namespace = dobj->namespace->dobj.name,
+							  .description = "STATISTICS",
+							  .section = SECTION_POST_DATA,
+							  .createStmt = query->data,
+							  .deps = &dumpid,
+							  .nDeps = 1));
+
+	PQclear(res);
+	destroyPQExpBuffer(query);
+	destroyPQExpBuffer(tag);
+}
+
 /*
  * dumpTableComment --
  *
@@ -16681,6 +16762,13 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 	if (tbinfo->dobj.dump & DUMP_COMPONENT_SECLABEL)
dumpTableSecLabel(fout, tbinfo, reltypename);
 
+	/* Statistics are dependent on the definition, not the data */
+	/* Views don't have stats, so dump stats for every relkind except views */
+	if ((tbinfo->dobj.dump & DUMP_COMPONENT_STATISTICS) &&
+		(tbinfo->relkind != RELKIND_VIEW))
+		dumpRelationStats(fout, &tbinfo->dobj, reltypename,
+						  tbinfo->dobj.dumpId);
+
 	/* Dump comments on inlined table constraints */
 	for (j = 0; j < tbinfo->ncheck; j++)
 	{
@@ -16882,6 +16970,7 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 	PQExpBuffer delq;
 	char	   *qindxname;
 	char	   *qqindxname;
+	DumpId		dumpid;
 
 	/* Do nothing in data-only dump */
 	if (dopt->dataOnly)
@@ -16994,14 +17083,21 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 		free(indstatvalsarray);
 	}
 
+	/* Comments and stats share same .dep */
+	dumpid = is_constraint ? indxinfo->indexconstraint :
+		indxinfo->dobj.dumpId;
+
 	/* Dump Index Comments */
 	if (indxinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
 		dumpComment(fout, "INDEX", qindxname,
 					tbinfo->dobj.namespace->dobj.name,
 					tbinfo->rolname,
 					indxinfo->dobj.catId, 0,
-					is_constraint ? 
indxinfo->indexconstraint : - indxinfo->dobj.dumpId); + dumpid); + + /* Dump Index Stats */ + if (indxinfo->dobj.dump & DUMP_COMPONENT_STATISTICS) + dumpRelationStats(fout, &indxinfo->dobj, "INDEX", dumpid); destroyPQExpBuffer(q); destroyPQExpBuffer(delq); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 9bc93520b45..d6a071ec28f 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -101,6 +101,7 @@ typedef uint32 DumpComponents; #define DUMP_COMPONENT_ACL (1 << 4) #define DUMP_COMPONENT_POLICY (1 << 5) #define DUMP_COMPONENT_USERMAP (1 << 6) +#define DUMP_COMPONENT_STATISTICS (1 << 7) #define DUMP_COMPONENT_ALL (0xFFFF) /* diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 046c0dc3b36..69652aa2059 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -105,6 +105,7 @@ static int use_setsessauth = 0; static int no_comments = 0; static int no_publications = 0; static int no_security_labels = 0; +static int no_statistics = 0; static int no_subscriptions = 0; static int no_toast_compression = 0; static int no_unlogged_table_data = 0; @@ -174,6 +175,7 @@ main(int argc, char *argv[]) {"no-role-passwords", no_argument, &no_role_passwords, 1}, {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, + {"no-statistics", no_argument, &no_statistics, 1}, {"no-sync", no_argument, NULL, 4}, {"no-toast-compression", no_argument, &no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, @@ -453,6 +455,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-publications"); if (no_security_labels) appendPQExpBufferStr(pgdumpopts, " --no-security-labels"); + if (no_statistics) + appendPQExpBufferStr(pgdumpopts, " --no-statistics"); if (no_subscriptions) appendPQExpBufferStr(pgdumpopts, " --no-subscriptions"); if (no_toast_compression) @@ -668,6 +672,7 @@ help(void) 
printf(_(" --no-publications do not dump publications\n")); printf(_(" --no-role-passwords do not dump passwords for roles\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index c3beacdec1d..2d326dec727 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -75,6 +75,7 @@ main(int argc, char **argv) static int no_publications = 0; static int no_security_labels = 0; static int no_subscriptions = 0; + static int no_statistics = 0; static int strict_names = 0; struct option cmdopts[] = { @@ -126,6 +127,7 @@ main(int argc, char **argv) {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, {"filter", required_argument, NULL, 4}, + {"no-statistics", no_argument, &no_statistics, 1}, {NULL, 0, NULL, 0} }; @@ -358,6 +360,7 @@ main(int argc, char **argv) opts->no_publications = no_publications; opts->no_security_labels = no_security_labels; opts->no_subscriptions = no_subscriptions; + opts->no_statistics = no_statistics; if (if_exists && !opts->dropSchema) pg_fatal("option --if-exists requires option -c/--clean"); diff --git a/src/fe_utils/Makefile b/src/fe_utils/Makefile index 946c05258f0..c734f9f6d3a 100644 --- a/src/fe_utils/Makefile +++ b/src/fe_utils/Makefile @@ -32,6 +32,7 @@ OBJS = \ query_utils.o \ recovery_gen.o \ simple_list.o \ + stats_export.o \ string_utils.o ifeq ($(PORTNAME), win32) diff --git a/src/fe_utils/meson.build b/src/fe_utils/meson.build index 14d0482a2cc..fce503f6410 100644 --- a/src/fe_utils/meson.build +++ b/src/fe_utils/meson.build @@ -12,6 +12,7 @@ fe_utils_sources = files( 
'query_utils.c',
   'recovery_gen.c',
   'simple_list.c',
+  'stats_export.c',
   'string_utils.c',
 )
 
diff --git a/src/fe_utils/stats_export.c b/src/fe_utils/stats_export.c
new file mode 100644
index 00000000000..fd09e6ea8af
--- /dev/null
+++ b/src/fe_utils/stats_export.c
@@ -0,0 +1,201 @@
+/*-------------------------------------------------------------------------
+ *
+ * Utility functions for extracting object statistics for frontend code
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/fe_utils/stats_export.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "fe_utils/stats_export.h"
+/*
+#include "libpq/libpq-fs.h"
+*/
+#include "fe_utils/string_utils.h"
+
+/*
+ * No-frills catalog queries that are named according to the statistics they
+ * fetch (relation, attribute, extended) and the earliest server version for
+ * which they work. These are presented so that if other use cases arise they
+ * can share the same base queries but utilize them in their own way.
+ *
+ * The queries themselves do not filter results, so it is up to the caller
+ * to append a WHERE clause filtering on either c.oid or a combination
+ * of c.relname and n.nspname.
+ */
+
+const char *export_class_stats_query_v9_2 =
+	"SELECT c.oid, n.nspname, c.relname, c.relpages, c.reltuples, c.relallvisible "
+	"FROM pg_class AS c "
+	"JOIN pg_namespace AS n ON n.oid = c.relnamespace";
+
+const char *export_attribute_stats_query_v17 =
+	"SELECT c.oid, n.nspname, c.relname, a.attnum, a.attname, s.inherited, "
+	"s.null_frac, s.avg_width, s.n_distinct, "
+	"s.most_common_vals::text AS most_common_vals, s.most_common_freqs, "
+	"s.histogram_bounds::text AS histogram_bounds, s.correlation, "
+	"s.most_common_elems::text AS most_common_elems, "
+	"s.most_common_elem_freqs, s.elem_count_histogram, "
+	"s.range_length_histogram::text AS range_length_histogram, "
+	"s.range_empty_frac, "
+	"s.range_bounds_histogram::text AS range_bounds_histogram "
+	"FROM pg_class AS c JOIN pg_namespace AS n ON n.oid = c.relnamespace "
+	"JOIN pg_attribute AS a ON a.attrelid = c.oid AND not a.attisdropped "
+	"JOIN pg_stats AS s ON s.schemaname = n.nspname AND s.tablename = c.relname "
+	"AND s.attname = a.attname";	/* pair each stats row with its own column */
+
+const char *export_attribute_stats_query_v9_2 =
+	"SELECT c.oid, n.nspname, c.relname, a.attnum, a.attname, s.inherited, "
+	"s.null_frac, s.avg_width, s.n_distinct, "
+	"s.most_common_vals::text AS most_common_vals, s.most_common_freqs, "
+	"s.histogram_bounds::text AS histogram_bounds, s.correlation, "
+	"s.most_common_elems::text AS most_common_elems, "
+	"s.most_common_elem_freqs, s.elem_count_histogram, "
+	"NULL::text AS range_length_histogram, NULL::real AS range_empty_frac, "
+	"NULL::text AS range_bounds_histogram "
+	"FROM pg_class AS c JOIN pg_namespace AS n ON n.oid = c.relnamespace "
+	"JOIN pg_attribute AS a ON a.attrelid = c.oid AND not a.attisdropped "
+	"JOIN pg_stats AS s ON s.schemaname = n.nspname AND s.tablename = c.relname "
+	"AND s.attname = a.attname";	/* pair each stats row with its own column */
+
+/*
+ * Returns true if the server version number supports exporting regular
+ * (e.g. pg_statistic) statistics.
+ */
+bool
+exportStatsSupported(int server_version_num)
+{
+	return (server_version_num >= MIN_SERVER_NUM);
+}
+
+/*
+ * Returns true if the server version number supports exporting extended
+ * (e.g. pg_statistic_ext, pg_statistic_ext_data) statistics.
+ *
+ * Currently, none do.
+ */
+bool
+exportExtStatsSupported(int server_version_num)
+{
+	return false;
+}
+
+/*
+ * Return the query appropriate for extracting relation statistics for the
+ * given server version, or NULL if it is older than MIN_SERVER_NUM.
+ */
+const char *
+exportClassStatsSQL(int server_version_num)
+{
+	if (server_version_num >= MIN_SERVER_NUM)
+		return export_class_stats_query_v9_2;
+	return NULL;
+}
+
+/*
+ * Return the query appropriate for extracting attribute statistics for the
+ * given server version, or NULL if it is older than MIN_SERVER_NUM.
+ */
+const char *
+exportAttributeStatsSQL(int server_version_num)
+{
+	if (server_version_num >= 170000)
+		return export_attribute_stats_query_v17;
+	if (server_version_num >= MIN_SERVER_NUM)
+		return export_attribute_stats_query_v9_2;
+	return NULL;
+}
+
+/*
+ * Generate a SQL statement that will itself generate a SQL statement to
+ * import all regular stats from a given relation into another relation.
+ *
+ * The query generated takes two parameters.
+ *
+ * $1 is cast to regclass, and identifies the source relation.
+ *
+ * $2 is a cstring, and represents the qualified name of the destination
+ * relation. If NULL, then the qualified name of the source relation will
+ * be used. In either case, the value is cast via ::regclass.
+ *
+ * The function will return NULL for invalid server version numbers.
+ * Otherwise, it returns an allocated string that the caller must free.
+ *
+ * This function needs to work on databases back to 9.2.
+ * The format() function was introduced in 9.1.
+ * The string_agg() aggregate was introduced in 9.0.
+ *
+ */
+char *exportRelationStatsSQL(int server_version_num)
+{
+	const char *relsql = exportClassStatsSQL(server_version_num);
+	const char *attrsql = exportAttributeStatsSQL(server_version_num);
+	const char *filter = "WHERE c.oid = $1::regclass";
+	char	   *s;
+	PQExpBuffer sql;
+
+	if ((relsql == NULL) || (attrsql == NULL))
+		return NULL;
+
+	/*
+	 * Set up the initial CTEs each with the same oid filter
+	 */
+	sql = createPQExpBuffer();
+	appendPQExpBuffer(sql,
+					  "WITH r AS (%s %s), a AS (%s %s), ",
+					  relsql, filter, attrsql, filter);
+
+	/*
+	 * Generate the pg_set_relation_stats function call for the relation
+	 * and one pg_set_attribute_stats function call for each attribute with
+	 * a pg_statistic entry. Rows carry (ord, attnum) sort keys so that the
+	 * set relation stats call comes first (ord 0), followed by the set
+	 * attribute stats calls (ord 1) in attnum order, even though the
+	 * attributes are identified by attname.
+	 *
+	 * Then aggregate the function calls into a single SELECT statement that
+	 * puts the calls in the order described above.
+	 */
+	appendPQExpBufferStr(sql,
+						 "s(ord,attnum,sql) AS ( "
+						 "SELECT 0, 0, format('pg_catalog.pg_set_relation_stats("
+						 "%L::regclass, %L::integer, %L::real, %L::integer)', "
+						 "coalesce($2, format('%I.%I', r.nspname, r.relname)), "
+						 "r.relpages, r.reltuples, r.relallvisible) "
+						 "FROM r "
+						 "UNION ALL "
+						 "SELECT 1, a.attnum, format('pg_catalog.pg_set_attribute_stats( "
+						 "relation => %L::regclass, attname => %L::name, "
+						 "inherited => %L::boolean, null_frac => %L::real, "
+						 "avg_width => %L::integer, n_distinct => %L::real, "
+						 "most_common_vals => %L::text, "
+						 "most_common_freqs => %L::real[], "
+						 "histogram_bounds => %L::text, "
+						 "correlation => %L::real, "
+						 "most_common_elems => %L::text, "
+						 "most_common_elem_freqs => %L::real[], "
+						 "elem_count_histogram => %L::real[], "
+						 "range_length_histogram => %L::text, "
+						 "range_empty_frac => %L::real, "
+						 "range_bounds_histogram => %L::text)', "
+						 "coalesce($2, format('%I.%I', a.nspname, a.relname)), "
+						 "a.attname, a.inherited, a.null_frac, a.avg_width, "
+						 "a.n_distinct, a.most_common_vals, a.most_common_freqs, "
+						 "a.histogram_bounds, a.correlation, "
+						 "a.most_common_elems, a.most_common_elem_freqs, "
+						 "a.elem_count_histogram, a.range_length_histogram, "
+						 "a.range_empty_frac, a.range_bounds_histogram ) "
+						 "FROM a "
+						 ") "
+						 "SELECT 'SELECT ' || string_agg(s.sql, ', ' ORDER BY s.ord, s.attnum) "
+						 "FROM s ");
+
+	s = pg_strdup(sql->data);	/* exits on OOM; caller free()s the copy */
+	destroyPQExpBuffer(sql);
+	return s;
+}
diff --git a/src/include/fe_utils/stats_export.h b/src/include/fe_utils/stats_export.h
new file mode 100644
index 00000000000..f0dc7041f79
--- /dev/null
+++ b/src/include/fe_utils/stats_export.h
@@ -0,0 +1,36 @@
+/*-------------------------------------------------------------------------
+ *
+ * stats_export.h
+ *    Queries to export statistics from current and past versions.
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/fe_utils/stats_export.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef STATS_EXPORT_H
+#define STATS_EXPORT_H
+
+#include "postgres_fe.h"
+#include "libpq-fe.h"
+
+/*
+ * The minimum supported version number. No attempt is made to get statistics
+ * import to work on versions older than this. This version was initially chosen
+ * because that was the minimum version supported by pg_dump at the time.
+ */
+#define MIN_SERVER_NUM 90200
+
+extern bool exportStatsSupported(int server_version_num);
+extern bool exportExtStatsSupported(int server_version_num);
+
+extern const char *exportClassStatsSQL(int server_version_num);
+extern const char *exportAttributeStatsSQL(int server_version_num);
+
+extern char *exportRelationStatsSQL(int server_version_num);	/* caller frees */
+
+#endif							/* STATS_EXPORT_H */
-- 
2.44.0