From 277d969c31349b22e2c7726935d681e72aacaece Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 25 Jan 2019 17:18:28 -0500 Subject: [PATCH v19 3/3] During pg_upgrade, conditionally skip transfer of FSMs. If a heap on the old cluster has 4 pages or fewer, don't copy or link the FSM. This will reduce space usage for installations with large numbers of small tables. --- src/bin/pg_upgrade/info.c | 18 ++++++- src/bin/pg_upgrade/pg_upgrade.h | 6 +++ src/bin/pg_upgrade/relfilenode.c | 84 +++++++++++++++++++++++++++----- 3 files changed, 93 insertions(+), 15 deletions(-) diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 2f925f086c..55d4911d10 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data, map->old_db_oid = old_db->db_oid; map->new_db_oid = new_db->db_oid; + map->relpages = old_rel->relpages; + map->relkind = old_rel->relkind; /* * old_relfilenode might differ from pg_class.oid (and hence @@ -415,9 +417,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) int ntups; int relnum; int num_rels = 0; + int relpages; char *nspname = NULL; char *relname = NULL; char *tablespace = NULL; + char *relkind = NULL; int i_spclocation, i_nspname, i_relname, @@ -425,7 +429,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) i_indtable, i_toastheap, i_relfilenode, - i_reltablespace; + i_reltablespace, + i_relpages, + i_relkind; char query[QUERY_ALLOC]; char *last_namespace = NULL, *last_tablespace = NULL; @@ -494,7 +500,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) */ snprintf(query + strlen(query), sizeof(query) - strlen(query), "SELECT all_rels.*, n.nspname, c.relname, " - " c.relfilenode, c.reltablespace, %s " + " c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s " "FROM (SELECT * FROM regular_heap " " UNION ALL " " SELECT * FROM toast_heap " @@ -526,6 +532,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) i_relfilenode = PQfnumber(res, "relfilenode"); i_reltablespace = PQfnumber(res, "reltablespace"); i_spclocation = PQfnumber(res, "spclocation"); + i_relpages = PQfnumber(res, "relpages"); + i_relkind = PQfnumber(res, "relkind"); for (relnum = 0; relnum < ntups; relnum++) { @@ -555,6 +563,12 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) relname = PQgetvalue(res, relnum, i_relname); curr->relname = pg_strdup(relname); + relpages = atoi(PQgetvalue(res, relnum, i_relpages)); + curr->relpages = relpages; + + relkind = PQgetvalue(res, relnum, i_relkind); + curr->relkind = relkind[0]; + curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode)); curr->tblsp_alloc = false; diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 2f67eee22b..baeb8ff0f8 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -147,6 +147,8 @@ typedef struct char *tablespace; /* tablespace path; "" for cluster default */ bool nsp_alloc; /* should nspname be freed? */ bool tblsp_alloc; /* should tablespace be freed? */ + int32 relpages; /* # of pages -- see pg_class.h */ + char relkind; /* relation kind -- see pg_class.h */ } RelInfo; typedef struct @@ -173,6 +175,10 @@ typedef struct */ Oid old_relfilenode; Oid new_relfilenode; + + int32 relpages; /* # of pages -- see pg_class.h */ + char relkind; /* relation kind -- see pg_class.h */ + /* the rest are used only for logging and error reporting */ char *nspname; /* namespaces */ char *relname; diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index 0c78073f0e..ffcec23d7c 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -14,10 +14,12 @@ #include #include "catalog/pg_class_d.h" #include "access/transam.h" +#include "storage/freespace.h" static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); -static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit); +static Size transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit); +static bool new_cluster_needs_fsm(char relkind, Size first_seg_size); /* @@ -144,6 +146,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) int mapnum; bool vm_crashsafe_match = true; bool vm_must_add_frozenbit = false; + Size first_seg_size = 0; /* * Do the old and new cluster disagree on the crash-safetiness of the vm @@ -165,18 +168,22 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) if (old_tablespace == NULL || strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) { - /* transfer primary file */ - transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit); + /* Transfer main fork and return size of the first segment. */ + first_seg_size = transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit); /* fsm/vm files added in PG 8.4 */ if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) { /* - * Copy/link any fsm and vm files, if they exist + * Transfer any FSM files if they would be created in the + * new cluster. */ - transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); + if (new_cluster_needs_fsm(maps[mapnum].relkind, first_seg_size)) + (void) transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); + + /* Transfer any VM files if we can trust their contents. */ if (vm_crashsafe_match) - transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit); + (void) transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit); } } } @@ -188,9 +195,10 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) * * Copy or link file from old cluster to new one. If vm_must_add_frozenbit * is true, visibility map forks are converted and rewritten, even in link - * mode. + * mode. Returns size of the first segment. We only care about the accuracy + * of the size for small heap relations. */ -static void +static Size transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit) { char old_file[MAXPGPATH]; @@ -198,6 +206,7 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro int segno; char extent_suffix[65]; struct stat statbuf; + Size first_seg_size = 0; /* * Now copy/link any related segments as well. Remember, PG breaks large @@ -234,16 +243,40 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro { /* File does not exist? That's OK, just return */ if (errno == ENOENT) - return; + return first_seg_size; else - pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\" to \"%s\"): %s\n", - map->nspname, map->relname, old_file, new_file, - strerror(errno)); + goto fatal; } /* If file is empty, just return */ if (statbuf.st_size == 0) - return; + return first_seg_size; + } + + /* Save size of the first segment of the main fork. */ + + else if (map->relpages <= HEAP_FSM_CREATION_THRESHOLD && + (map->relkind == RELKIND_RELATION || + map->relkind == RELKIND_TOASTVALUE)) + { + /* + * In this case, if pg_class.relpages is wrong, it's possible + * that a FSM will be skipped when we actually need it. To guard + * against this, we verify the size of the first segment. + */ + if (stat(old_file, &statbuf) != 0) + goto fatal; + else + first_seg_size = statbuf.st_size; + } + else + { + /* + * For indexes etc., we don't care if pg_class.relpages is wrong, + * since we always transfer their FSMs. For heaps, we might + * transfer a FSM when we don't need to, but this is harmless. + */ + first_seg_size = Min(map->relpages, RELSEG_SIZE) * BLCKSZ; } unlink(new_file); @@ -277,4 +310,29 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro linkFile(old_file, new_file, map->nspname, map->relname); } } + +fatal: + pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\" to \"%s\"): %s\n", + map->nspname, map->relname, old_file, new_file, + strerror(errno)); +} + +/* + * Return false for small heaps if we're upgrading across version 12, + * the first where small heap relations don't have FSMs by default. + */ +static bool +new_cluster_needs_fsm(char relkind, Size first_seg_size) +{ + if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE) + return true; + + if (first_seg_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ) + return true; + + if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100 && + GET_MAJOR_VERSION(new_cluster.major_version) >= 1200) + return false; + else + return true; } -- 2.17.1