*** a/doc/src/sgml/ref/pg_basebackup.sgml --- b/doc/src/sgml/ref/pg_basebackup.sgml *************** *** 63,68 **** PostgreSQL documentation --- 63,77 ---- better from a performance point of view to take only one backup, and copy the result. + + + pg_basebackup can make a base backup from + not only the master but also the standby. To take a backup from the standby, + set up the standby so that it can accept replication connections (that is, set + max_wal_senders and hot_standby, + and configure host-based authentication). + You will also need to enable full_page_writes on the master. + *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** *** 157,162 **** HotStandbyState standbyState = STANDBY_DISABLED; --- 157,170 ---- static XLogRecPtr LastRec; /* + * During recovery, lastFullPageWrites keeps track of full_page_writes that + * the replayed WAL records indicate. It's initialized with full_page_writes + * that the recovery starting checkpoint record indicates, and then updated + * each time XLOG_FPW_CHANGE record is replayed. + */ + static bool lastFullPageWrites; + + /* * Local copy of SharedRecoveryInProgress variable. True actually means "not * known, need to check the shared state". */ *************** *** 355,360 **** typedef struct XLogCtlInsert --- 363,378 ---- bool forcePageWrites; /* forcing full-page writes for PITR? */ /* + * fullPageWrites is shared-memory copy of walwriter's or startup + * process' full_page_writes. All backends use this flag to determine + * whether to write full-page to WAL, instead of using process-local + * one. This is required because, when full_page_writes is changed + * by SIGHUP, we must WAL-log it before it actually affects + * WAL-logging by backends. + */ + bool fullPageWrites; + + /* * exclusiveBackup is true if a backup started with pg_start_backup() is * in progress, and nonExclusiveBackups is a counter indicating the number * of streaming base backups currently in progress. 
forcePageWrites is set *************** *** 460,465 **** typedef struct XLogCtlData --- 478,489 ---- /* Are we requested to pause recovery? */ bool recoveryPause; + /* + * lastFpwDisableRecPtr points to the start of the last replayed + * XLOG_FPW_CHANGE record that instructs full_page_writes is disabled. + */ + XLogRecPtr lastFpwDisableRecPtr; + slock_t info_lck; /* locks shared variables shown above */ } XLogCtlData; *************** *** 663,669 **** static void xlog_outrec(StringInfo buf, XLogRecord *record); #endif static void pg_start_backup_callback(int code, Datum arg); static bool read_backup_label(XLogRecPtr *checkPointLoc, ! bool *backupEndRequired); static void rm_redo_error_callback(void *arg); static int get_sync_bit(int method); --- 687,693 ---- #endif static void pg_start_backup_callback(int code, Datum arg); static bool read_backup_label(XLogRecPtr *checkPointLoc, ! bool *backupEndRequired, bool *backupFromStandby); static void rm_redo_error_callback(void *arg); static int get_sync_bit(int method); *************** *** 709,714 **** XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) --- 733,739 ---- bool updrqst; bool doPageWrites; bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH); + bool fpwChange = (rmid == RM_XLOG_ID && info == XLOG_FPW_CHANGE); uint8 info_orig = info; /* cross-check on whether we should be here or not */ *************** *** 756,765 **** begin:; /* * Decide if we need to do full-page writes in this XLOG record: true if * full_page_writes is on or we have a PITR request for it. Since we ! * don't yet have the insert lock, forcePageWrites could change under us, ! * but we'll recheck it once we have the lock. */ ! doPageWrites = fullPageWrites || Insert->forcePageWrites; len = 0; for (rdt = rdata;;) --- 781,790 ---- /* * Decide if we need to do full-page writes in this XLOG record: true if * full_page_writes is on or we have a PITR request for it. Since we ! 
* don't yet have the insert lock, fullPageWrites and forcePageWrites ! * could change under us, but we'll recheck them once we have the lock. */ ! doPageWrites = Insert->fullPageWrites || Insert->forcePageWrites; len = 0; for (rdt = rdata;;) *************** *** 939,950 **** begin:; } /* ! * Also check to see if forcePageWrites was just turned on; if we weren't ! * already doing full-page writes then go back and recompute. (If it was ! * just turned off, we could recompute the record without full pages, but ! * we choose not to bother.) */ ! if (Insert->forcePageWrites && !doPageWrites) { /* Oops, must redo it with full-page data. */ LWLockRelease(WALInsertLock); --- 964,975 ---- } /* ! * Also check to see if fullPageWrites or forcePageWrites was just turned on; ! * if we weren't already doing full-page writes then go back and recompute. ! * (If it was just turned off, we could recompute the record without full pages, ! * but we choose not to bother.) */ ! if ((Insert->fullPageWrites || Insert->forcePageWrites) && !doPageWrites) { /* Oops, must redo it with full-page data. */ LWLockRelease(WALInsertLock); *************** *** 1189,1194 **** begin:; --- 1214,1228 ---- WriteRqst = XLogCtl->xlblocks[curridx]; } + /* + * If the record is an XLOG_FPW_CHANGE, we update full_page_writes + * in shared memory before releasing WALInsertLock. This ensures that + * an XLOG_FPW_CHANGE record precedes any WAL record affected + * by this change of full_page_writes. 
+ */ + if (fpwChange) + Insert->fullPageWrites = fullPageWrites; + LWLockRelease(WALInsertLock); if (updrqst) *************** *** 5147,5152 **** BootStrapXLOG(void) --- 5181,5187 ---- checkPoint.redo.xlogid = 0; checkPoint.redo.xrecoff = XLogSegSize + SizeOfXLogLongPHD; checkPoint.ThisTimeLineID = ThisTimeLineID; + checkPoint.fullPageWrites = fullPageWrites; checkPoint.nextXidEpoch = 0; checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextOid = FirstBootstrapObjectId; *************** *** 5961,5966 **** StartupXLOG(void) --- 5996,6003 ---- uint32 freespace; TransactionId oldestActiveXID; bool backupEndRequired = false; + bool backupFromStandby = false; + DBState save_state; /* * Read control file and check XLOG status looks valid. *************** *** 6094,6100 **** StartupXLOG(void) if (StandbyMode) OwnLatch(&XLogCtl->recoveryWakeupLatch); ! if (read_backup_label(&checkPointLoc, &backupEndRequired)) { /* * When a backup_label file is present, we want to roll forward from --- 6131,6138 ---- if (StandbyMode) OwnLatch(&XLogCtl->recoveryWakeupLatch); ! if (read_backup_label(&checkPointLoc, &backupEndRequired, ! &backupFromStandby)) { /* * When a backup_label file is present, we want to roll forward from *************** *** 6210,6215 **** StartupXLOG(void) --- 6248,6255 ---- */ ThisTimeLineID = checkPoint.ThisTimeLineID; + lastFullPageWrites = checkPoint.fullPageWrites; + RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; if (XLByteLT(RecPtr, checkPoint.redo)) *************** *** 6250,6255 **** StartupXLOG(void) --- 6290,6296 ---- * pg_control with any minimum recovery stop point obtained from a * backup history file. */ + save_state = ControlFile->state; if (InArchiveRecovery) ControlFile->state = DB_IN_ARCHIVE_RECOVERY; else *************** *** 6270,6281 **** StartupXLOG(void) } /* ! 
* set backupStartPoint if we're starting recovery from a base backup */ if (haveBackupLabel) { ControlFile->backupStartPoint = checkPoint.redo; ControlFile->backupEndRequired = backupEndRequired; } ControlFile->time = (pg_time_t) time(NULL); /* No need to hold ControlFileLock yet, we aren't up far enough */ --- 6311,6339 ---- } /* ! * Set backupStartPoint if we're starting recovery from a base backup. ! * ! * Set backupEndPoint and use minRecoveryPoint as the backup end location ! * if we're starting recovery from a base backup which was taken from ! * the standby. In this case, the database system status in pg_control must ! * indicate DB_IN_ARCHIVE_RECOVERY. If it does not, the backup ! * is corrupted, so we cancel recovery. */ if (haveBackupLabel) { ControlFile->backupStartPoint = checkPoint.redo; ControlFile->backupEndRequired = backupEndRequired; + + if (backupFromStandby) + { + if (save_state != DB_IN_ARCHIVE_RECOVERY) + ereport(FATAL, + (errmsg("database system status mismatches between " + "pg_control and backup_label"), + errhint("This means that the backup is corrupted and you will " + "have to use another backup for recovery."))); + ControlFile->backupEndPoint = ControlFile->minRecoveryPoint; + } } ControlFile->time = (pg_time_t) time(NULL); /* No need to hold ControlFileLock yet, we aren't up far enough */ *************** *** 6564,6569 **** StartupXLOG(void) --- 6622,6648 ---- /* Pop the error context stack */ error_context_stack = errcontext.previous; + if (!XLogRecPtrIsInvalid(ControlFile->backupEndPoint) && + XLByteLE(ControlFile->backupEndPoint, EndRecPtr)) + { + /* + * We have reached the end of base backup, the point where + * the minimum recovery point in pg_control indicates. + * The data on disk is now consistent. Reset backupStartPoint + * and backupEndPoint. 
+ */ + elog(DEBUG1, "end of backup reached"); + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + + MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr)); + MemSet(&ControlFile->backupEndPoint, 0, sizeof(XLogRecPtr)); + ControlFile->backupEndRequired = false; + UpdateControlFile(); + + LWLockRelease(ControlFileLock); + } + /* * Update shared recoveryLastRecPtr after this record has been * replayed. *************** *** 6763,6768 **** StartupXLOG(void) --- 6842,6857 ---- /* Pre-scan prepared transactions to find out the range of XIDs present */ oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); + /* + * Update full_page_writes in shared memory and write an + * XLOG_FPW_CHANGE record before resource manager writes cleanup + * WAL records or checkpoint record is written. + */ + Insert->fullPageWrites = lastFullPageWrites; + LocalSetXLogInsertAllowed(); + UpdateFullPageWrites(); + LocalXLogInsertAllowed = -1; + if (InRecovery) { int rmid; *************** *** 7644,7649 **** CreateCheckPoint(int flags) --- 7733,7739 ---- LocalSetXLogInsertAllowed(); checkPoint.ThisTimeLineID = ThisTimeLineID; + checkPoint.fullPageWrites = Insert->fullPageWrites; /* * Compute new REDO record ptr = location of next XLOG record. *************** *** 8359,8364 **** XLogReportParameters(void) --- 8449,8496 ---- } /* + * Update full_page_writes in shared memory, and write an + * XLOG_FPW_CHANGE record if necessary. + */ + void + UpdateFullPageWrites(void) + { + XLogCtlInsert *Insert = &XLogCtl->Insert; + + /* + * Do nothing if full_page_writes has not been changed. + * + * It's safe to check the shared full_page_writes without the lock, + * because we can guarantee that there is no concurrently running + * process which can update it. + */ + if (fullPageWrites == Insert->fullPageWrites) + return; + + /* + * Write an XLOG_FPW_CHANGE record. This allows us to keep + * track of full_page_writes during archive recovery, if required. 
+ */ + if (XLogStandbyInfoActive()) + { + XLogRecData rdata; + + rdata.data = (char *) (&fullPageWrites); + rdata.len = sizeof(bool); + rdata.buffer = InvalidBuffer; + rdata.next = NULL; + + XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata); + } + else + { + LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); + Insert->fullPageWrites = fullPageWrites; + LWLockRelease(WALInsertLock); + } + } + + /* * XLOG resource manager's routines * * Definitions of info values are in include/catalog/pg_control.h, though *************** *** 8402,8408 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record) * never arrive. */ if (InArchiveRecovery && ! !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)) ereport(ERROR, (errmsg("online backup was canceled, recovery cannot continue"))); --- 8534,8541 ---- * never arrive. */ if (InArchiveRecovery && ! !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) && ! XLogRecPtrIsInvalid(ControlFile->backupEndPoint)) ereport(ERROR, (errmsg("online backup was canceled, recovery cannot continue"))); *************** *** 8571,8576 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record) --- 8704,8733 ---- /* Check to see if any changes to max_connections give problems */ CheckRequiredParameterValues(); } + else if (info == XLOG_FPW_CHANGE) + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + bool fpw; + + memcpy(&fpw, XLogRecGetData(record), sizeof(bool)); + + /* + * Update the LSN of the last replayed XLOG_FPW_CHANGE record + * so that do_pg_start_backup() and do_pg_stop_backup() can check + * whether full_page_writes has been disabled during online backup. 
+ */ + if (!fpw) + { + SpinLockAcquire(&xlogctl->info_lck); + if (XLByteLT(xlogctl->lastFpwDisableRecPtr, ReadRecPtr)) + xlogctl->lastFpwDisableRecPtr = ReadRecPtr; + SpinLockRelease(&xlogctl->info_lck); + } + + /* Keep track of full_page_writes */ + lastFullPageWrites = fpw; + } } void *************** *** 8584,8593 **** xlog_desc(StringInfo buf, uint8 xl_info, char *rec) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "checkpoint: redo %X/%X; " ! "tli %u; xid %u/%u; oid %u; multi %u; offset %u; " "oldest xid %u in DB %u; oldest running xid %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->ThisTimeLineID, checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, --- 8741,8751 ---- CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "checkpoint: redo %X/%X; " ! "tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; " "oldest xid %u in DB %u; oldest running xid %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->ThisTimeLineID, + checkpoint->fullPageWrites ? "true" : "false", checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, *************** *** 8652,8657 **** xlog_desc(StringInfo buf, uint8 xl_info, char *rec) --- 8810,8822 ---- xlrec.max_locks_per_xact, wal_level_str); } + else if (info == XLOG_FPW_CHANGE) + { + bool fpw; + + memcpy(&fpw, rec, sizeof(bool)); + appendStringInfo(buf, "full_page_writes: %s", fpw ? 
"true" : "false"); + } else appendStringInfo(buf, "UNKNOWN"); } *************** *** 8837,8842 **** XLogRecPtr --- 9002,9008 ---- do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) { bool exclusive = (labelfile == NULL); + bool recovery_in_progress = false; XLogRecPtr checkpointloc; XLogRecPtr startpoint; pg_time_t stamp_time; *************** *** 8848,8865 **** do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) FILE *fp; StringInfoData labelfbuf; if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery."))); ! if (!XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for making an online backup"), --- 9014,9040 ---- FILE *fp; StringInfoData labelfbuf; + recovery_in_progress = RecoveryInProgress(); + if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! /* ! * Currently only non-exclusive backup can be taken during recovery. ! */ ! if (recovery_in_progress && exclusive) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery."))); ! /* ! * During recovery, we don't need to check WAL level. Because, if WAL level ! * is not sufficient, it's impossible to get here during recovery. ! */ ! 
if (!recovery_in_progress && !XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for making an online backup"), *************** *** 8885,8890 **** do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) --- 9060,9068 ---- * since we expect that any pages not modified during the backup interval * must have been correctly captured by the backup.) * + * Note that forcePageWrites has no effect during an online backup from + * the standby. + * * We must hold WALInsertLock to change the value of forcePageWrites, to * ensure adequate interlocking against XLogInsert(). */ *************** *** 8927,8943 **** do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) * Therefore, if a WAL archiver (such as pglesslog) is trying to * compress out removable backup blocks, it won't remove any that * occur after this point. */ ! RequestXLogSwitch(); do { /* ! * Force a CHECKPOINT. Aside from being necessary to prevent torn * page problems, this guarantees that two successive backup runs * will have different checkpoint positions and hence different * history file names, even if nothing happened in between. * * We use CHECKPOINT_IMMEDIATE only if requested by user (via * passing fast = true). Otherwise this can take awhile. */ --- 9105,9136 ---- * Therefore, if a WAL archiver (such as pglesslog) is trying to * compress out removable backup blocks, it won't remove any that * occur after this point. + * + * During recovery, we skip forcing XLOG file switch, which means that + * the backup taken during recovery is not available for the special + * recovery case described above. */ ! if (!recovery_in_progress) ! RequestXLogSwitch(); do { + bool checkpointfpw; + /* ! * Force a CHECKPOINT. 
Aside from being necessary to prevent torn * page problems, this guarantees that two successive backup runs * will have different checkpoint positions and hence different * history file names, even if nothing happened in between. * + * During recovery, establish a restartpoint if possible. We use the last + * restartpoint as the backup starting checkpoint. This means that two + * successive backup runs can have same checkpoint positions. + * + * Since the fact that we are executing do_pg_start_backup() during + * recovery means that checkpointer is running, we can use + * RequestCheckpoint() to establish a restartpoint. + * * We use CHECKPOINT_IMMEDIATE only if requested by user (via * passing fast = true). Otherwise this can take awhile. */ *************** *** 8953,8960 **** do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) --- 9146,9187 ---- LWLockAcquire(ControlFileLock, LW_SHARED); checkpointloc = ControlFile->checkPoint; startpoint = ControlFile->checkPointCopy.redo; + checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; LWLockRelease(ControlFileLock); + if (recovery_in_progress) + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup (i.e., + * since last restartpoint used as backup starting checkpoint) + * contain full-page writes. 
+ */ + SpinLockAcquire(&xlogctl->info_lck); + recptr = xlogctl->lastFpwDisableRecPtr; + SpinLockRelease(&xlogctl->info_lck); + + if (!checkpointfpw || XLByteLE(startpoint, recptr)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "since last restartpoint"), + errhint("Enable full_page_writes and run CHECKPOINT on the master, " + "and then try an online backup again."))); + + /* + * During recovery, since we don't use the end-of-backup WAL + * record and don't write the backup history file, the starting WAL + * location doesn't need to be unique. This means that two base + * backups started at the same time might use the same checkpoint + * as starting locations. + */ + gotUniqueStartpoint = true; + } + /* * If two base backups are started at the same time (in WAL sender * processes), we need to make sure that they use different *************** *** 8994,8999 **** do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) --- 9221,9228 ---- checkpointloc.xlogid, checkpointloc.xrecoff); appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n", exclusive ? "pg_start_backup" : "streamed"); + appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n", + recovery_in_progress ? 
"standby" : "master"); appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf); appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr); *************** *** 9088,9093 **** XLogRecPtr --- 9317,9323 ---- do_pg_stop_backup(char *labelfile, bool waitforarchive) { bool exclusive = (labelfile == NULL); + bool recovery_in_progress = false; XLogRecPtr startpoint; XLogRecPtr stoppoint; XLogRecData rdata; *************** *** 9098,9103 **** do_pg_stop_backup(char *labelfile, bool waitforarchive) --- 9328,9334 ---- char stopxlogfilename[MAXFNAMELEN]; char lastxlogfilename[MAXFNAMELEN]; char histfilename[MAXFNAMELEN]; + char backupfrom[20]; uint32 _logId; uint32 _logSeg; FILE *lfp; *************** *** 9107,9125 **** do_pg_stop_backup(char *labelfile, bool waitforarchive) int waits = 0; bool reported_waiting = false; char *remaining; if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser or replication role to run a backup")))); ! if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery."))); ! if (!XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for making an online backup"), --- 9338,9366 ---- int waits = 0; bool reported_waiting = false; char *remaining; + char *ptr; + + recovery_in_progress = RecoveryInProgress(); if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser or replication role to run a backup")))); ! /* ! * Currently only non-exclusive backup can be taken during recovery. ! */ ! 
if (recovery_in_progress && exclusive) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery."))); ! /* ! * During recovery, we don't need to check WAL level. Because, if WAL level ! * is not sufficient, it's impossible to get here during recovery. ! */ ! if (!recovery_in_progress && !XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for making an online backup"), *************** *** 9210,9215 **** do_pg_stop_backup(char *labelfile, bool waitforarchive) --- 9451,9526 ---- remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */ /* + * Parse the BACKUP FROM line. If we are taking an online backup from + * the standby, we confirm that the standby has not been promoted + * during the backup. + */ + ptr = strstr(remaining, "BACKUP FROM:"); + if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); + if (strcmp(backupfrom, "standby") == 0 && !recovery_in_progress) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("online backup from the standby was canceled because " + "the standby had been promoted during the backup"), + errhint("The database backup will not be usable."))); + + /* + * During recovery, we don't write an end-of-backup record. We can + * assume that pg_control was backed up last and its minimum recovery + * point can be available as the backup end location. Without an + * end-of-backup record, we can check correctly whether we've + * reached the end of backup when starting recovery from this backup. + * + * We don't force a switch to new WAL file and wait for all the required + * files to be archived. This is okay if we use the backup to start + * the standby. 
But, if it's for an archive recovery, to ensure all the + * required files are available, a user should wait for them to be archived, + * or include them into the backup. + * + * We return the current minimum recovery point as the backup end + * location. Note that it would be bigger than the exact backup end + * location if the minimum recovery point has been updated since the backup + * of pg_control. This is harmless for current uses. + * + * XXX currently a backup history file is for informational and debug + * purposes only. It's not essential for an online backup. Furthermore, + * even if it's created, it will not be archived during recovery because + * an archiver is not invoked. So it doesn't seem worthwhile to write + * a backup history file during recovery. + */ + if (recovery_in_progress) + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup contain + * full-page writes. + */ + SpinLockAcquire(&xlogctl->info_lck); + recptr = xlogctl->lastFpwDisableRecPtr; + SpinLockRelease(&xlogctl->info_lck); + + if (XLByteLE(startpoint, recptr)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "during online backup"), + errhint("Enable full_page_writes and run CHECKPOINT on the master, " + "and then try an online backup again."))); + + LWLockAcquire(ControlFileLock, LW_SHARED); + stoppoint = ControlFile->minRecoveryPoint; + LWLockRelease(ControlFileLock); + + return stoppoint; + } + + /* * Write the backup-end xlog record */ rdata.data = (char *) (&startpoint); *************** *** 9454,9471 **** GetXLogWriteRecPtr(void) * Returns TRUE if a backup_label was found (and fills the checkpoint * location and its REDO location into *checkPointLoc and RedoStartLSN, * respectively); returns FALSE if not. If this backup_label came from a ! 
* streamed backup, *backupEndRequired is set to TRUE. */ static bool ! read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired) { char startxlogfilename[MAXFNAMELEN]; TimeLineID tli; FILE *lfp; char ch; char backuptype[20]; *backupEndRequired = false; /* * See if label file is present --- 9765,9786 ---- * Returns TRUE if a backup_label was found (and fills the checkpoint * location and its REDO location into *checkPointLoc and RedoStartLSN, * respectively); returns FALSE if not. If this backup_label came from a ! * streamed backup, *backupEndRequired is set to TRUE. If this backup_label ! * was created during recovery, *backupFromStandby is set to TRUE. */ static bool ! read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired, ! bool *backupFromStandby) { char startxlogfilename[MAXFNAMELEN]; TimeLineID tli; FILE *lfp; char ch; char backuptype[20]; + char backupfrom[20]; *backupEndRequired = false; + *backupFromStandby = false; /* * See if label file is present *************** *** 9499,9514 **** read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired) (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); /* ! * BACKUP METHOD line is new in 9.1. We can't restore from an older backup ! * anyway, but since the information on it is not strictly required, don't ! * error out if it's missing for some reason. */ ! if (fscanf(lfp, "BACKUP METHOD: %19s", backuptype) == 1) { if (strcmp(backuptype, "streamed") == 0) *backupEndRequired = true; } if (ferror(lfp) || FreeFile(lfp)) ereport(FATAL, (errcode_for_file_access(), --- 9814,9835 ---- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); /* ! * BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't ! * restore from an older backup anyway, but since the information on it ! * is not strictly required, don't error out if it's missing for some reason. */ ! 
if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1) { if (strcmp(backuptype, "streamed") == 0) *backupEndRequired = true; } + if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1) + { + if (strcmp(backupfrom, "standby") == 0) + *backupFromStandby = true; + } + if (ferror(lfp) || FreeFile(lfp)) ereport(FATAL, (errcode_for_file_access(), *** a/src/backend/postmaster/postmaster.c --- b/src/backend/postmaster/postmaster.c *************** *** 3067,3074 **** PostmasterStateMachine(void) else { /* ! * Terminate backup mode to avoid recovery after a clean fast ! * shutdown. Since a backup can only be taken during normal * running (and not, for example, while running under Hot Standby) * it only makes sense to do this if we reached normal running. If * we're still in recovery, the backup file is one we're --- 3067,3074 ---- else { /* ! * Terminate exclusive backup mode to avoid recovery after a clean fast ! * shutdown. Since an exclusive backup can only be taken during normal * running (and not, for example, while running under Hot Standby) * it only makes sense to do this if we reached normal running. If * we're still in recovery, the backup file is one we're *** a/src/backend/postmaster/walwriter.c --- b/src/backend/postmaster/walwriter.c *************** *** 218,223 **** WalWriterMain(void) --- 218,230 ---- PG_SETMASK(&UnBlockSig); /* + * There is a race condition: full_page_writes might have been changed + * by SIGHUP since the startup process had updated it in shared memory. + * To handle this case, we always update shared full_page_writes here. + */ + UpdateFullPageWrites(); + + /* * Loop forever */ for (;;) *************** *** 238,243 **** WalWriterMain(void) --- 245,256 ---- { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); + + /* + * If full_page_writes has been changed by SIGHUP, we update it + * in shared memory and write an XLOG_FPW_CHANGE record. 
+ */ + UpdateFullPageWrites(); } if (shutdown_requested) { *** a/src/backend/replication/basebackup.c --- b/src/backend/replication/basebackup.c *************** *** 180,185 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir) --- 180,201 ---- ti->path == NULL ? 1 : strlen(ti->path), false); + /* In the main tar, include pg_control last. */ + if (ti->path == NULL) + { + struct stat statbuf; + + if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat control file \"%s\": %m", + XLOG_CONTROL_FILE))); + } + + sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf); + } + /* * If we're including WAL, and this is the main data directory we * don't terminate the tar stream here. Instead, we will append *************** *** 361,371 **** SendBaseBackup(BaseBackupCmd *cmd) MemoryContext old_context; basebackup_options opt; - if (am_cascading_walsender) - ereport(FATAL, - (errcode(ERRCODE_CANNOT_CONNECT_NOW), - errmsg("recovery is still in progress, can't accept WAL streaming connections for backup"))); - parse_basebackup_options(cmd->options, &opt); backup_context = AllocSetContextCreate(CurrentMemoryContext, --- 377,382 ---- *************** *** 609,614 **** sendDir(char *path, int basepathlen, bool sizeonly) --- 620,633 ---- strcmp(pathbuf, "./postmaster.opts") == 0) continue; + /* Skip recovery.conf in the data directory */ + if (strcmp(pathbuf, "./recovery.conf") == 0) + continue; + + /* Skip pg_control here to back up it last */ + if (strcmp(pathbuf, "./global/pg_control") == 0) + continue; + if (lstat(pathbuf, &statbuf) != 0) { if (errno != ENOENT) *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** *** 130,136 **** extern int CommitSiblings; extern char *default_tablespace; extern char *temp_tablespaces; extern bool synchronize_seqscans; - extern bool fullPageWrites; extern int ssl_renegotiation_limit; extern char *SSLCipherSuites; --- 130,135 ---- *** 
a/src/bin/pg_controldata/pg_controldata.c --- b/src/bin/pg_controldata/pg_controldata.c *************** *** 209,214 **** main(int argc, char *argv[]) --- 209,216 ---- ControlFile.checkPointCopy.redo.xrecoff); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); + printf(_("Latest checkpoint's full_page_writes: %s\n"), + ControlFile.checkPointCopy.fullPageWrites ? _("yes") : _("no")); printf(_("Latest checkpoint's NextXID: %u/%u\n"), ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); *************** *** 232,237 **** main(int argc, char *argv[]) --- 234,242 ---- printf(_("Backup start location: %X/%X\n"), ControlFile.backupStartPoint.xlogid, ControlFile.backupStartPoint.xrecoff); + printf(_("Backup end location: %X/%X\n"), + ControlFile.backupEndPoint.xlogid, + ControlFile.backupEndPoint.xrecoff); printf(_("End-of-backup record required: %s\n"), ControlFile.backupEndRequired ? _("yes") : _("no")); printf(_("Current wal_level setting: %s\n"), *** a/src/bin/pg_resetxlog/pg_resetxlog.c --- b/src/bin/pg_resetxlog/pg_resetxlog.c *************** *** 489,494 **** GuessControlValues(void) --- 489,495 ---- ControlFile.checkPointCopy.redo.xlogid = 0; ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD; ControlFile.checkPointCopy.ThisTimeLineID = 1; + ControlFile.checkPointCopy.fullPageWrites = false; ControlFile.checkPointCopy.nextXidEpoch = 0; ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId; ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId; *************** *** 503,509 **** GuessControlValues(void) ControlFile.time = (pg_time_t) time(NULL); ControlFile.checkPoint = ControlFile.checkPointCopy.redo; ! /* minRecoveryPoint and backupStartPoint can be left zero */ ControlFile.wal_level = WAL_LEVEL_MINIMAL; ControlFile.MaxConnections = 100; --- 504,510 ---- ControlFile.time = (pg_time_t) time(NULL); ControlFile.checkPoint = ControlFile.checkPointCopy.redo; ! 
/* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */ ControlFile.wal_level = WAL_LEVEL_MINIMAL; ControlFile.MaxConnections = 100; *************** *** 569,574 **** PrintControlValues(bool guessed) --- 570,577 ---- sysident_str); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); + printf(_("Latest checkpoint's full_page_writes: %s\n"), + ControlFile.checkPointCopy.fullPageWrites ? _("yes") : _("no")); printf(_("Latest checkpoint's NextXID: %u/%u\n"), ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); *************** *** 637,642 **** RewriteControlFile(void) --- 640,647 ---- ControlFile.minRecoveryPoint.xrecoff = 0; ControlFile.backupStartPoint.xlogid = 0; ControlFile.backupStartPoint.xrecoff = 0; + ControlFile.backupEndPoint.xlogid = 0; + ControlFile.backupEndPoint.xrecoff = 0; ControlFile.backupEndRequired = false; /* *** a/src/include/access/xlog.h --- b/src/include/access/xlog.h *************** *** 192,197 **** extern int XLogArchiveTimeout; --- 192,198 ---- extern bool XLogArchiveMode; extern char *XLogArchiveCommand; extern bool EnableHotStandby; + extern bool fullPageWrites; extern bool log_checkpoints; /* WAL levels */ *************** *** 307,312 **** extern void CreateCheckPoint(int flags); --- 308,314 ---- extern bool CreateRestartPoint(int flags); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); + extern void UpdateFullPageWrites(void); extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); *** a/src/include/catalog/pg_control.h --- b/src/include/catalog/pg_control.h *************** *** 21,27 **** /* Version identifier for this pg_control format */ ! #define PG_CONTROL_VERSION 921 /* * Body of CheckPoint XLOG records. This is declared here because we keep --- 21,27 ---- /* Version identifier for this pg_control format */ ! 
#define PG_CONTROL_VERSION 922 /* * Body of CheckPoint XLOG records. This is declared here because we keep *************** *** 33,38 **** typedef struct CheckPoint --- 33,39 ---- XLogRecPtr redo; /* next RecPtr available when we began to * create CheckPoint (i.e. REDO start point) */ TimeLineID ThisTimeLineID; /* current TLI */ + bool fullPageWrites; /* current full_page_writes */ uint32 nextXidEpoch; /* higher-order bits of nextXid */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */ *************** *** 60,65 **** typedef struct CheckPoint --- 61,67 ---- #define XLOG_BACKUP_END 0x50 #define XLOG_PARAMETER_CHANGE 0x60 #define XLOG_RESTORE_POINT 0x70 + #define XLOG_FPW_CHANGE 0x80 /* *************** *** 138,143 **** typedef struct ControlFileData --- 140,151 ---- * record, to make sure the end-of-backup record corresponds the base * backup we're recovering from. * + * backupEndPoint is the backup end location, if we are recovering from + * an online backup which was taken from the standby and haven't reached + * the end of backup yet. It is initialized to the minimum recovery point + * in pg_control which was backed up last. It is reset to zero when + * the end of backup is reached, and we mustn't start up before that. + * * If backupEndRequired is true, we know for sure that we're restoring * from a backup, and must see a backup-end record before we can safely * start up. If it's false, but backupStartPoint is set, a backup_label *************** *** 146,151 **** typedef struct ControlFileData --- 154,160 ---- */ XLogRecPtr minRecoveryPoint; XLogRecPtr backupStartPoint; + XLogRecPtr backupEndPoint; bool backupEndRequired; /*