From 9652e80028e1c5414445e69f1bfd3631cce2d6f3 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Mon, 1 Apr 2024 00:37:59 +0200 Subject: [PATCH v20240401 7/7] WIP: copy larger chunks from the same file --- src/bin/pg_combinebackup/reconstruct.c | 162 +++++++++++++++++-------- 1 file changed, 114 insertions(+), 48 deletions(-) diff --git a/src/bin/pg_combinebackup/reconstruct.c b/src/bin/pg_combinebackup/reconstruct.c index e4081ddfcec..c8c0f7046e5 100644 --- a/src/bin/pg_combinebackup/reconstruct.c +++ b/src/bin/pg_combinebackup/reconstruct.c @@ -540,7 +540,7 @@ write_reconstructed_file(char *input_filename, CopyMethod copy_method) { int wfd = -1; - unsigned i; + unsigned next_idx; unsigned zero_blocks = 0; unsigned prefetch_index = 0; @@ -623,23 +623,43 @@ write_reconstructed_file(char *input_filename, pg_fatal("could not open file \"%s\": %m", output_filename); /* Read and write the blocks as required. */ - for (i = 0; i < block_length; ++i) + next_idx = 0; + while (next_idx < block_length) { uint8 buffer[BLCKSZ]; - rfile *s = sourcemap[i]; + int start_idx = next_idx; + int last_idx = next_idx; + rfile *s = sourcemap[start_idx]; int wb; + int nblocks; bool skip_page_read = false; bool use_copy_range = false; + /* + * find the last block to use from the same source file, but don't + * work with more than 128 blocks (1MB) at a time + * + * XXX not sure if the second condition is 100% correct, but it gets + * capped a couple lines later so ok + */ + while ((last_idx + 1 < block_length) && + (sourcemap[start_idx] == sourcemap[last_idx+1]) && + (last_idx - start_idx <= 128)) + last_idx += 1; + + /* how many blocks in this chunk, and set start of the next loop */ + nblocks = Min(128, last_idx - start_idx + 1); + next_idx += nblocks; + /* Update accounting information. */ if (s == NULL) - ++zero_blocks; + zero_blocks += nblocks; else { - s->num_blocks_read++; + s->num_blocks_read += nblocks; s->highest_offset_read = Max(s->highest_offset_read, - offsetmap[i] + BLCKSZ); + offsetmap[last_idx] + BLCKSZ); } /* Skip the rest of this in dry-run mode. */ @@ -673,14 +693,34 @@ write_reconstructed_file(char *input_filename, * uninitialized block, so just zero-fill it. */ memset(buffer, 0, BLCKSZ); + + for (int j = 0; j < nblocks; j++) + { + /* Write out the block. */ + if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ) + { + if (wb < 0) + pg_fatal("could not write file \"%s\": %m", output_filename); + else + pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", + output_filename, wb, BLCKSZ); + } + + /* Update the checksum computation. */ + if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + pg_fatal("could not update checksum of file \"%s\"", + output_filename); + } + + continue; } - else if (!skip_page_read) - { - int rb; - /* maybe do some prefetching */ + /* do the prefetching, if we're to read the pages in any way */ + if (!skip_page_read) + { + /* maybe do some prefetching, until after the end of this range */ prefetch_distance = Min(prefetch_distance + 1, prefetch_max); - while (prefetch_index < Min(block_length, i + prefetch_distance)) + while (prefetch_index < Min(block_length, last_idx + prefetch_distance)) { rfile *ps = sourcemap[prefetch_index]; @@ -691,56 +731,82 @@ write_reconstructed_file(char *input_filename, prefetch_index++; } + } - /* Read the block from the correct source, except if dry-run. */ - rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[i]); - if (rb != BLCKSZ) + /* now copy the blocks using either read/write or copy_file_range */ + if (!use_copy_range) + { + for (int j = 0; j < nblocks; j++) { - if (rb < 0) - pg_fatal("could not read file \"%s\": %m", s->filename); - else - pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", - s->filename, rb, BLCKSZ, - (unsigned long long) offsetmap[i]); + int rb; + + /* Read the block from the correct source, except if dry-run. */ + rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[start_idx + j]); + if (rb != BLCKSZ) + { + if (rb < 0) + pg_fatal("could not read file \"%s\": %m", s->filename); + else + pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", + s->filename, rb, BLCKSZ, + (unsigned long long) offsetmap[start_idx + j]); + } + + /* Write out the block. */ + if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ) + { + if (wb < 0) + pg_fatal("could not write file \"%s\": %m", output_filename); + else + pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", + output_filename, wb, BLCKSZ); + } + + /* Update the checksum computation. */ + if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + pg_fatal("could not update checksum of file \"%s\"", + output_filename); } } - - /* - * If possible, copy the block using copy_file_range. If not possible - * (not requested/supported, or the block is zero-filled), fallback to - * the regular write. - */ - if (use_copy_range) + else /* use copy_file_range */ { - wb = copy_file_range(s->fd, &offsetmap[i], wfd, NULL, BLCKSZ, 0); + /* copy_file_range modifies the passed offset, so make a copy */ + off_t off = offsetmap[start_idx]; + + wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ * nblocks, 0); if (wb < 0) pg_fatal("error while copying file range from \"%s\" to \"%s\": %m", input_filename, output_filename); - else if (wb != BLCKSZ) + else if (wb != nblocks * BLCKSZ) pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", - output_filename, wb, BLCKSZ); - } - else - { - /* Write out the block. */ - if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ) + output_filename, wb, nblocks * BLCKSZ); + + if (skip_page_read) + continue; + + for (int j = 0; j < nblocks; j++) { - if (wb < 0) - pg_fatal("could not write file \"%s\": %m", output_filename); - else - pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", - output_filename, wb, BLCKSZ); + int rb; + + /* Read the block from the correct source, except if dry-run. */ + rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[start_idx + j]); + if (rb != BLCKSZ) + { + if (rb < 0) + pg_fatal("could not read file \"%s\": %m", s->filename); + else + pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", + s->filename, rb, BLCKSZ, + (unsigned long long) offsetmap[start_idx + j]); + } + + /* Update the checksum computation. */ + if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + pg_fatal("could not update checksum of file \"%s\"", + output_filename); } } - - /* - * Update the checksum computation (we must have read the page if we're - * to calculate the checksum). - */ - if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) - pg_fatal("could not update checksum of file \"%s\"", - output_filename); } /* Debugging output. */ -- 2.44.0