From 772790b5a5b4ab215d1243722f1b31303dc976f5 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 27 Jun 2023 15:59:23 +0200 Subject: [PATCH v4 1/6] Expose f_smgr to extensions for manual implementation There are various reasons why one would want to create their own implementation of a storage manager, among which are block-level compression, encryption and offloading to cold storage. This is the first patch of the series; it allows extensions to register their own SMgr. Note, however, that extension SMgrs are not yet used - every relation is still opened with the md.c smgr, which is always the first SMgr to register. Future commits will include facilities to select an SMgr for each tablespace. --- src/backend/postmaster/postmaster.c | 5 + src/backend/storage/smgr/md.c | 187 +++++++++++++++++++--------- src/backend/storage/smgr/smgr.c | 137 ++++++++++---------- src/backend/utils/init/miscinit.c | 13 ++ src/include/miscadmin.h | 1 + src/include/storage/md.h | 4 + src/include/storage/smgr.h | 59 +++++++-- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 266 insertions(+), 141 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d2a7a7add6f..88ea821573d 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -916,6 +916,11 @@ PostmasterMain(int argc, char *argv[]) */ ApplyLauncherRegister(); + /* + * Register built-in managers that are not part of static arrays + */ + register_builtin_dynamic_managers(); + /* * process any libraries that should be preloaded at postmaster start */ diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index f3220f98dc4..5a2072e0816 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -84,6 +84,21 @@ typedef struct _MdfdVec } MdfdVec; static MemoryContext MdCxt; /* context for all MdfdVec objects */ +SMgrId MdSMgrId; + +typedef struct +{ + SMgrRelationData reln; /* parent data */ + + /* + * for md.c; per-fork 
arrays of the number of open segments + * (md_num_open_segs) and the segments themselves (md_seg_fds). + */ + int md_num_open_segs[MAX_FORKNUM + 1]; + MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; +} MdSMgrRelationData; + +typedef MdSMgrRelationData *MdSMgrRelation; /* Populate a file tag describing an md.c segment file. */ @@ -130,26 +145,55 @@ typedef struct MdPathStr } MdPathStr; +void +mdsmgr_register(void) +{ + /* magnetic disk; smgr_register() copies this struct, so a local suffices */ + f_smgr md_smgr = (f_smgr) { + .name = "md", + .smgr_init = mdinit, + .smgr_shutdown = NULL, + .smgr_open = mdopen, + .smgr_close = mdclose, + .smgr_create = mdcreate, + .smgr_exists = mdexists, + .smgr_unlink = mdunlink, + .smgr_extend = mdextend, + .smgr_zeroextend = mdzeroextend, + .smgr_prefetch = mdprefetch, + .smgr_maxcombine = mdmaxcombine, + .smgr_readv = mdreadv, + .smgr_writev = mdwritev, + .smgr_writeback = mdwriteback, + .smgr_nblocks = mdnblocks, + .smgr_truncate = mdtruncate, + .smgr_immedsync = mdimmedsync, + .smgr_registersync = mdregistersync, + }; + + MdSMgrId = smgr_register(&md_smgr, sizeof(MdSMgrRelationData)); +} + /* local routines */ static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); -static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior); -static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior); +static void register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); -static void _fdvec_resize(SMgrRelation reln, +static void _fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg); -static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, +static MdPathStr _mdfd_segpath(MdSMgrRelation reln, ForkNumber 
forknum, BlockNumber segno); -static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags); -static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior); -static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, +static BlockNumber _mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static inline int @@ -182,6 +226,8 @@ mdinit(void) bool mdexists(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* * Close it first, to ensure that we notice if the fork has been unlinked * since we opened it. As an optimization, we can skip that in recovery, @@ -190,7 +236,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) if (!InRecovery) mdclose(reln, forknum); - return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL); + return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL); } /* @@ -201,14 +247,15 @@ mdexists(SMgrRelation reln, ForkNumber forknum) void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; RelPathStr path; File fd; - if (isRedo && reln->md_num_open_segs[forknum] > 0) + if (isRedo && mdreln->md_num_open_segs[forknum] > 0) return; /* created and opened already... 
*/ - Assert(reln->md_num_open_segs[forknum] == 0); + Assert(mdreln->md_num_open_segs[forknum] == 0); /* * We may be using the target table space for the first time in this @@ -243,13 +290,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) } } - _fdvec_resize(reln, forknum, 1); - mdfd = &reln->md_seg_fds[forknum][0]; + _fdvec_resize(mdreln, forknum, 1); + mdfd = &mdreln->md_seg_fds[forknum][0]; mdfd->mdfd_vfd = fd; mdfd->mdfd_segno = 0; if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, mdfd); + register_dirty_segment(mdreln, forknum, mdfd); } /* @@ -467,6 +514,7 @@ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; off_t seekpos; int nbytes; MdfdVec *v; @@ -493,7 +541,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rlocator, forknum).str, InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -517,9 +565,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); } /* @@ -532,6 +580,7 @@ void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber curblocknum = blocknum; int remblocks = nblocks; @@ -566,7 +615,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, else numblocks = remblocks; - v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE); + v = _mdfd_getseg(mdreln, 
forknum, curblocknum, skipFsync, EXTENSION_CREATE); Assert(segstartblock < RELSEG_SIZE); Assert(segstartblock + numblocks <= RELSEG_SIZE); @@ -621,9 +670,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); remblocks -= numblocks; curblocknum += numblocks; @@ -641,7 +690,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * invent one out of whole cloth. */ static MdfdVec * -mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) +mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior) { MdfdVec *mdfd; RelPathStr path; @@ -651,7 +700,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) if (reln->md_num_open_segs[forknum] > 0) return &reln->md_seg_fds[forknum][0]; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); fd = PathNameOpenFile(path.str, _mdfd_open_flags()); @@ -681,9 +730,11 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) void mdopen(SMgrRelation reln) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* mark it not open */ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++) - reln->md_num_open_segs[forknum] = 0; + mdreln->md_num_open_segs[forknum] = 0; } /* @@ -692,7 +743,8 @@ mdopen(SMgrRelation reln) void mdclose(SMgrRelation reln, ForkNumber forknum) { - int nopensegs = reln->md_num_open_segs[forknum]; + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + int nopensegs = mdreln->md_num_open_segs[forknum]; /* No work if already closed */ if (nopensegs == 0) @@ -701,10 +753,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* close segments starting from the end */ while (nopensegs > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; + MdfdVec *v = 
&mdreln->md_seg_fds[forknum][nopensegs - 1]; FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, nopensegs - 1); + _fdvec_resize(mdreln, forknum, nopensegs - 1); nopensegs--; } } @@ -717,6 +769,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks) { #ifdef USE_PREFETCH + MdSMgrRelation mdreln = (MdSMgrRelation) reln; Assert((io_direct_flags & IO_DIRECT_DATA) == 0); @@ -729,7 +782,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; int nblocks_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL); if (v == NULL) return false; @@ -827,6 +880,8 @@ void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + while (nblocks > 0) { struct iovec iov[PG_IOV_MAX]; @@ -838,7 +893,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -948,6 +1003,8 @@ void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* This assert is too expensive to have on normally ... 
*/ #ifdef CHECK_WRITE_VS_EXTEND Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum)); @@ -964,7 +1021,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -1034,7 +1091,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); nblocks -= nblocks_this_segment; buffers += nblocks_this_segment; @@ -1053,6 +1110,8 @@ void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + Assert((io_direct_flags & IO_DIRECT_DATA) == 0); /* @@ -1067,7 +1126,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, int segnum_start, segnum_end; - v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ , + v = _mdfd_getseg(mdreln, forknum, blocknum, true /* not used */ , EXTENSION_DONT_OPEN); /* @@ -1111,14 +1170,15 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber nblocks; BlockNumber segno; - mdopenfork(reln, forknum, EXTENSION_FAIL); + mdopenfork(mdreln, forknum, EXTENSION_FAIL); /* mdopen has opened the first segment */ - Assert(reln->md_num_open_segs[forknum] > 0); + Assert(mdreln->md_num_open_segs[forknum] > 0); /* * Start from the last open segments, to avoid redundant seeks. We have @@ -1133,12 +1193,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * that's OK because the checkpointer never needs to compute relation * size.) 
*/ - segno = reln->md_num_open_segs[forknum] - 1; - v = &reln->md_seg_fds[forknum][segno]; + segno = mdreln->md_num_open_segs[forknum] - 1; + v = &mdreln->md_seg_fds[forknum][segno]; for (;;) { - nblocks = _mdnblocks(reln, forknum, v); + nblocks = _mdnblocks(mdreln, forknum, v); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) @@ -1156,7 +1216,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * undermines _mdfd_getseg's attempts to notice and report an error * upon access to a missing segment. */ - v = _mdfd_openseg(reln, forknum, segno, 0); + v = _mdfd_openseg(mdreln, forknum, segno, 0); if (v == NULL) return segno * ((BlockNumber) RELSEG_SIZE); } @@ -1176,6 +1236,7 @@ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; BlockNumber priorblocks; int curopensegs; @@ -1196,14 +1257,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, * Truncate segments, starting at the last one. Starting at the end makes * managing the memory for the fd array easier, should there be errors. 
*/ - curopensegs = reln->md_num_open_segs[forknum]; + curopensegs = mdreln->md_num_open_segs[forknum]; while (curopensegs > 0) { MdfdVec *v; priorblocks = (curopensegs - 1) * RELSEG_SIZE; - v = &reln->md_seg_fds[forknum][curopensegs - 1]; + v = &mdreln->md_seg_fds[forknum][curopensegs - 1]; if (priorblocks > nblocks) { @@ -1218,13 +1279,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd)))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* we never drop the 1st segment */ - Assert(v != &reln->md_seg_fds[forknum][0]); + Assert(v != &mdreln->md_seg_fds[forknum][0]); FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, curopensegs - 1); + _fdvec_resize(mdreln, forknum, curopensegs - 1); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { @@ -1244,7 +1305,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd), nblocks))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); } else { @@ -1264,6 +1325,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, void mdregistersync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1273,7 +1335,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1281,20 +1343,20 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * harmless. We don't bother to clean them up and take a risk of further * trouble. The next mdclose() will soon close them. 
*/ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* Close inactive segments immediately */ if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1315,6 +1377,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) void mdimmedsync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1324,7 +1387,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1332,12 +1395,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * is harmless. We don't bother to clean them up and take a risk of * further trouble. The next mdclose() will soon close them. */ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; /* * fsyncs done through mdimmedsync() should be tracked in a separate @@ -1358,7 +1421,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1375,14 +1438,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * enough to be a performance problem). 
*/ static void -register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { FileTag tag; - INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno); + INIT_MD_FILETAG(tag, reln->reln.smgr_rlocator.locator, forknum, seg->mdfd_segno); /* Temp relations should never be fsync'd */ - Assert(!SmgrIsTemp(reln)); + Assert(!SmgrIsTemp(&reln->reln)); if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) { @@ -1500,7 +1563,7 @@ DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo) * _fdvec_resize() -- Resize the fork's open segments array */ static void -_fdvec_resize(SMgrRelation reln, +_fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg) { @@ -1548,12 +1611,12 @@ _fdvec_resize(SMgrRelation reln, * returned string is palloc'd. */ static MdPathStr -_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) +_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno) { RelPathStr path; MdPathStr fullpath; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); if (segno > 0) sprintf(fullpath.str, "%s.%u", path.str, segno); @@ -1568,7 +1631,7 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) * and make a MdfdVec object for it. Returns NULL on failure. */ static MdfdVec * -_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, +_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags) { MdfdVec *v; @@ -1611,7 +1674,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * EXTENSION_CREATE case. 
*/ static MdfdVec * -_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, +_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior) { MdfdVec *v; @@ -1685,7 +1748,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, MCXT_ALLOC_ZERO); - mdextend(reln, forknum, + mdextend((SMgrRelation) reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, zerobuf, skipFsync); pfree(zerobuf); @@ -1740,7 +1803,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, * Get number of blocks present in a single disk file */ static BlockNumber -_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +_mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { off_t len; @@ -1763,7 +1826,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) int mdsyncfiletag(const FileTag *ftag, char *path) { - SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER); + MdSMgrRelation reln = (MdSMgrRelation) smgropen(ftag->rlocator, INVALID_PROC_NUMBER); File file; instr_time io_start; bool need_to_close; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index ebe35c04de5..7635c231ea0 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -53,84 +53,21 @@ #include "access/xlogutils.h" #include "lib/ilist.h" +#include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/md.h" #include "storage/smgr.h" +#include "port/atomics.h" #include "utils/hsearch.h" #include "utils/inval.h" +#include "utils/memutils.h" +static f_smgr *smgrsw; -/* - * This struct of function pointers defines the API between smgr.c and - * any individual storage manager module. Note that smgr subfunctions are - * generally expected to report problems via elog(ERROR). 
An exception is - * that smgr_unlink should use elog(WARNING), rather than erroring out, - * because we normally unlink relations during post-commit/abort cleanup, - * and so it's too late to raise an error. Also, various conditions that - * would normally be errors should be allowed during bootstrap and/or WAL - * recovery --- see comments in md.c for details. - */ -typedef struct f_smgr -{ - void (*smgr_init) (void); /* may be NULL */ - void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, - bool isRedo); - void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); - void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); - bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); - uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); - void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - void **buffers, BlockNumber nblocks); - void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, - bool skipFsync); - void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); -} f_smgr; - -static const 
f_smgr smgrsw[] = { - /* magnetic disk */ - { - .smgr_init = mdinit, - .smgr_shutdown = NULL, - .smgr_open = mdopen, - .smgr_close = mdclose, - .smgr_create = mdcreate, - .smgr_exists = mdexists, - .smgr_unlink = mdunlink, - .smgr_extend = mdextend, - .smgr_zeroextend = mdzeroextend, - .smgr_prefetch = mdprefetch, - .smgr_maxcombine = mdmaxcombine, - .smgr_readv = mdreadv, - .smgr_writev = mdwritev, - .smgr_writeback = mdwriteback, - .smgr_nblocks = mdnblocks, - .smgr_truncate = mdtruncate, - .smgr_immedsync = mdimmedsync, - .smgr_registersync = mdregistersync, - } -}; +static int NSmgr = 0; -static const int NSmgr = lengthof(smgrsw); +static Size LargestSMgrRelationSize = 0; /* * Each backend has a hashtable that stores all extant SMgrRelation objects. @@ -144,6 +81,60 @@ static dlist_head unpinned_relns; static void smgrshutdown(int code, Datum arg); static void smgrdestroy(SMgrRelation reln); +#define MaxSMgrId UINT8_MAX + +/* + * smgr_register() -- add a storage manager to the smgrsw dispatch table. + * + * Copies *smgr, returns the SMgrId assigned to it, and records + * smgrrelation_size for sizing the smgr relation hash table entries. + */ +SMgrId +smgr_register(const f_smgr *smgr, Size smgrrelation_size) +{ + SMgrId my_id; + MemoryContext old; + + if (process_shared_preload_libraries_done) + elog(FATAL, "SMgrs must be registered in the shared_preload_libraries phase"); + if (NSmgr == MaxSMgrId) + elog(FATAL, "Too many smgrs registered"); + if (smgr->name == NULL || *smgr->name == 0) + elog(FATAL, "smgr registered with invalid name"); + if (smgrrelation_size < sizeof(SMgrRelationData)) + elog(FATAL, "smgr \"%s\" registered with too small a relation size", smgr->name); + + Assert(smgr->smgr_open != NULL); + Assert(smgr->smgr_close != NULL); + Assert(smgr->smgr_create != NULL); + Assert(smgr->smgr_exists != NULL); + Assert(smgr->smgr_unlink != NULL); + Assert(smgr->smgr_extend != NULL); + Assert(smgr->smgr_zeroextend != NULL); + Assert(smgr->smgr_prefetch != NULL); + Assert(smgr->smgr_maxcombine != NULL); + Assert(smgr->smgr_readv != NULL); + Assert(smgr->smgr_writev != NULL); + Assert(smgr->smgr_writeback != NULL); + Assert(smgr->smgr_nblocks != NULL); + Assert(smgr->smgr_truncate != NULL); + Assert(smgr->smgr_immedsync != NULL); + Assert(smgr->smgr_registersync != NULL); + + /* grow first: palloc errors out on OOM, so bump NSmgr only afterwards */ + old = MemoryContextSwitchTo(TopMemoryContext); + if (smgrsw == NULL) + smgrsw = palloc_array(f_smgr, 1); 
+ else + smgrsw = repalloc_array(smgrsw, f_smgr, NSmgr + 1); + MemoryContextSwitchTo(old); + + my_id = NSmgr++; + smgrsw[my_id] = *smgr; + LargestSMgrRelationSize = Max(LargestSMgrRelationSize, smgrrelation_size); + + return my_id; +} /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage @@ -211,8 +202,11 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) /* First time through: initialize the hash table */ HASHCTL ctl; + LargestSMgrRelationSize = MAXALIGN(LargestSMgrRelationSize); + Assert(NSmgr > 0); + ctl.keysize = sizeof(RelFileLocatorBackend); - ctl.entrysize = sizeof(SMgrRelationData); + ctl.entrysize = LargestSMgrRelationSize; SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); dlist_init(&unpinned_relns); @@ -232,7 +226,8 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = 0; /* we only have md.c at present */ + + reln->smgr_which = MdSMgrId; /* we only have md.c at present */ /* implementation-specific initialization */ smgrsw[reln->smgr_which].smgr_open(reln); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index dc3521457c7..3176cdce6d7 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -43,6 +43,7 @@ #include "replication/slotsync.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/md.h" #include "storage/latch.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" @@ -192,6 +193,9 @@ InitStandaloneProcess(const char *argv0) InitProcessLocalLatch(); InitializeLatchWaitSet(); + /* Initialize smgrs */ + register_builtin_dynamic_managers(); + /* * For consistency with InitPostmasterChild, initialize signal mask here. 
* But we don't unblock SIGQUIT or provide a default handler for it. @@ -1920,6 +1924,15 @@ process_session_preload_libraries(void) true); } +/* + * Register the built-in managers that use run-time registration (md.c). + */ +void +register_builtin_dynamic_managers(void) +{ + mdsmgr_register(); +} + /* * process any shared memory requests from preloaded libraries */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index a2b63495eec..ff4ef578a1f 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -513,6 +513,7 @@ extern void TouchSocketLockFiles(void); extern void AddToDataDirLockFile(int target_line, const char *str); extern bool RecheckDataDirLockFile(void); extern void ValidatePgVersion(const char *path); +extern void register_builtin_dynamic_managers(void); extern void process_shared_preload_libraries(void); extern void process_session_preload_libraries(void); extern void process_shmem_requests(void); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index 05bf537066e..da1d1d339be 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -19,6 +19,10 @@ #include "storage/smgr.h" #include "storage/sync.h" +/* registration function and resulting id for the md storage manager */ +extern void mdsmgr_register(void); +extern SMgrId MdSMgrId; + /* md storage manager functionality */ extern void mdinit(void); extern void mdopen(SMgrRelation reln); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 4016b206ad6..52f74f917b2 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -18,6 +18,8 @@ #include "storage/block.h" #include "storage/relfilelocator.h" +typedef uint8 SMgrId; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) @@ -51,14 +53,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. 
*/ - int smgr_which; /* storage manager selector */ - - /* - * for md.c; per-fork arrays of the number of open segments - * (md_num_open_segs) and the segments themselves (md_seg_fds). - */ - int md_num_open_segs[MAX_FORKNUM + 1]; - struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; + SMgrId smgr_which; /* storage manager selector */ /* * Pinning support. If unpinned (ie. pincount == 0), 'node' is a list @@ -73,6 +68,54 @@ typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator) +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to report problems via elog(ERROR). An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. 
+ */ +typedef struct f_smgr +{ + const char *name; /* must be non-NULL and non-empty */ + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_open) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void *buffer, bool skipFsync); + void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync); + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks); + uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); + void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + void **buffers, BlockNumber nblocks); + void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, + bool skipFsync); + void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber old_blocks, BlockNumber nblocks); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); +} f_smgr; + +extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size); + extern void smgrinit(void); extern SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9840060997f..4b971b81ae5 100644 
--- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1629,6 +1629,7 @@ ManyTestResourceKind Material MaterialPath MaterialState +MdSMgrRelationData MdfdVec MdPathStr Memoize -- 2.47.2