From 2ea0f9c9dad8482275eab2e77cc4d128ba2d5196 Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Sat, 28 Oct 2023 13:48:44 +0530 Subject: [PATCH v3 4/5] Introduce bank-wise LRU counter Since the buffer pool is already divided into banks and the victim buffer search is also done at the bank level, there is no need for a centralized LRU counter. This also improves performance by reducing the frequent CPU cache invalidation caused by updating a common variable. Dilip Kumar based on design idea from Robert Haas --- src/backend/access/transam/slru.c | 83 +++++++++++++++++-------------- src/include/access/slru.h | 28 +++++++---- 2 files changed, 64 insertions(+), 47 deletions(-) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index cf215627ea..6c8c21f215 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -105,34 +105,6 @@ typedef struct SlruWriteAllData *SlruWriteAll; (a).segno = (xx_segno) \ ) -/* - * Macro to mark a buffer slot "most recently used". Note multiple evaluation - * of arguments! - * - * The reason for the if-test is that there are often many consecutive - * accesses to the same page (particularly the latest page). By suppressing - * useless increments of cur_lru_count, we reduce the probability that old - * pages' counts will "wrap around" and make them appear recently used. - * - * We allow this code to be executed concurrently by multiple processes within - * SimpleLruReadPage_ReadOnly(). As long as int reads and writes are atomic, - * this should not cause any completely-bogus values to enter the computation. - * However, it is possible for either cur_lru_count or individual - * page_lru_count entries to be "reset" to lower values than they should have, - * in case a process is delayed while it executes this macro. 
With care in - * SlruSelectLRUPage(), this does little harm, and in any case the absolute - * worst possible consequence is a nonoptimal choice of page to evict. The - * gain from allowing concurrent reads of SLRU pages seems worth it. - */ -#define SlruRecentlyUsed(shared, slotno) \ - do { \ - int new_lru_count = (shared)->cur_lru_count; \ - if (new_lru_count != (shared)->page_lru_count[slotno]) { \ - (shared)->cur_lru_count = ++new_lru_count; \ - (shared)->page_lru_count[slotno] = new_lru_count; \ - } \ - } while (0) - /* Saved info for SlruReportIOError */ typedef enum { @@ -159,6 +131,8 @@ static int SlruSelectLRUPage(SlruCtl ctl, int pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data); static void SlruInternalDeleteSegment(SlruCtl ctl, int segno); +static inline void SlruRecentlyUsed(SlruShared shared, int slotno, + int banksize); static int SlruAdjustNSlots(int *nslots, int *banksize, int *bankmask); /* @@ -184,6 +158,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */ sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */ + sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */ if (nlsns > 0) sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */ @@ -236,8 +211,6 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, shared->num_slots = nslots; shared->lsn_groups_per_page = nlsns; - shared->cur_lru_count = 0; - /* shared->latest_page_number will be set later */ shared->slru_stats_idx = pgstat_get_slru_index(name); @@ -260,6 +233,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(LWLockPadded)); shared->bank_locks = (LWLockPadded *) (ptr + offset); offset += MAXALIGN(nbanks * sizeof(LWLockPadded)); + shared->bank_cur_lru_count = (int *) (ptr + offset); + offset += MAXALIGN(nbanks * 
sizeof(int)); if (nlsns > 0) { @@ -281,8 +256,11 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, } /* Initialize bank locks for each buffer bank. */ for (bankno = 0; bankno < nbanks; bankno++) + { LWLockInitialize(&shared->bank_locks[bankno].lock, bank_tranche_id); + shared->bank_cur_lru_count[bankno] = 0; + } /* Should fit to estimated shmem size */ Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns)); @@ -329,7 +307,7 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno) shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_VALID; shared->page_dirty[slotno] = true; - SlruRecentlyUsed(shared, slotno); + SlruRecentlyUsed(shared, slotno, ctl->bank_size); /* Set the buffer to zeroes */ MemSet(shared->page_buffer[slotno], 0, BLCKSZ); @@ -461,7 +439,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, continue; } /* Otherwise, it's ready to use */ - SlruRecentlyUsed(shared, slotno); + SlruRecentlyUsed(shared, slotno, ctl->bank_size); /* update the stats counter of pages found in the SLRU */ pgstat_count_slru_page_hit(shared->slru_stats_idx); @@ -507,7 +485,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, if (!ok) SlruReportIOError(ctl, pageno, xid); - SlruRecentlyUsed(shared, slotno); + SlruRecentlyUsed(shared, slotno, ctl->bank_size); /* update the stats counter of pages not found in SLRU */ pgstat_count_slru_page_read(shared->slru_stats_idx); @@ -550,7 +528,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { /* See comments for SlruRecentlyUsed macro */ - SlruRecentlyUsed(shared, slotno); + SlruRecentlyUsed(shared, slotno, ctl->bank_size); /* update the stats counter of pages found in the SLRU */ pgstat_count_slru_page_hit(shared->slru_stats_idx); @@ -1073,7 +1051,8 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) int best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer 
assigned */ - int bankstart = (pageno & ctl->bank_mask) * ctl->bank_size; + int bankno = pageno & ctl->bank_mask; + int bankstart = bankno * ctl->bank_size; int bankend = bankstart + ctl->bank_size; for (slotno = bankstart; slotno < bankend; slotno++) @@ -1110,7 +1089,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) * That gets us back on the path to having good data when there are * multiple pages with the same lru_count. */ - cur_count = (shared->cur_lru_count)++; + cur_count = (shared->bank_cur_lru_count[bankno])++; for (slotno = bankstart; slotno < bankend; slotno++) { int this_delta; @@ -1701,6 +1680,38 @@ SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path) return result; } +/* + * Function to mark a buffer slot "most recently used" in its bank. Unlike + * the macro it replaces, this evaluates each argument exactly once. + * + * The reason for the if-test is that there are often many consecutive + * accesses to the same page (particularly the latest page). By suppressing + * useless increments of bank_cur_lru_count, we reduce the probability that old + * pages' counts will "wrap around" and make them appear recently used. + * + * We allow this code to be executed concurrently by multiple processes within + * SimpleLruReadPage_ReadOnly(). As long as int reads and writes are atomic, + * this should not cause any completely-bogus values to enter the computation. + * However, it is possible for either bank_cur_lru_count or individual + * page_lru_count entries to be "reset" to lower values than they should have, + * in case a process is delayed while it executes this function. With care in + * SlruSelectLRUPage(), this does little harm, and in any case the absolute + * worst possible consequence is a nonoptimal choice of page to evict. The + * gain from allowing concurrent reads of SLRU pages seems worth it. 
+ */ +static inline void +SlruRecentlyUsed(SlruShared shared, int slotno, int banksize) +{ + int slrubankno = slotno / banksize; + int new_lru_count = shared->bank_cur_lru_count[slrubankno]; + + if (new_lru_count != shared->page_lru_count[slotno]) + { + shared->bank_cur_lru_count[slrubankno] = ++new_lru_count; + shared->page_lru_count[slotno] = new_lru_count; + } +} + /* * Pick bank size optimal for N-assiciative SLRU buffers. * diff --git a/src/include/access/slru.h b/src/include/access/slru.h index f3545d5f5d..a18b07f5d0 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -78,6 +78,23 @@ typedef struct SlruSharedData */ LWLockPadded *bank_locks; + /*---------- + * Instead of a global counter we maintain a bank-wise LRU counter because + * a) victim buffer selection is done at the bank level, so there is no + * point in having a global counter, and b) updating a global counter + * causes frequent CPU cache invalidation, which would hurt + * performance. + * + * We mark a page "most recently used" by setting + * page_lru_count[slotno] = ++bank_cur_lru_count[bankno]; + * The oldest page is therefore the one with the highest value of + * bank_cur_lru_count[bankno] - page_lru_count[slotno] + * The counts will eventually wrap around, but this calculation still + * works as long as no page's age exceeds INT_MAX counts. + *---------- + */ + int *bank_cur_lru_count; + /* * Optional array of WAL flush LSNs associated with entries in the SLRU * pages. 
If not zero/NULL, we must flush WAL before writing pages (true @@ -89,17 +106,6 @@ typedef struct SlruSharedData XLogRecPtr *group_lsn; int lsn_groups_per_page; - /*---------- - * We mark a page "most recently used" by setting - * page_lru_count[slotno] = ++cur_lru_count; - * The oldest page is therefore the one with the highest value of - * cur_lru_count - page_lru_count[slotno] - * The counts will eventually wrap around, but this calculation still - * works as long as no page's age exceeds INT_MAX counts. - *---------- - */ - int cur_lru_count; - /* * latest_page_number is the page number of the current end of the log; * this is not critical data, since we use it only to avoid swapping out -- 2.39.2 (Apple Git-143)