diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out new file mode 100644 index 67b103a..7f92249 *** a/contrib/pageinspect/expected/btree.out --- b/contrib/pageinspect/expected/btree.out *************** live_items | 1 *** 21,27 **** dead_items | 0 avg_item_size | 16 page_size | 8192 ! free_size | 8128 btpo_prev | 0 btpo_next | 0 btpo | 0 --- 21,27 ---- dead_items | 0 avg_item_size | 16 page_size | 8192 ! free_size | 8132 btpo_prev | 0 btpo_next | 0 btpo | 0 diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out new file mode 100644 index 8e15947..4b76d1f *** a/contrib/pageinspect/expected/page.out --- b/contrib/pageinspect/expected/page.out *************** SELECT tuple_data_split('test1'::regclas *** 65,83 **** SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- ! 0: 254 + ! 1: 254 + ! 3: 254 + ! 7: 254 + ! 15: 254 + ! 31: 254 + ! 63: 254 + ! 127: 254 + ! 255: 254 + ! 511: 254 + ! 1023: 254 + ! 2047: 254 + ! 4095: 254 + fp_next_slot: 0 + (1 row) --- 65,83 ---- SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- ! 0: 253 + ! 1: 253 + ! 3: 253 + ! 7: 253 + ! 15: 253 + ! 31: 253 + ! 63: 253 + ! 127: 253 + ! 255: 253 + ! 511: 253 + ! 1023: 253 + ! 2047: 253 + ! 4095: 253 + fp_next_slot: 0 + (1 row) diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c new file mode 100644 index 25af22f..e374a9b *** a/contrib/pageinspect/rawpage.c --- b/contrib/pageinspect/rawpage.c *************** page_header(PG_FUNCTION_ARGS) *** 270,276 **** values[5] = UInt16GetDatum(page->pd_special); values[6] = UInt16GetDatum(PageGetPageSize(page)); values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); ! values[8] = TransactionIdGetDatum(page->pd_prune_xid); /* Build and return the tuple. 
*/ --- 270,293 ---- values[5] = UInt16GetDatum(page->pd_special); values[6] = UInt16GetDatum(PageGetPageSize(page)); values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); ! if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) ! { ! HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); ! if (pageSpecial->pd_magic == HEAP_PAGE_MAGIC || ! pageSpecial->pd_magic == SEQ_PAGE_MAGIC) ! { ! values[8] = TransactionIdGetDatum(pageSpecial->pd_prune_xid); ! nulls[8] = false; ! } ! else ! { ! nulls[8] = true; ! } ! } ! else ! { ! nulls[8] = true; ! } /* Build and return the tuple. */ diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c new file mode 100644 index 75317b9..e8ac837 *** a/contrib/pgstattuple/pgstatindex.c --- b/contrib/pgstattuple/pgstatindex.c *************** pgstathashindex(PG_FUNCTION_ARGS) *** 620,626 **** metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; ! stats.space_per_page = metap->hashm_bsize; _hash_relbuf(rel, metabuf); /* Get the current relation length */ --- 620,626 ---- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; ! stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData)); _hash_relbuf(rel, metabuf); /* Get the current relation length */ diff --git a/src/backend/access/common/bufmask.c b/src/backend/access/common/bufmask.c new file mode 100644 index 10253d3..460c0ab *** a/src/backend/access/common/bufmask.c --- b/src/backend/access/common/bufmask.c *************** mask_page_lsn(Page page) *** 43,53 **** void mask_page_hint_bits(Page page) { - PageHeader phdr = (PageHeader) page; - - /* Ignore prune_xid (it's like a hint-bit) */ - phdr->pd_prune_xid = MASK_MARKER; - /* Ignore PD_PAGE_FULL and PD_HAS_FREE_LINES flags, they are just hints. 
*/ PageClearFull(page); PageClearHasFreeLinePointers(page); --- 43,48 ---- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c new file mode 100644 index d20f038..436a01f *** a/src/backend/access/heap/heapam.c --- b/src/backend/access/heap/heapam.c *************** heap_xlog_insert(XLogReaderState *record *** 8352,8358 **** { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), 0); action = BLK_NEEDS_REDO; } else --- 8352,8359 ---- { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); ! HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; action = BLK_NEEDS_REDO; } else *************** heap_xlog_multi_insert(XLogReaderState * *** 8468,8474 **** { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), 0); action = BLK_NEEDS_REDO; } else --- 8469,8476 ---- { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); ! HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; action = BLK_NEEDS_REDO; } else *************** heap_xlog_update(XLogReaderState *record *** 8682,8688 **** { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); ! PageInit(page, BufferGetPageSize(nbuffer), 0); newaction = BLK_NEEDS_REDO; } else --- 8684,8691 ---- { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); ! PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); ! 
HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; newaction = BLK_NEEDS_REDO; } else *************** heap_mask(char *pagedata, BlockNumber bl *** 9169,9174 **** --- 9172,9181 ---- mask_page_lsn(page); mask_page_hint_bits(page); + + /* Ignore prune_xid (it's like a hint-bit) */ + HeapPageGetSpecial(page)->pd_prune_xid = InvalidTransactionId; + mask_unused_space(page); for (off = 1; off <= PageGetMaxOffsetNumber(page); off++) diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c new file mode 100644 index 13e3bdc..8d8b5c1 *** a/src/backend/access/heap/hio.c --- b/src/backend/access/heap/hio.c *************** RelationAddExtraBlocks(Relation relation *** 206,212 **** /* Extend by one page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), 0); MarkBufferDirty(buffer); blockNum = BufferGetBlockNumber(buffer); freespace = PageGetHeapFreeSpace(page); --- 206,213 ---- /* Extend by one page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); ! HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; MarkBufferDirty(buffer); blockNum = BufferGetBlockNumber(buffer); freespace = PageGetHeapFreeSpace(page); *************** loop: *** 590,596 **** BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); ! PageInit(page, BufferGetPageSize(buffer), 0); if (len > PageGetHeapFreeSpace(page)) { --- 591,598 ---- BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); ! PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); ! 
HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; if (len > PageGetHeapFreeSpace(page)) { diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c new file mode 100644 index 52231ac..b526fc3 *** a/src/backend/access/heap/pruneheap.c --- b/src/backend/access/heap/pruneheap.c *************** heap_page_prune(Relation relation, Buffe *** 245,251 **** * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ ! ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; /* * Also clear the "page is full" flag, since there's no point in --- 245,251 ---- * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ ! HeapPageGetSpecial(page)->pd_prune_xid = prstate.new_prune_xid; /* * Also clear the "page is full" flag, since there's no point in *************** heap_page_prune(Relation relation, Buffe *** 283,292 **** * point in repeating the prune/defrag process until something else * happens to the page. */ ! if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid || PageIsFull(page)) { ! ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; PageClearFull(page); MarkBufferDirtyHint(buffer, true); } --- 283,292 ---- * point in repeating the prune/defrag process until something else * happens to the page. */ ! if (HeapPageGetSpecial(page)->pd_prune_xid != prstate.new_prune_xid || PageIsFull(page)) { ! HeapPageGetSpecial(page)->pd_prune_xid = prstate.new_prune_xid; PageClearFull(page); MarkBufferDirtyHint(buffer, true); } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c new file mode 100644 index bd560e4..0a7b799 *** a/src/backend/access/heap/rewriteheap.c --- b/src/backend/access/heap/rewriteheap.c *************** raw_heap_insert(RewriteState state, Heap *** 708,714 **** if (!state->rs_buffer_valid) { /* Initialize a new empty page */ ! 
PageInit(page, BLCKSZ, 0); state->rs_buffer_valid = true; } --- 708,715 ---- if (!state->rs_buffer_valid) { /* Initialize a new empty page */ ! PageInit(page, BLCKSZ, sizeof(HeapPageSpecialData)); ! HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; state->rs_buffer_valid = true; } diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c new file mode 100644 index 6293712..7c47bef *** a/src/backend/commands/sequence.c --- b/src/backend/commands/sequence.c *************** *** 54,69 **** #define SEQ_LOG_VALS 32 /* - * The "special area" of a sequence's buffer page looks like this. - */ - #define SEQ_MAGIC 0x1717 - - typedef struct sequence_magic - { - uint32 magic; - } sequence_magic; - - /* * We store a SeqTable item for every sequence we have touched in the current * session. This is needed to hold onto nextval/currval state. (We can't * rely on the relcache, since it's only, well, a cache, and may decide to --- 54,59 ---- *************** ResetSequence(Oid seq_relid) *** 336,345 **** static void fill_seq_with_data(Relation rel, HeapTuple tuple) { ! Buffer buf; ! Page page; ! sequence_magic *sm; ! OffsetNumber offnum; /* Initialize first page of relation with special magic number */ --- 326,334 ---- static void fill_seq_with_data(Relation rel, HeapTuple tuple) { ! Buffer buf; ! Page page; ! OffsetNumber offnum; /* Initialize first page of relation with special magic number */ *************** fill_seq_with_data(Relation rel, HeapTup *** 348,356 **** page = BufferGetPage(buf); ! PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic)); ! sm = (sequence_magic *) PageGetSpecialPointer(page); ! sm->magic = SEQ_MAGIC; /* Now insert sequence tuple */ --- 337,344 ---- page = BufferGetPage(buf); ! PageInit(page, BufferGetPageSize(buf), sizeof(HeapPageSpecialData)); ! 
HeapPageGetSpecial(page)->pd_magic = SEQ_PAGE_MAGIC; /* Now insert sequence tuple */ *************** read_seq_tuple(Relation rel, Buffer *buf *** 1169,1186 **** { Page page; ItemId lp; ! sequence_magic *sm; Form_pg_sequence_data seq; *buf = ReadBuffer(rel, 0); LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(*buf); ! sm = (sequence_magic *) PageGetSpecialPointer(page); ! if (sm->magic != SEQ_MAGIC) elog(ERROR, "bad magic number in sequence \"%s\": %08X", ! RelationGetRelationName(rel), sm->magic); lp = PageGetItemId(page, FirstOffsetNumber); Assert(ItemIdIsNormal(lp)); --- 1157,1174 ---- { Page page; ItemId lp; ! HeapPageSpecial pageSpecial; Form_pg_sequence_data seq; *buf = ReadBuffer(rel, 0); LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(*buf); ! pageSpecial = HeapPageGetSpecial(page); ! if (pageSpecial->pd_magic != SEQ_PAGE_MAGIC) elog(ERROR, "bad magic number in sequence \"%s\": %08X", ! RelationGetRelationName(rel), pageSpecial->pd_magic); lp = PageGetItemId(page, FirstOffsetNumber); Assert(ItemIdIsNormal(lp)); *************** seq_redo(XLogReaderState *record) *** 1881,1887 **** char *item; Size itemsz; xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); - sequence_magic *sm; if (info != XLOG_SEQ_LOG) elog(PANIC, "seq_redo: unknown op code %u", info); --- 1869,1874 ---- *************** seq_redo(XLogReaderState *record) *** 1900,1908 **** */ localpage = (Page) palloc(BufferGetPageSize(buffer)); ! PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic)); ! sm = (sequence_magic *) PageGetSpecialPointer(localpage); ! sm->magic = SEQ_MAGIC; item = (char *) xlrec + sizeof(xl_seq_rec); itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); --- 1887,1894 ---- */ localpage = (Page) palloc(BufferGetPageSize(buffer)); ! PageInit(localpage, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); ! 
HeapPageGetSpecial(localpage)->pd_magic = SEQ_PAGE_MAGIC; item = (char *) xlrec + sizeof(xl_seq_rec); itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c new file mode 100644 index 45b1859..246b63c *** a/src/backend/commands/vacuumlazy.c --- b/src/backend/commands/vacuumlazy.c *************** lazy_scan_heap(Relation onerel, int opti *** 857,863 **** ereport(WARNING, (errmsg("relation \"%s\" page %u is uninitialized --- fixing", relname, blkno))); ! PageInit(page, BufferGetPageSize(buf), 0); empty_pages++; } freespace = PageGetHeapFreeSpace(page); --- 857,864 ---- ereport(WARNING, (errmsg("relation \"%s\" page %u is uninitialized --- fixing", relname, blkno))); ! PageInit(page, BufferGetPageSize(buf), sizeof(HeapPageSpecialData)); ! HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; empty_pages++; } freespace = PageGetHeapFreeSpace(page); diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c new file mode 100644 index 41642eb..7a19d51 *** a/src/backend/storage/page/bufpage.c --- b/src/backend/storage/page/bufpage.c *************** PageInit(Page page, Size pageSize, Size *** 55,61 **** p->pd_upper = pageSize - specialSize; p->pd_special = pageSize - specialSize; PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION); - /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */ } --- 55,60 ---- diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h new file mode 100644 index 50c72a3..cafd365 *** a/src/include/storage/bufpage.h --- b/src/include/storage/bufpage.h *************** typedef struct *** 110,116 **** * pd_upper - offset to end of free space. * pd_special - offset to start of special space. * pd_pagesize_version - size in bytes and page layout version number. - * pd_prune_xid - oldest XID among potentially prunable tuples on page. 
* * The LSN is used by the buffer manager to enforce the basic rule of WAL: * "thou shalt write xlog before data". A dirty buffer cannot be dumped --- 110,115 ---- *************** typedef struct *** 127,135 **** * of relying on the page contents to decide whether to verify it. Hence * there are no flag bits relating to checksums. * - * pd_prune_xid is a hint field that helps determine whether pruning will be - * useful. It is currently unused in index pages. - * * The page version number and page size are packed together into a single * uint16 field. This is for historical reasons: before PostgreSQL 7.3, * there was no concept of a page version number, and doing it this way --- 126,131 ---- *************** typedef struct PageHeaderData *** 155,167 **** LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; - TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. * --- 151,198 ---- LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; /* + * HeapPageSpecialData -- data that is stored at the end of each heap page. + * + * pd_prune_xid - oldest XID among potentially prunable tuples on page. + * pd_magic - magic number identifying the type of page + * + * pd_prune_xid is a hint field that helps determine whether pruning will be + * useful. It is currently unused in index pages. + * + * pd_magic identifies the type of object the heap page belongs to. 
+ * Currently, a heap page may belong to a regular table heap or a sequence heap. + */ + typedef struct HeapPageSpecialData + { + TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ + uint32 pd_magic; /* magic number identifying the type of page */ + } HeapPageSpecialData; + + /* + * Possible values for the pd_magic field. + */ + #define HEAP_PAGE_MAGIC (0x1010) /* regular heap page */ + #define SEQ_PAGE_MAGIC (0x1717) /* sequence page */ + + typedef HeapPageSpecialData *HeapPageSpecial; + + /* + * Get pointer to HeapPageSpecialData without using pd_special of the page + * (for the sake of speed), assuming all heap pages have the same size of + * special data. + */ + #define HeapPageGetSpecial(page) ( \ + AssertMacro(((PageHeader) (page))->pd_special == BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))), \ + (HeapPageSpecial) ((Pointer) (page) + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))) \ + ) + + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. * *************** PageValidateSpecialPointer(Page page) *** 388,405 **** #define PageIsPrunable(page, oldestxmin) \ ( \ AssertMacro(TransactionIdIsNormal(oldestxmin)), \ ! TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \ ! TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \ ) #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ ! if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ ! TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ ! ((PageHeader) (page))->pd_prune_xid = (xid); \ } while (0) #define PageClearPrunable(page) \ ! (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) /* ---------------------------------------------------------------- --- 419,436 ---- #define PageIsPrunable(page, oldestxmin) \ ( \ AssertMacro(TransactionIdIsNormal(oldestxmin)), \ ! TransactionIdIsValid(HeapPageGetSpecial(page)->pd_prune_xid) && \ ! 
TransactionIdPrecedes(HeapPageGetSpecial(page)->pd_prune_xid, oldestxmin) \ ) #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ ! if (!TransactionIdIsValid(HeapPageGetSpecial(page)->pd_prune_xid) || \ ! TransactionIdPrecedes(xid, HeapPageGetSpecial(page)->pd_prune_xid)) \ ! HeapPageGetSpecial(page)->pd_prune_xid = (xid); \ } while (0) #define PageClearPrunable(page) \ ! (HeapPageGetSpecial(page)->pd_prune_xid = InvalidTransactionId) /* ----------------------------------------------------------------