Re: cache estimates, cache access cost - Mailing list pgsql-hackers
From | Greg Smith |
---|---|
Subject | Re: cache estimates, cache access cost |
Date | |
Msg-id | 4DD09F98.9050406@2ndquadrant.com Whole thread Raw |
In response to | cache estimates, cache access cost (Cédric Villemain <cedric.villemain.debian@gmail.com>) |
Responses |
Re: cache estimates, cache access cost
[WIP] cache estimates, cache access cost |
List | pgsql-hackers |
Cédric Villemain wrote:
> http://git.postgresql.org/gitweb?p=users/c2main/postgres.git;a=shortlog;h=refs/heads/analyze_cache

This rebases easily to make Cédric's changes move to the end; I just pushed a version with that change to https://github.com/greg2ndQuadrant/postgres/tree/analyze_cache if anyone wants a cleaner one to browse. I've attached a patch too if that's more your thing. I'd recommend not getting too stuck on the particular hook Cédric has added here to compute the cache estimate, which uses mmap and mincore to figure it out. It's possible to compute similar numbers, albeit less accurate, using an approach similar to how pg_buffercache inspects things. And I even once wrote a background writer extension that collected this sort of data as it was running the LRU scan anyway. Discussions of this idea seem to focus on how the "what's in the cache?" data is collected, which as far as I'm concerned is the least important part. There are multiple options, some work better than others, and there's no reason that can't be swapped out later. The more important question is how to store the data collected and then use it for optimizing queries.
--
Greg Smith 2ndQuadrant US greg@2ndQuadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.us
diff --git a/contrib/Makefile b/contrib/Makefile index 6967767..47652d5 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -27,6 +27,7 @@ SUBDIRS = \ lo \ ltree \ oid2name \ + oscache \ pageinspect \ passwordcheck \ pg_archivecleanup \ diff --git a/contrib/oscache/Makefile b/contrib/oscache/Makefile new file mode 100644 index 0000000..8d8dcc5 --- /dev/null +++ b/contrib/oscache/Makefile @@ -0,0 +1,15 @@ +# contrib/oscache/Makefile + +MODULE_big = oscache +OBJS = oscache.o + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/oscache +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/oscache/oscache.c b/contrib/oscache/oscache.c new file mode 100644 index 0000000..1ad7dc2 --- /dev/null +++ b/contrib/oscache/oscache.c @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * oscache.c + * + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/oscache/oscache.c + * + *------------------------------------------------------------------------- + */ +/* { POSIX stuff */ +#include <stdlib.h> /* exit, calloc, free */ +#include <sys/stat.h> /* stat, fstat */ +#include <sys/types.h> /* size_t, mincore */ +#include <unistd.h> /* sysconf, close */ +#include <sys/mman.h> /* mmap, mincore */ +/* } */ + +/* { PostgreSQL stuff */ +#include "postgres.h" /* general Postgres declarations */ +#include "utils/rel.h" /* Relation */ +#include "storage/bufmgr.h" +#include "catalog/catalog.h" /* relpath */ +/* } */ + +PG_MODULE_MAGIC; + +void _PG_init(void); + +float4 oscache(Relation, ForkNumber); + +/* + * Module load callback + */ +void +_PG_init(void) +{ + /* Install hook. */ + OSCache_hook = &oscache; +} + +/* + * oscache process the os cache inspection for the relation. + * It returns the percentage of blocks in OS cache. 
+ */ +float4 +oscache(Relation relation, ForkNumber forkNum) +{ + int segment = 0; + char *relationpath; + char filename[MAXPGPATH]; + int fd; + int64 total_block_disk = 0; + int64 total_block_mem = 0; + + /* OS things */ + int64 pageSize = sysconf(_SC_PAGESIZE); /* Page size */ + register int64 pageIndex; + + relationpath = relpathperm(relation->rd_node, forkNum); + + /* + * For each segment of the relation + */ + snprintf(filename, MAXPGPATH, "%s", relationpath); + while ((fd = open(filename, O_RDONLY)) != -1) + { + // for stat file + struct stat st; + // for mmap file + void *pa = (char *)0; + // for calloc file + unsigned char *vec = (unsigned char *)0; + int64 block_disk = 0; + int64 block_mem = 0; + + if (fstat(fd, &st) == -1) + { + close(fd); + elog(ERROR, "Can not stat object file : %s", + filename); + return 0; + } + + /* + * if file ok + * then process + */ + if (st.st_size != 0) + { + /* number of block in the current file */ + block_disk = st.st_size/pageSize; + + /* TODO We need to split mmap size to be sure (?) to be able to mmap */ + pa = mmap(NULL, st.st_size, PROT_NONE, MAP_SHARED, fd, 0); + if (pa == MAP_FAILED) + { + close(fd); + elog(ERROR, "Can not mmap object file : %s, errno = %i,%s\nThis error can happen if there is not enoughtspace in memory to do the projection. 
Please mail cedric@2ndQuadrant.fr with '[oscache] ENOMEM' as subject.", + filename, errno, strerror(errno)); + return 0; + } + + /* Prepare our vector containing all blocks information */ + vec = calloc(1, (st.st_size+pageSize-1)/pageSize); + if ((void *)0 == vec) + { + munmap(pa, st.st_size); + close(fd); + elog(ERROR, "Can not calloc object file : %s", + filename); + return 0; + } + + /* Affect vec with mincore */ + if (mincore(pa, st.st_size, vec) != 0) + { + free(vec); + munmap(pa, st.st_size); + close(fd); + elog(ERROR, "mincore(%p, %lld, %p): %s\n", + pa, (int64)st.st_size, vec, strerror(errno)); + return 0; + } + + /* handle the results */ + for (pageIndex = 0; pageIndex <= st.st_size/pageSize; pageIndex++) + { + // block in memory + if (vec[pageIndex] & 1) + { + block_mem++; + } + } + } + elog(DEBUG1, "oscache %s: %lld of %lld block in linux cache", + filename, block_mem, block_disk); + + // free things + free(vec); + munmap(pa, st.st_size); + close(fd); + total_block_mem += block_mem; + total_block_disk += block_disk; + + snprintf(filename, MAXPGPATH, "%s.%u", relationpath, segment++); + } + return (float4)(total_block_mem*100/(total_block_disk+1)); +} diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 7b62818..25338d0 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1634,6 +1634,26 @@ </row> <row> + <entry><structfield>reloscache</structfield></entry> + <entry><type>float4</type></entry> + <entry></entry> + <entry> + Percentage of the files in OS cache. This is only an estimate used by + the planner. It is updated by <command>ANALYZE OSCACHE</command>. + </entry> + </row> + + <row> + <entry><structfield>relpgcache</structfield></entry> + <entry><type>float4</type></entry> + <entry></entry> + <entry> + Percentage of the files in PostgreSQL cache. This is only an estimate used by + the planner. It is updated by <command>ANALYZE PGCACHE</command>. 
+ </entry> + </row> + + <row> <entry><structfield>reltoastrelid</structfield></entry> <entry><type>oid</type></entry> <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry> diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 4cb29b2..7f39a93 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -54,6 +54,8 @@ hashbuild(PG_FUNCTION_ARGS) IndexBuildResult *result; BlockNumber relpages; double reltuples; + float4 reloscache; + float4 relpgcache; uint32 num_buckets; HashBuildState buildstate; @@ -66,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS) RelationGetRelationName(index)); /* Estimate the number of rows currently present in the table */ - estimate_rel_size(heap, NULL, &relpages, &reltuples); + estimate_rel_size(heap, NULL, &relpages, &reltuples, &reloscache, &relpgcache); /* Initialize the hash index metadata page and initial buckets */ num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 71c9931..73ba67b 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -756,6 +756,8 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); + values[Anum_pg_class_reloscache - 1] = Float4GetDatum(rd_rel->reloscache); + values[Anum_pg_class_relpgcache - 1] = Float4GetDatum(rd_rel->relpgcache); values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid); values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid); values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 0568a1b..284ab5d 100644 --- 
a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -86,6 +86,8 @@ static BufferAccessStrategy vac_strategy; static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool update_reltuples, bool inh); +static void do_cache_analyze_rel(Relation onerel, VacuumStmt *vacstmt, + bool update_reltuples, bool inh); static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize); static bool BlockSampler_HasMore(BlockSampler bs); @@ -238,13 +240,21 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, /* * Do the normal non-recursive ANALYZE. */ - do_analyze_rel(onerel, vacstmt, update_reltuples, false); + if (vacstmt->options & (VACOPT_CACHE)) + do_cache_analyze_rel(onerel, vacstmt, update_reltuples, false); + else + do_analyze_rel(onerel, vacstmt, update_reltuples, false); /* * If there are child tables, do recursive ANALYZE. */ if (onerel->rd_rel->relhassubclass) - do_analyze_rel(onerel, vacstmt, false, true); + { + if (vacstmt->options & (VACOPT_CACHE)) + do_cache_analyze_rel(onerel, vacstmt, false, true); + else + do_analyze_rel(onerel, vacstmt, false, true); + } /* * Close source relation now, but keep lock so that no one deletes it @@ -640,6 +650,120 @@ cleanup: } /* + * do_analyze_rel() -- analyze one relation, recursively or not + */ +static void +do_cache_analyze_rel(Relation onerel, VacuumStmt *vacstmt, + bool update_relcache, bool inh) +{ + int ind; + Relation *Irel; + int nindexes; + bool hasindex; + AnlIndexData *indexdata; + PGRUsage ru0; + TimestampTz starttime = 0; + MemoryContext caller_context; + int save_nestlevel; + + if (inh) + ereport(elevel, + (errmsg("cache analyzing \"%s.%s\" inheritance tree", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + else + ereport(elevel, + (errmsg("cache analyzing \"%s.%s\"", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + + /* + * Set up a working context so that we can easily free whatever 
junk gets + * created. + */ + anl_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + caller_context = MemoryContextSwitchTo(anl_context); + + /* + * Arrange to make GUC variable changes local to this command. + */ + save_nestlevel = NewGUCNestLevel(); + + /* measure elapsed time iff autovacuum logging requires it */ + if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) + { + pg_rusage_init(&ru0); + if (Log_autovacuum_min_duration > 0) + starttime = GetCurrentTimestamp(); + } + + /* + * Open all indexes of the relation, and see if there are any analyzable + * columns in the indexes. We do not analyze index columns if there was + * an explicit column list in the ANALYZE command, however. If we are + * doing a recursive scan, we don't want to touch the parent's indexes at + * all. + */ + if (!inh) + vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + else + { + Irel = NULL; + nindexes = 0; + } + hasindex = (nindexes > 0); + indexdata = NULL; + + /* + * Update cache stats in pg_class. + */ + cache_update_relstats(onerel, + RelationGetRelationOSCacheInFork(onerel, MAIN_FORKNUM), + RelationGetRelationPGCacheInFork(onerel, MAIN_FORKNUM), + InvalidTransactionId); + + /* + * Same for indexes. 
+ */ + for (ind = 0; ind < nindexes; ind++) + { + cache_update_relstats(Irel[ind], + RelationGetRelationOSCacheInFork(Irel[ind], MAIN_FORKNUM), + RelationGetRelationPGCacheInFork(Irel[ind], MAIN_FORKNUM), + InvalidTransactionId); + } + + /* Done with indexes */ + vac_close_indexes(nindexes, Irel, NoLock); + + /* Log the action if appropriate */ + if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) + { + if (Log_autovacuum_min_duration == 0 || + TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(), + Log_autovacuum_min_duration)) + ereport(LOG, + (errmsg("automatic cache analyze of table \"%s.%s.%s\" system usage: %s", + get_database_name(MyDatabaseId), + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel), + pg_rusage_show(&ru0)))); + } + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore current context and release memory */ + MemoryContextSwitchTo(caller_context); + MemoryContextDelete(anl_context); + anl_context = NULL; +} + +/* * Compute statistics about indexes of a relation */ static void diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 9606569..b45f012 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1096,3 +1096,61 @@ vacuum_delay_point(void) CHECK_FOR_INTERRUPTS(); } } + + +/* + * cache_update_relstats() -- update cache statistics for one relation + * + * /!\ Same comment as function vac_update_relstats() + */ +void +cache_update_relstats(Relation relation, + float4 per_oscache, float4 per_pgcache, + TransactionId frozenxid) +{ + Oid relid = RelationGetRelid(relation); + Relation rd; + HeapTuple ctup; + Form_pg_class pgcform; + bool dirty; + + rd = heap_open(RelationRelationId, RowExclusiveLock); + + /* Fetch a copy of the tuple to scribble on */ + ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(ctup)) + elog(ERROR, "pg_class entry for 
relid %u vanished during cache analyze", + relid); + pgcform = (Form_pg_class) GETSTRUCT(ctup); + + /* Apply required updates, if any, to copied tuple */ + + dirty = false; + if (pgcform->reloscache != (float4) per_oscache) + { + pgcform->reloscache = (float4) per_oscache; + dirty = true; + } + if (pgcform->relpgcache != (float4) per_pgcache) + { + pgcform->relpgcache = (float4) per_pgcache; + dirty = true; + } + + /* + * relfrozenxid should never go backward. Caller can pass + * InvalidTransactionId if it has no new data. + */ + if (TransactionIdIsNormal(frozenxid) && + TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid)) + { + pgcform->relfrozenxid = frozenxid; + dirty = true; + } + + /* If anything changed, write out the tuple. */ + if (dirty) + heap_inplace_update(rd, ctup); + + heap_close(rd, RowExclusiveLock); +} diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index fd8ea45..39f9eab 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -108,7 +108,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ if (!inhparent) estimate_rel_size(relation, rel->attr_widths - rel->min_attr, - &rel->pages, &rel->tuples); + &rel->pages, &rel->tuples, + &rel->oscache, &rel->pgcache); /* * Make list of indexes. Ignore indexes on system catalogs if told to. 
@@ -323,11 +324,14 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, { info->pages = RelationGetNumberOfBlocks(indexRelation); info->tuples = rel->tuples; + info->oscache = 0; + info->pgcache = 0; } else { estimate_rel_size(indexRelation, NULL, - &info->pages, &info->tuples); + &info->pages, &info->tuples, + &info->oscache, &info->pgcache); if (info->tuples > rel->tuples) info->tuples = rel->tuples; } @@ -362,7 +366,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ void estimate_rel_size(Relation rel, int32 *attr_widths, - BlockNumber *pages, double *tuples) + BlockNumber *pages, double *tuples, + float4 *oscache, float4 *pgcache) { BlockNumber curpages; BlockNumber relpages; @@ -451,21 +456,29 @@ estimate_rel_size(Relation rel, int32 *attr_widths, density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width; } *tuples = rint(density * (double) curpages); + *oscache = (float4) rel->rd_rel->reloscache; + *pgcache = (float4) rel->rd_rel->relpgcache; break; case RELKIND_SEQUENCE: /* Sequences always have a known size */ *pages = 1; *tuples = 1; + *oscache = 0; + *pgcache = 0; break; case RELKIND_FOREIGN_TABLE: /* Just use whatever's in pg_class */ *pages = rel->rd_rel->relpages; *tuples = rel->rd_rel->reltuples; + *oscache = 0; + *pgcache = 0; break; default: /* else it has no disk storage; probably shouldn't get here? 
*/ *pages = 0; *tuples = 0; + *oscache = 0; + *pgcache = 0; break; } } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 1d39674..cc0d6f5 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -342,7 +342,7 @@ static void SplitColQualList(List *qualList, %type <boolean> opt_instead %type <boolean> opt_unique opt_concurrently opt_verbose opt_full -%type <boolean> opt_freeze opt_default opt_recheck +%type <boolean> opt_freeze opt_oscache opt_default opt_recheck %type <defelt> opt_binary opt_oids copy_delimiter %type <boolean> copy_from @@ -529,7 +529,7 @@ static void SplitColQualList(List *qualList, NULLS_P NUMERIC OBJECT_P OF OFF OFFSET OIDS ON ONLY OPERATOR OPTION OPTIONS OR - ORDER OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER + ORDER OSCACHE OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY @@ -7801,11 +7801,13 @@ vacuum_option_elem: ; AnalyzeStmt: - analyze_keyword opt_verbose + analyze_keyword opt_oscache opt_verbose { VacuumStmt *n = makeNode(VacuumStmt); n->options = VACOPT_ANALYZE; if ($2) + n->options |= VACOPT_CACHE; + if ($3) n->options |= VACOPT_VERBOSE; n->freeze_min_age = -1; n->freeze_table_age = -1; @@ -7813,16 +7815,18 @@ AnalyzeStmt: n->va_cols = NIL; $$ = (Node *)n; } - | analyze_keyword opt_verbose qualified_name opt_name_list + | analyze_keyword opt_oscache opt_verbose qualified_name opt_name_list { VacuumStmt *n = makeNode(VacuumStmt); n->options = VACOPT_ANALYZE; if ($2) + n->options |= VACOPT_CACHE; + if ($3) n->options |= VACOPT_VERBOSE; n->freeze_min_age = -1; n->freeze_table_age = -1; - n->relation = $3; - n->va_cols = $4; + n->relation = $4; + n->va_cols = $5; $$ = (Node *)n; } ; @@ -7845,6 +7849,11 @@ opt_freeze: FREEZE { $$ = TRUE; } | /*EMPTY*/ { $$ = FALSE; } ; +opt_oscache: + OSCACHE { $$ = TRUE; } + | /*EMPTY*/ { $$ = FALSE; } + ; + opt_name_list: '(' name_list ')' { $$ = 
$2; } | /*EMPTY*/ { $$ = NIL; } @@ -12158,6 +12167,7 @@ type_func_name_keyword: | LIKE | NATURAL | NOTNULL + | OSCACHE | OUTER_P | OVER | OVERLAPS diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index f96685d..5cea929 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -106,6 +106,13 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void AtProcExit_Buffers(int code, Datum arg); +/* + * Hooks for plugins to get control in + * RelationGetRelationOSCacheInFork + * RelationGetRelationPGCacheInFork + */ +oscache_hook_type OSCache_hook = NULL; +pgcache_hook_type PGCache_hook = NULL; /* * PrefetchBuffer -- initiate asynchronous read of a block of a relation @@ -1922,6 +1929,40 @@ RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum) return smgrnblocks(relation->rd_smgr, forkNum); } +/* + * RelationGetRelationOSCacheInFork + * Determines the current percentage of pages in OS cache for the + * relation. + */ +float4 +RelationGetRelationOSCacheInFork(Relation relation, ForkNumber forkNum) +{ + float4 percent = 0; + + /* if a plugin is present, let it manage things */ + if (OSCache_hook) + percent = (*OSCache_hook) (relation, forkNum); + + return percent; +} + +/* + * RelationGetRelationPGCacheInFork + * Determines the current percentage of pages in PostgreSQL cache + * for the relation. 
+ */ +float4 +RelationGetRelationPGCacheInFork(Relation relation, ForkNumber forkNum) +{ + float4 percent = 0; + + /* if a plugin is present, let it manage things */ + if (PGCache_hook) + percent = (*PGCache_hook) (relation, forkNum); + + return percent; +} + /* --------------------------------------------------------------------- * DropRelFileNodeBuffers * diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d7e94ff..159096a 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1417,6 +1417,8 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_rel->relpages = 1; relation->rd_rel->reltuples = 1; + relation->rd_rel->reloscache = 0; + relation->rd_rel->relpgcache = 0; relation->rd_rel->relkind = RELKIND_RELATION; relation->rd_rel->relhasoids = hasoids; relation->rd_rel->relnatts = (int16) natts; @@ -2661,6 +2663,8 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) { classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; + classform->reloscache = 0; + classform->relpgcache = 0; } classform->relfrozenxid = freezeXid; diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index ffcce3c..dc79df5 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -45,6 +45,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO Oid reltablespace; /* identifier of table space for relation */ int4 relpages; /* # of blocks (not always up-to-date) */ float4 reltuples; /* # of tuples (not always up-to-date) */ + float4 reloscache; /* % of files in OS cache (not always up-to-date) */ + float4 relpgcache; /* % of files in PostgreSQL cache (not always up-to-date) */ Oid reltoastrelid; /* OID of toast table; 0 if none */ Oid reltoastidxid; /* if toast table, OID of chunk_id index */ bool relhasindex; /* T if has (or has had) any indexes */ @@ -92,7 +94,7 @@ typedef 
FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 26 +#define Natts_pg_class 28 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -103,22 +105,24 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_reltablespace 8 #define Anum_pg_class_relpages 9 #define Anum_pg_class_reltuples 10 -#define Anum_pg_class_reltoastrelid 11 -#define Anum_pg_class_reltoastidxid 12 -#define Anum_pg_class_relhasindex 13 -#define Anum_pg_class_relisshared 14 -#define Anum_pg_class_relpersistence 15 -#define Anum_pg_class_relkind 16 -#define Anum_pg_class_relnatts 17 -#define Anum_pg_class_relchecks 18 -#define Anum_pg_class_relhasoids 19 -#define Anum_pg_class_relhaspkey 20 -#define Anum_pg_class_relhasrules 21 -#define Anum_pg_class_relhastriggers 22 -#define Anum_pg_class_relhassubclass 23 -#define Anum_pg_class_relfrozenxid 24 -#define Anum_pg_class_relacl 25 -#define Anum_pg_class_reloptions 26 +#define Anum_pg_class_reloscache 11 +#define Anum_pg_class_relpgcache 12 +#define Anum_pg_class_reltoastrelid 13 +#define Anum_pg_class_reltoastidxid 14 +#define Anum_pg_class_relhasindex 15 +#define Anum_pg_class_relisshared 16 +#define Anum_pg_class_relpersistence 17 +#define Anum_pg_class_relkind 18 +#define Anum_pg_class_relnatts 19 +#define Anum_pg_class_relchecks 20 +#define Anum_pg_class_relhasoids 21 +#define Anum_pg_class_relhaspkey 22 +#define Anum_pg_class_relhasrules 23 +#define Anum_pg_class_relhastriggers 24 +#define Anum_pg_class_relhassubclass 25 +#define Anum_pg_class_relfrozenxid 26 +#define Anum_pg_class_relacl 27 +#define Anum_pg_class_reloptions 28 /* ---------------- * initial contents of pg_class @@ -130,13 +134,13 @@ typedef FormData_pg_class *Form_pg_class; */ /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1247 ( 
pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 25 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 0 f f p r 25 0 t f f f f 3 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 _null_ _null_ )); DESCR(""); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 79c9f5d..7f1801a 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -155,6 +155,11 @@ extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age, extern void vac_update_datfrozenxid(void); extern void vacuum_delay_point(void); +extern void cache_update_relstats(Relation relation, + float4 per_oscache, + float4 per_pgcache, + TransactionId frozenxid); + /* in commands/vacuumlazy.c */ extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy, bool *scanned_all); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index ee1881b..bc7a301 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2417,7 +2417,8 @@ typedef enum VacuumOption VACOPT_VERBOSE = 1 << 2, /* print progress info */ VACOPT_FREEZE = 1 << 3, /* FREEZE option */ VACOPT_FULL = 1 << 4, /* FULL (non-concurrent) vacuum */ - VACOPT_NOWAIT = 1 << 5 + VACOPT_NOWAIT = 1 << 5, + VACOPT_CACHE = 1 << 6 /* do CACHE stats analyze */ } VacuumOption; typedef struct VacuumStmt diff --git 
a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f659269..3f08bb0 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -408,6 +408,8 @@ typedef struct RelOptInfo List *indexlist; /* list of IndexOptInfo */ BlockNumber pages; double tuples; + float4 oscache; + float4 pgcache; struct Plan *subplan; /* if subquery */ List *subrtable; /* if subquery */ List *subrowmark; /* if subquery */ @@ -466,6 +468,8 @@ typedef struct IndexOptInfo /* statistics from pg_class */ BlockNumber pages; /* number of disk pages in index */ double tuples; /* number of index tuples in index */ + float4 oscache; + float4 pgcache; /* index descriptor information */ int ncolumns; /* number of columns in index */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index c0b8eda..1dc78d5 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -29,7 +29,8 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); extern void estimate_rel_size(Relation rel, int32 *attr_widths, - BlockNumber *pages, double *tuples); + BlockNumber *pages, double *tuples, + float4 *oscache, float4 *pgcache); extern int32 get_relation_data_width(Oid relid, int32 *attr_widths); diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 12c2faf..95a7e3d 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -264,6 +264,7 @@ PG_KEYWORD("option", OPTION, UNRESERVED_KEYWORD) PG_KEYWORD("options", OPTIONS, UNRESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) PG_KEYWORD("order", ORDER, RESERVED_KEYWORD) +PG_KEYWORD("oscache", OSCACHE, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("out", OUT_P, COL_NAME_KEYWORD) PG_KEYWORD("outer", OUTER_P, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("over", OVER, TYPE_FUNC_NAME_KEYWORD) diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index b8fc87e..8b621de 100644 --- 
a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -179,6 +179,10 @@ extern void CheckPointBuffers(int flags); extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum); +extern float4 RelationGetRelationOSCacheInFork(Relation relation, + ForkNumber forkNum); +extern float4 RelationGetRelationPGCacheInFork(Relation relation, + ForkNumber forkNum); extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, @@ -215,4 +219,14 @@ extern void AtProcExit_LocalBuffers(void); extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); extern void FreeAccessStrategy(BufferAccessStrategy strategy); +/* +* Hooks for plugins to get control in +* RelationGetRelationOSCacheInFork +* RelationGetRelationPGCacheInFork +*/ +typedef float4 (*oscache_hook_type) (Relation relation, ForkNumber forkNum); +extern PGDLLIMPORT oscache_hook_type OSCache_hook; +typedef float4 (*pgcache_hook_type) (Relation relation, ForkNumber forkNum); +extern PGDLLIMPORT pgcache_hook_type PGCache_hook; + #endif diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 5e28289..64ef53f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -963,6 +963,7 @@ OprCacheKey OprInfo OprProofCacheEntry OprProofCacheKey +OSCache_hook_type OutputContext OverrideSearchPath OverrideStackEntry @@ -973,6 +974,7 @@ PBOOL PCtxtHandle PFN PGAsyncStatusType +PGCache_hook_type PGCALL2 PGEvent PGEventConnDestroy
pgsql-hackers by date: