Rewritten Index Advisor patch - Mailing list pgsql-patches
From | Tom Lane |
---|---|
Subject | Rewritten Index Advisor patch |
Date | |
Msg-id | 27516.1180053940@sss.pgh.pa.us Whole thread Raw |
Responses |
Re: Rewritten Index Advisor patch
|
List | pgsql-patches |
Awhile back I complained that I didn't like the way that the index advisor patch plugged into the system: http://archives.postgresql.org/pgsql-hackers/2007-04/msg00346.php Attached is a proposed replacement patch that keeps essentially all the advisor logic outside the core backend, and uses the method I suggested of modifying the result of get_relation_info() rather than installing phony system-catalog entries. Most of the patch bulk is actually just a small refactoring of ExplainOnePlan's API to make it more convenient to call from an advisor plugin. I also added hooks to let an advisor work through EXPLAIN, as I still maintain is a more useful behavior than doubling the work involved in every planner call. However, the planner() hook is still there for those who are insistent. To test the code, I made up a silly little proof-of-concept "advisor" that just checks to see if 2-column indexes would be more helpful if the column order were switched. It's incomplete because I didn't do anything about printing out a nice explanation of what the hypothetical index is. Comments, objections? regards, tom lane regression=# create table fooey as select unique1,unique2 from tenk1; SELECT regression=# create index fooeyi on fooey(unique1,unique2); CREATE INDEX regression=# analyze fooey; ANALYZE regression=# explain select * from fooey order by unique2,unique1; QUERY PLAN ----------------------------------------------------------------- Sort (cost=809.39..834.39 rows=10000 width=8) Sort Key: unique2, unique1 -> Seq Scan on fooey (cost=0.00..145.00 rows=10000 width=8) (3 rows) regression=# explain select * from fooey where unique2 in (1,2,3); QUERY PLAN ------------------------------------------------------- Seq Scan on fooey (cost=0.00..182.50 rows=3 width=8) Filter: (unique2 = ANY ('{1,2,3}'::integer[])) (2 rows) regression=# load '/home/tgl/pgsql/advisor'; LOAD regression=# explain select * from fooey order by unique2,unique1; QUERY PLAN ---------------------------------------------------------------------------------------- Sort (cost=809.39..834.39 rows=10000 width=8) Sort Key: unique2, unique1 -> Seq Scan on fooey (cost=0.00..145.00 rows=10000 width=8) Plan with hypothetical indexes: Index Scan using <hypothetical index> on fooey (cost=0.00..376.00 rows=10000 width=8) (6 rows) regression=# explain select * from fooey where unique2 in (1,2,3); QUERY PLAN ------------------------------------------------------------------------------------ Seq Scan on fooey (cost=0.00..182.50 rows=3 width=8) Filter: (unique2 = ANY ('{1,2,3}'::integer[])) Plan with hypothetical indexes: Bitmap Heap Scan on fooey (cost=12.78..22.49 rows=3 width=8) Recheck Cond: (unique2 = ANY ('{1,2,3}'::integer[])) -> Bitmap Index Scan on <hypothetical index> (cost=0.00..12.77 rows=3 width=0) Index Cond: (unique2 = ANY ('{1,2,3}'::integer[])) (8 rows) #include "postgres.h" #include "fmgr.h" #include "commands/explain.h" #include "optimizer/plancat.h" #include "optimizer/planner.h" PG_MODULE_MAGIC; void _PG_init(void); void _PG_fini(void); static void my_get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); static const char *my_explain_get_index_name(Oid indexId); /* * Main hook to take over control during EXPLAIN of a query */ static void my_ExplainOneQuery(Query *query, ExplainStmt *stmt, const char *queryString, ParamListInfo params, TupOutputState *tstate) { Query *savequery = NULL; PlannedStmt *plan; /* * Because planner scribbles on its input, we have to make a copy if * we want to plan twice. */ if (!stmt->analyze) savequery = copyObject(query); /* plan the query */ plan = planner(query, 0, params); /* run it (if needed) and produce output */ ExplainOnePlan(plan, params, stmt, tstate); /* if not EXPLAIN ANALYZE, try it with hypothetical indexes */ if (!stmt->analyze) { PG_TRY(); { PlannedStmt *hplan; /* Enable special processing */ get_relation_info_hook = my_get_relation_info; explain_get_index_name_hook = my_explain_get_index_name; /* plan the query */ hplan = planner(savequery, 0, params); /* if we got a better plan, print it */ if (hplan->planTree->total_cost < plan->planTree->total_cost) { do_text_output_oneline(tstate, ""); /* separator line */ do_text_output_oneline(tstate, "Plan with hypothetical indexes:"); ExplainOnePlan(hplan, params, stmt, tstate); } } PG_CATCH(); { /* Make sure hooks get cleared on error exit */ get_relation_info_hook = NULL; explain_get_index_name_hook = NULL; PG_RE_THROW(); } PG_END_TRY(); get_relation_info_hook = NULL; explain_get_index_name_hook = NULL; } } /* * Get control during planner's get_relation_info() function, which sets up * a RelOptInfo struct based on the system catalog contents. We can modify * the struct contents to cause the planner to work with a hypothetical * situation rather than what's actually in the catalogs. * * This simplistic example looks for two-column indexes on any two columns * (a,b), and sets up a hypothetical index in the other column order (b,a). */ static void my_get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel) { ListCell *ilist; /* Do nothing for an inheritance parent RelOptInfo */ if (inhparent) return; foreach(ilist, rel->indexlist) { IndexOptInfo *oldindex = (IndexOptInfo *) lfirst(ilist); if (oldindex->ncolumns == 2) { IndexOptInfo *info; int ncolumns = oldindex->ncolumns; int i; ListCell *lc; info = makeNode(IndexOptInfo); /* flat-copy as many fields as we can */ memcpy(info, oldindex, sizeof(IndexOptInfo)); /* * In this toy example we just assign all hypothetical indexes * OID 0, and the explain_get_index_name hook just prints * <hypothetical index>. In a realistic situation we'd probably * assume that OIDs smaller than, say, 100 are never the OID of * any real index, allowing us to identify one of up to 100 * hypothetical indexes per plan. Then we'd need to save aside * some state data that would let the explain hooks print info * about the selected index. */ info->indexoid = InvalidOid; /* * Need to make opfamily array large enough to put a terminating * zero at the end. */ info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); info->opfamily = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1)); /* initialize these to zeroes in case index is unordered */ info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns); info->revsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns); info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns); /* Reverse the order of the per-column arrays */ for (i = 0; i < ncolumns; i++) { info->indexkeys[i] = oldindex->indexkeys[1 - i]; info->opfamily[i] = oldindex->opfamily[1 - i]; info->fwdsortop[i] = oldindex->fwdsortop[1 - i]; info->revsortop[i] = oldindex->revsortop[1 - i]; info->nulls_first[i] = oldindex->nulls_first[1 - i]; } /* must reverse the order of the indexprs list, too */ info->indexprs = NIL; foreach(lc, oldindex->indexprs) { Node *node = lfirst(lc); info->indexprs = lcons(node, info->indexprs); } /* * Put the phony IndexOptInfo on the front of the list, so that * we don't get into an infinite loop. */ rel->indexlist = lcons(info, rel->indexlist); } } } /* * Hook to let EXPLAIN print something for a nonexistent index * * This is too simplistic for real-world use. */ static const char * my_explain_get_index_name(Oid indexId) { if (indexId == InvalidOid) return "<hypothetical index>"; return NULL; /* allow default behavior */ } /* * _pg_init() - library load-time initialization * * DO NOT make this static nor change its name! */ void _PG_init(void) { /* Get into the hooks we need to be in all the time */ ExplainOneQuery_hook = my_ExplainOneQuery; } /* * _PG_fini() - library unload-time finalization * * DO NOT make this static nor change its name! */ void _PG_fini(void) { /* Get out of all the hooks (just to be sure) */ ExplainOneQuery_hook = NULL; get_relation_info_hook = NULL; explain_get_index_name_hook = NULL; }
Attachment
pgsql-patches by date: