>From e2ec19d8567a238bea617afc41f65dbf4501d315 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sat, 24 Nov 2012 13:42:49 +0100 Subject: [PATCH 1/2] Don't ignore !indisvalid && !indisready indexes in RelationGetIndexList The previous behaviour - introduced in 8cb53654dbdb4c386369eb988062d0bbb6de725e - was incorrect because it caused caused RelationGetIndexAttrBitmap to ignore indexes which are currently being built with CREATE INDEX CONCURRENTLY. Due to this HOT updates were being used even when columns in the currently-building index differed. As validate_index_heapscan always inserts the root tuple of a HOT chain this leads to the update being missed. Instead add code in all callers of RelationGetIndexList to check whether the index is in a suitable state. Some of those checks should have been made anyway because indexes where only skipped if not valid *and* not ready. So, parts of this commit should be backpatched even to branches that don't have DROP INDEX CONCURRENTLY. To keep DROP INDEX CONCURRENTLY working we need to take care that we don't lock indexes during routine operations before testing ->indisready. It is safe to lock the index checking this as D.I.C takes care to wait for all backends possibly seeing an incorrect state and normal operations acquire a conflicting lock on the heap relation itself. This fixes the bug found by Amit Kapila that updates can be missing from an index build with CREATE INDEX CONCURRENTLY. --- contrib/tcn/tcn.c | 3 ++- src/backend/catalog/index.c | 4 +++- src/backend/commands/cluster.c | 7 +++++-- src/backend/commands/indexcmds.c | 1 + src/backend/commands/tablecmds.c | 4 +++- src/backend/commands/vacuum.c | 25 ++++++++++++++++++++++++- src/backend/executor/execUtils.c | 17 ++++++++++++++--- src/backend/optimizer/util/plancat.c | 7 ++++++- src/backend/parser/parse_utilcmd.c | 10 ++++++++++ src/backend/utils/cache/relcache.c | 23 +++++++++++++++-------- 10 files changed, 83 insertions(+), 18 deletions(-) diff --git a/contrib/tcn/tcn.c b/contrib/tcn/tcn.c index 6a8a96f..4b194bc 100644 --- a/contrib/tcn/tcn.c +++ b/contrib/tcn/tcn.c @@ -142,7 +142,8 @@ triggered_change_notification(PG_FUNCTION_ARGS) elog(ERROR, "cache lookup failed for index %u", indexoid); index = (Form_pg_index) GETSTRUCT(indexTuple); /* we're only interested if it is the primary key */ - if (index->indisprimary) + + if (index->indisvalid && index->indisprimary) { int numatts = index->indnatts; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index d2d91c1..2d298de 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -144,11 +144,13 @@ relationHasPrimaryKey(Relation rel) { Oid indexoid = lfirst_oid(indexoidscan); HeapTuple indexTuple; + Form_pg_index indexform; indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid)); if (!HeapTupleIsValid(indexTuple)) /* should not happen */ elog(ERROR, "cache lookup failed for index %u", indexoid); - result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary; + indexform = (Form_pg_index) GETSTRUCT(indexTuple); + result = indexform->indisprimary && indexform->indisvalid; ReleaseSysCache(indexTuple); if (result) break; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index de71a35..9bb47d2 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -139,7 +139,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) if (!HeapTupleIsValid(idxtuple)) elog(ERROR, "cache lookup failed for index %u", indexOid); indexForm = (Form_pg_index) GETSTRUCT(idxtuple); - if (indexForm->indisclustered) + if (indexForm->indisvalid && indexForm->indisclustered) { ReleaseSysCache(idxtuple); break; @@ -477,7 +477,7 @@ mark_index_clustered(Relation rel, Oid indexOid) elog(ERROR, "cache lookup failed for index %u", indexOid); indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - if (indexForm->indisclustered) + if (indexForm->indisvalid && indexForm->indisclustered) { ReleaseSysCache(indexTuple); return; @@ -513,6 +513,9 @@ mark_index_clustered(Relation rel, Oid indexOid) } else if (thisIndexOid == indexOid) { + /* should be checked before, be sure */ + if (!indexForm->indisvalid) + elog(ERROR, "cannot cluster on invalid index %u", indexOid); indexForm->indisclustered = true; simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index dd46cf9..0568061 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -226,6 +226,7 @@ CheckIndexCompatible(Oid oldId, /* For polymorphic opcintype, column type changes break compatibility. */ irel = index_open(oldId, AccessShareLock); /* caller probably has a lock */ + /* FIXME: verify index is ready & valid? */ for (i = 0; i < old_natts; i++) { if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) && diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f88bf79..1136007 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -6430,10 +6430,12 @@ transformFkeyCheckAttrs(Relation pkrel, /* * Must have the right number of columns; must be unique and not a - * partial index; forget it if there are any expressions, too + * partial index; forget it if there are any expressions, too. Invalid + * indexes are out as well. */ if (indexStruct->indnatts == numattrs && indexStruct->indisunique && + indexStruct->indisvalid && heap_attisnull(indexTuple, Anum_pg_index_indpred) && heap_attisnull(indexTuple, Anum_pg_index_indexprs)) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 14d1c08..d156d6b 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1113,6 +1113,7 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, indexoidlist = RelationGetIndexList(relation); + /* allocate memory for all indexes */ *nindexes = list_length(indexoidlist); if (*nindexes > 0) @@ -1120,12 +1121,34 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, else *Irel = NULL; + /* count valid indexes */ + *nindexes = 0; + i = 0; foreach(indexoidscan, indexoidlist) { Oid indexoid = lfirst_oid(indexoidscan); + Relation indrel; + + /* + * Don't lock index before we checked we can use it, dangerous schema + * changes are prevented by the relation level lock. + */ + indrel = index_open(indexoid, NoLock); + + /* check whether index is ready for writes */ + if (!indrel->rd_index->indisready) + { + index_close(indrel, NoLock); + continue; + } + + /* acquire appropriate lock */ + LockRelationOid(indexoid, lockmode); - (*Irel)[i++] = index_open(indexoid, lockmode); + /* return values */ + (*Irel)[i++] = indrel; + (*nindexes)++; } list_free(indexoidlist); diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 0bbd0d4..e5cae6b 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -914,11 +914,14 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) Relation indexDesc; IndexInfo *ii; - indexDesc = index_open(indexOid, RowExclusiveLock); + indexDesc = index_open(indexOid, NoLock); /* extract index key information from the index's pg_index info */ ii = BuildIndexInfo(indexDesc); + if (ii->ii_ReadyForInserts) + LockRelationOid(indexOid, RowExclusiveLock); + relationDescs[i] = indexDesc; indexInfoArray[i] = ii; i++; @@ -939,17 +942,25 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) int i; int numIndices; RelationPtr indexDescs; + IndexInfo **indexInfo; numIndices = resultRelInfo->ri_NumIndices; indexDescs = resultRelInfo->ri_IndexRelationDescs; + indexInfo = resultRelInfo->ri_IndexRelationInfo; for (i = 0; i < numIndices; i++) { if (indexDescs[i] == NULL) continue; /* shouldn't happen? */ - /* Drop lock acquired by ExecOpenIndices */ - index_close(indexDescs[i], RowExclusiveLock); + /* + * Drop lock acquired by ExecOpenIndices with the same strength it was + * opened with. + */ + if (indexInfo[i]->ii_ReadyForInserts) + index_close(indexDescs[i], RowExclusiveLock); + else + index_close(indexDescs[i], NoLock); } /* diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index abcd0ee..61f9e07 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -162,8 +162,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* * Extract info from the relation descriptor for the index. + * + * Don't lock the table before checking it's valid. */ - indexRelation = index_open(indexoid, lmode); + indexRelation = index_open(indexoid, NoLock); index = indexRelation->rd_index; /* @@ -178,6 +180,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, continue; } + /* Now lock the index */ + LockRelationOid(indexoid, lmode); + /* * If the index is valid, but cannot yet be used, ignore it; but * mark the plan we are generating as transient. See diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 95c57e8..29012d4 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -868,6 +868,16 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla parent_index = index_open(parent_index_oid, AccessShareLock); + /* + * Don't clone invalid indexes, they probably are leftovers from a + * failed concurrent creation. + */ + if (!parent_index->rd_index->indisvalid) + { + index_close(parent_index, AccessShareLock); + continue; + } + /* Build CREATE INDEX statement to recreate the parent_index */ index_stmt = generateClonedIndexStmt(cxt, parent_index, attmap, tupleDesc->natts); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 8c9ebe0..abacc4a 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3357,12 +3357,6 @@ RelationGetIndexList(Relation relation) oidvector *indclass; bool isnull; - /* - * Ignore any indexes that are currently being dropped - */ - if (!index->indisvalid && !index->indisready) - continue; - /* Add index's OID to result list in the proper order */ result = insert_ordered_oid(result, index->indexrelid); @@ -3683,7 +3677,20 @@ RelationGetIndexAttrBitmap(Relation relation) IndexInfo *indexInfo; int i; - indexDesc = index_open(indexOid, AccessShareLock); + /* + * We use NoLock to open the index so we don't interfere with + * concurrent drops. All code manipulating indexes either needs a + * stronger lock on the relation anyway or takes special care not to + * interfere with concurrent operations like DROP INDEX CONCURRENTLY + * does. Also, the locks taken here are way much shorter lived than the + * cached relation->rd_indexattr. + */ + indexDesc = index_open(indexOid, NoLock); + + /* + * We must not skip !indisready indexes here, if an index is built + * concurrently we cannot allow HOT updates for the covered columns. + */ /* Extract index key information from the index's pg_index row */ indexInfo = BuildIndexInfo(indexDesc); @@ -3704,7 +3711,7 @@ RelationGetIndexAttrBitmap(Relation relation) /* Collect all attributes in the index predicate, too */ pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); - index_close(indexDesc, AccessShareLock); + index_close(indexDesc, NoLock); } list_free(indexoidlist); -- 1.7.12.289.g0ce9864.dirty