From 6d46b6d10cb077bfc2ab261fbd74304d8b19d33e Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Thu, 18 Sep 2025 16:34:47 -0700 Subject: [PATCH v22 2/5] Add an alternative transform function in SubscriptRoutines Add a transform_partial() function pointer to enable processing a prefix of indirection lists. Data types that support subscripting can opt to use the transform() function that transforms the full input indirection list (e.g., arrays, hstore), or can opt to use the transform_partial() function to be more flexible on indirection node types, and make best effort in transforming only a prefix of the indirection list, letting the caller handle the remaining indirections. This allows transform functions to accept dot notation indirection as input, preparing for future JSONB dot notation support. --- contrib/hstore/hstore_subs.c | 1 + src/backend/parser/parse_expr.c | 76 +++++++++++++++---------- src/backend/parser/parse_node.c | 92 +++++++++++++++++++++++-------- src/backend/parser/parse_target.c | 6 +- src/backend/utils/adt/arraysubs.c | 2 + src/backend/utils/adt/jsonbsubs.c | 26 ++++++--- src/include/nodes/subscripting.h | 24 +++++++- src/include/parser/parse_node.h | 3 +- 8 files changed, 168 insertions(+), 62 deletions(-) diff --git a/contrib/hstore/hstore_subs.c b/contrib/hstore/hstore_subs.c index 3d03f66fa0d..d678dc56f86 100644 --- a/contrib/hstore/hstore_subs.c +++ b/contrib/hstore/hstore_subs.c @@ -287,6 +287,7 @@ hstore_subscript_handler(PG_FUNCTION_ARGS) { static const SubscriptRoutines sbsroutines = { .transform = hstore_subscript_transform, + .transform_partial = NULL, .exec_setup = hstore_exec_setup, .fetch_strict = true, /* fetch returns NULL for NULL inputs */ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index e1979a80c19..95ce330e506 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -436,44 +436,65 @@ 
transformIndirection(ParseState *pstate, A_Indirection *ind) { Node *last_srf = pstate->p_last_srf; Node *result = transformExprRecurse(pstate, ind->arg); - List *subscripts = NIL; + List *indirections = NIL; int location = exprLocation(result); ListCell *i; /* - * We have to split any field-selection operations apart from - * subscripting. Adjacent A_Indices nodes have to be treated as a single + * Combine field names and subscripts into a single indirection list, as + * some subscripting containers, such as jsonb, support field access using + * dot notation. Adjacent A_Indices nodes have to be treated as a single * multidimensional subscript operation. */ foreach(i, ind->indirection) { Node *n = lfirst(i); - if (IsA(n, A_Indices)) - subscripts = lappend(subscripts, n); - else if (IsA(n, A_Star)) + if (IsA(n, A_Indices) || IsA(n, String)) + indirections = lappend(indirections, n); + else { + Assert(IsA(n, A_Star)); ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("row expansion via \"*\" is not supported here"), parser_errposition(pstate, location))); } - else + } + + while (indirections) + { + /* try processing container subscripts first */ + int transformed_count = 0; + Node *newresult = (Node *) + transformContainerSubscripts(pstate, + result, + exprType(result), + exprTypmod(result), + indirections, + false, + &transformed_count); + + if (!newresult) { - Node *newresult; + /* + * generic subscripting failed; falling back to field selection + * for a composite type, or a single-argument function. 
+ */ + Node *n; + + Assert(indirections); - Assert(IsA(n, String)); + n = linitial(indirections); - /* process subscripts before this field selection */ - if (subscripts) - result = (Node *) transformContainerSubscripts(pstate, - result, - exprType(result), - exprTypmod(result), - subscripts, - false); - subscripts = NIL; + if (!IsA(n, String)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot subscript type %s because it does not support subscripting", + format_type_be(exprType(result))), + parser_errposition(pstate, exprLocation(result)))); + /* try to parse function or field selection */ newresult = ParseFuncOrColumn(pstate, list_make1(n), list_make1(result), @@ -481,19 +502,18 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) NULL, false, location); - if (newresult == NULL) + + if (!newresult) unknown_attribute(pstate, result, strVal(n), location); - result = newresult; + else + transformed_count = 1; } + + /* remove the processed indirections */ + indirections = list_delete_first_n(indirections, transformed_count); + + result = newresult; } - /* process trailing subscripts, if any */ - if (subscripts) - result = (Node *) transformContainerSubscripts(pstate, - result, - exprType(result), - exprTypmod(result), - subscripts, - false); return result; } diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 203b7a32178..803089ab6fc 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -238,20 +238,22 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) * containerTypMod typmod for the container * indirection Untransformed list of subscripts (must not be NIL) * isAssignment True if this will become a container assignment. - */ + * nSubscripts Output parameter for number of transformed subscripts. 
+*/ SubscriptingRef * transformContainerSubscripts(ParseState *pstate, Node *containerBase, Oid containerType, int32 containerTypMod, List *indirection, - bool isAssignment) + bool isAssignment, + int *nSubscripts) { SubscriptingRef *sbsref; const SubscriptRoutines *sbsroutines; Oid elementType; - bool isSlice = false; - ListCell *idx; + + *nSubscripts = 0; /* * Determine the actual container type, smashing any domain. In the @@ -267,28 +269,15 @@ transformContainerSubscripts(ParseState *pstate, */ sbsroutines = getSubscriptingRoutines(containerType, &elementType); if (!sbsroutines) + { + if (!isAssignment) + return NULL; + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("cannot subscript type %s because it does not support subscripting", format_type_be(containerType)), parser_errposition(pstate, exprLocation(containerBase)))); - - /* - * Detect whether any of the indirection items are slice specifiers. - * - * A list containing only simple subscripts refers to a single container - * element. If any of the items are slice specifiers (lower:upper), then - * the subscript expression means a container slice operation. - */ - foreach(idx, indirection) - { - A_Indices *ai = lfirst_node(A_Indices, idx); - - if (ai->is_slice) - { - isSlice = true; - break; - } } /* @@ -309,8 +298,65 @@ transformContainerSubscripts(ParseState *pstate, * Call the container-type-specific logic to transform the subscripts and * determine the subscripting result type. */ - sbsroutines->transform(sbsref, indirection, pstate, - isSlice, isAssignment); + Assert((sbsroutines->transform_partial == NULL) ^ (sbsroutines->transform == NULL)); + if (sbsroutines->transform_partial != NULL) + { + /* + * If the container type provides a partial transform function, use it + * here. 
This function can accept any node types in the indirection + * list as input, and is responsible for identifying and transforming + * as many leading elements as it can handle, which may be only a + * prefix of the indirection list. For example, it might process + * A_Indices nodes, String nodes (for jsonb dot-notation), or other + * node types, depending on the container's requirements. It returns + * the number of elements it transformed. + */ + *nSubscripts = sbsroutines->transform_partial(sbsref, indirection, pstate, isAssignment); + } + else + { + /* + * Full transform only accepts bracket subscripts (A_Indices nodes). + * We pre-collect the leading A_Indices nodes from the indirection + * list, then call the transform function to process this prefix of + * subscripts. + */ + List *subscriptlist = NIL; + ListCell *lc; + bool isSlice = false; + + /* Collect leading A_Indices subscripts */ + foreach(lc, indirection) + { + Node *n = lfirst(lc); + + if (IsA(n, A_Indices)) + { + A_Indices *ai = (A_Indices *) n; + + subscriptlist = lappend(subscriptlist, n); + if (ai->is_slice) + isSlice = true; + } + else + break; + } + + if (subscriptlist) + sbsroutines->transform(sbsref, subscriptlist, pstate, isSlice, isAssignment); + + *nSubscripts = list_length(subscriptlist); + } + + if (*nSubscripts == 0) + { + /* Fallback to field selection in caller */ + if (!isAssignment) + return NULL; + + /* This should not happen with well-behaved transform functions */ + elog(ERROR, "subscripting transform function failed to consume any indirection elements"); + } /* * Verify we got a valid type (this defends, for example, against someone diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 905c975d83b..a14f8fb85bf 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -922,6 +922,7 @@ transformAssignmentSubscripts(ParseState *pstate, Oid typeNeeded; int32 typmodNeeded; Oid collationNeeded; + int nSubscripts = 0; 
Assert(subscripts != NIL); @@ -936,7 +937,10 @@ transformAssignmentSubscripts(ParseState *pstate, containerType, containerTypMod, subscripts, - true); + true, + &nSubscripts); + + Assert(nSubscripts == list_length(subscripts)); typeNeeded = sbsref->refrestype; typmodNeeded = sbsref->reftypmod; diff --git a/src/backend/utils/adt/arraysubs.c b/src/backend/utils/adt/arraysubs.c index 2940fb8e8d7..0049907b942 100644 --- a/src/backend/utils/adt/arraysubs.c +++ b/src/backend/utils/adt/arraysubs.c @@ -541,6 +541,7 @@ array_subscript_handler(PG_FUNCTION_ARGS) { static const SubscriptRoutines sbsroutines = { .transform = array_subscript_transform, + .transform_partial = NULL, .exec_setup = array_exec_setup, .fetch_strict = true, /* fetch returns NULL for NULL inputs */ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ @@ -568,6 +569,7 @@ raw_array_subscript_handler(PG_FUNCTION_ARGS) { static const SubscriptRoutines sbsroutines = { .transform = array_subscript_transform, + .transform_partial = NULL, .exec_setup = array_exec_setup, .fetch_strict = true, /* fetch returns NULL for NULL inputs */ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ diff --git a/src/backend/utils/adt/jsonbsubs.c b/src/backend/utils/adt/jsonbsubs.c index e8626d3b4fc..b4824d46a5b 100644 --- a/src/backend/utils/adt/jsonbsubs.c +++ b/src/backend/utils/adt/jsonbsubs.c @@ -36,14 +36,16 @@ typedef struct JsonbSubWorkspace /* * Finish parse analysis of a SubscriptingRef expression for a jsonb. * - * Transform the subscript expressions, coerce them to text, - * and determine the result type of the SubscriptingRef node. + * This is a partial transform function that transforms the subscript + * expressions, coerces them to text, and determines the result type + * of the SubscriptingRef node. + * + * Returns the number of indirection elements processed. 
*/ -static void +static int jsonb_subscript_transform(SubscriptingRef *sbsref, List *indirection, ParseState *pstate, - bool isSlice, bool isAssignment) { List *upperIndexpr = NIL; @@ -55,10 +57,15 @@ jsonb_subscript_transform(SubscriptingRef *sbsref, */ foreach(idx, indirection) { - A_Indices *ai = lfirst_node(A_Indices, idx); + A_Indices *ai; Node *subExpr; - if (isSlice) + if (!IsA(lfirst(idx), A_Indices)) + break; + + ai = lfirst_node(A_Indices, idx); + + if (ai->is_slice) { Node *expr = ai->uidx ? ai->uidx : ai->lidx; @@ -142,7 +149,7 @@ jsonb_subscript_transform(SubscriptingRef *sbsref, * Slice with omitted upper bound. Should not happen as we already * errored out on slice earlier, but handle this just in case. */ - Assert(isSlice && ai->is_slice); + Assert(ai->is_slice); ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("jsonb subscript does not support slices"), @@ -159,6 +166,8 @@ jsonb_subscript_transform(SubscriptingRef *sbsref, /* Determine the result type of the subscripting operation; always jsonb */ sbsref->refrestype = JSONBOID; sbsref->reftypmod = -1; + + return list_length(upperIndexpr); } /* @@ -402,7 +411,8 @@ Datum jsonb_subscript_handler(PG_FUNCTION_ARGS) { static const SubscriptRoutines sbsroutines = { - .transform = jsonb_subscript_transform, + .transform = NULL, /* jsonb uses partial transform instead */ + .transform_partial = jsonb_subscript_transform, .exec_setup = jsonb_exec_setup, .fetch_strict = true, /* fetch returns NULL for NULL inputs */ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */ diff --git a/src/include/nodes/subscripting.h b/src/include/nodes/subscripting.h index e991f4bf826..008076b8b57 100644 --- a/src/include/nodes/subscripting.h +++ b/src/include/nodes/subscripting.h @@ -98,6 +98,25 @@ typedef void (*SubscriptTransform) (SubscriptingRef *sbsref, bool isSlice, bool isAssignment); +/* + * SubscriptTransformPartial is an alternative to SubscriptTransform for + * container types that can accept 
non-A_Indices indirection as input + * (e.g., JSONB accepts dot-notation (String node) for field access). + * + * It may transform a prefix of the indirection list and leave the rest + * unprocessed. It returns the number of indirections it transformed. + * The caller will then remove that many items from the head of the + * list, and either handle the remaining indirections differently or raise + * an error as needed. + * + * If transform_partial is NULL, the complete transform function is used, + * which accepts only A_Indices (bracket) nodes. + */ +typedef int (*SubscriptTransformPartial) (SubscriptingRef *sbsref, + List *indirection, + ParseState *pstate, + bool isAssignment); + /* * The exec_setup method is called during executor-startup compilation of a * SubscriptingRef node in an expression. It must fill *methods with pointers @@ -157,7 +176,10 @@ typedef void (*SubscriptExecSetup) (const SubscriptingRef *sbsref, /* Struct returned by the SQL-visible subscript handler function */ typedef struct SubscriptRoutines { - SubscriptTransform transform; /* parse analysis function */ + SubscriptTransform transform; /* parse analysis function, or NULL */ + SubscriptTransformPartial transform_partial; /* alternative parse + * analysis function, or + * NULL */ SubscriptExecSetup exec_setup; /* expression compilation function */ bool fetch_strict; /* is fetch SubscriptRef strict? */ bool fetch_leakproof; /* is fetch SubscriptRef leakproof? */ diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f7d07c84542..80f486acff0 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -362,7 +362,8 @@ extern SubscriptingRef *transformContainerSubscripts(ParseState *pstate, Oid containerType, int32 containerTypMod, List *indirection, - bool isAssignment); + bool isAssignment, + int *consumed_count); extern Const *make_const(ParseState *pstate, A_Const *aconst); #endif /* PARSE_NODE_H */ -- 2.39.5 (Apple Git-154)