From 6498ff82f48827718c3475862e8fb32133b72073 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 4 Jul 2023 12:39:41 -0400 Subject: [PATCH v4 1/7] Simplify and speed up ReadArrayStr(). ReadArrayStr() seems to have been written on the assumption that non-rectangular input is fine and it should pad with NULLs anywhere that elements are missing. We disallowed non-rectangular input ages ago (commit 0e13d627b), but never simplified this function as a follow-up. In particular, the existing code recomputes each element's linear location from scratch, which is quite unnecessary for rectangular input: we can just assign the elements sequentially, saving lots of arithmetic. Add some more commentary while at it. ArrayGetOffset0() is no longer used anywhere, so remove it. --- src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++---------------- src/backend/utils/adt/arrayutils.c | 15 ------- src/include/utils/array.h | 1 - 3 files changed, 33 insertions(+), 52 deletions(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 7828a626..df1ebb47 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -93,7 +93,7 @@ typedef struct ArrayIteratorData static int ArrayCount(const char *str, int *dim, char typdelim, Node *escontext); static bool ReadArrayStr(char *arrayStr, const char *origStr, - int nitems, int ndim, int *dim, + int nitems, FmgrInfo *inputproc, Oid typioparam, int32 typmod, char typdelim, int typlen, bool typbyval, char typalign, @@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS) dataPtr = (Datum *) palloc(nitems * sizeof(Datum)); nullsPtr = (bool *) palloc(nitems * sizeof(bool)); if (!ReadArrayStr(p, string, - nitems, ndim, dim, + nitems, &my_extra->proc, typioparam, typmod, typdelim, typlen, typbyval, typalign, @@ -436,7 +436,8 @@ array_in(PG_FUNCTION_ARGS) /* * ArrayCount - * Determines the dimensions for an array string. + * Determines the dimensions for an array string. This includes + * syntax-checking the array structure decoration (braces and delimiters). * * Returns number of dimensions as function result. The axis lengths are * returned in dim[], which must be of size MAXDIM. @@ -683,16 +684,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) /* * ReadArrayStr : * parses the array string pointed to by "arrayStr" and converts the values - * to internal format. Unspecified elements are initialized to nulls. - * The array dimensions must already have been determined. + * to internal format. The array dimensions must have been determined, + * and the case of an empty array must have been handled earlier. * * Inputs: * arrayStr: the string to parse. * CAUTION: the contents of "arrayStr" will be modified! * origStr: the unmodified input string, used only in error messages. * nitems: total number of array elements, as already determined. - * ndim: number of array dimensions - * dim[]: array axis lengths * inputproc: type-specific input procedure for element datatype. * typioparam, typmod: auxiliary values to pass to inputproc. * typdelim: the value delimiter (type-specific). @@ -717,8 +716,6 @@ static bool ReadArrayStr(char *arrayStr, const char *origStr, int nitems, - int ndim, - int *dim, FmgrInfo *inputproc, Oid typioparam, int32 typmod, @@ -732,20 +729,13 @@ ReadArrayStr(char *arrayStr, int32 *nbytes, Node *escontext) { - int i, + int i = 0, nest_level = 0; char *srcptr; bool in_quotes = false; bool eoArray = false; bool hasnull; int32 totbytes; - int indx[MAXDIM] = {0}, - prod[MAXDIM]; - - mda_get_prod(ndim, dim, prod); - - /* Initialize is-null markers to true */ - memset(nulls, true, nitems * sizeof(bool)); /* * We have to remove " and \ characters to create a clean item value to @@ -768,11 +758,20 @@ ReadArrayStr(char *arrayStr, bool itemdone = false; bool leadingspace = true; bool hasquoting = false; - char *itemstart; - char *dstptr; - char *dstendptr; + char *itemstart; /* start of de-escaped text */ + char *dstptr; /* next output point for de-escaped text */ + char *dstendptr; /* last significant output char + 1 */ - i = -1; + /* + * Parse next array element, collecting the de-escaped text into + * itemstart..dstendptr-1. + * + * Notice that we do not set "itemdone" until we see a separator + * (typdelim character) or the array's final right brace. Since the + * array is already verified to be nonempty and rectangular, there is + * guaranteed to be another element to be processed in the first case, + * while in the second case of course we'll exit the outer loop. + */ itemstart = dstptr = dstendptr = srcptr; while (!itemdone) @@ -819,13 +818,7 @@ ReadArrayStr(char *arrayStr, case '{': if (!in_quotes) { - if (nest_level >= ndim) - ereturn(escontext, false, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed array literal: \"%s\"", - origStr))); nest_level++; - indx[nest_level - 1] = 0; srcptr++; } else @@ -839,14 +832,9 @@ ReadArrayStr(char *arrayStr, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); - if (i == -1) - i = ArrayGetOffset0(ndim, indx, prod); - indx[nest_level - 1] = 0; nest_level--; if (nest_level == 0) eoArray = itemdone = true; - else - indx[nest_level - 1]++; srcptr++; } else @@ -857,10 +845,7 @@ ReadArrayStr(char *arrayStr, *dstptr++ = *srcptr++; else if (*srcptr == typdelim) { - if (i == -1) - i = ArrayGetOffset0(ndim, indx, prod); itemdone = true; - indx[ndim - 1]++; srcptr++; } else if (scanner_isspace(*srcptr)) @@ -884,15 +869,18 @@ ReadArrayStr(char *arrayStr, } } + /* Terminate de-escaped string */ Assert(dstptr < srcptr); *dstendptr = '\0'; - if (i < 0 || i >= nitems) + /* Safety check that we don't write past the output arrays */ + if (i >= nitems) ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); + /* Convert the de-escaped string into the next output array entries */ if (Array_nulls && !hasquoting && pg_strcasecmp(itemstart, "NULL") == 0) { @@ -913,8 +901,17 @@ ReadArrayStr(char *arrayStr, return false; nulls[i] = false; } + + i++; } + /* Cross-check that we filled all the output array entries */ + if (i != nitems) + ereturn(escontext, false, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", + origStr))); + /* * Check for nulls, compute total data space needed */ diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c index 62152f79..b8bccc7c 100644 --- a/src/backend/utils/adt/arrayutils.c +++ b/src/backend/utils/adt/arrayutils.c @@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx) return offset; } -/* - * Same, but subscripts are assumed 0-based, and use a scale array - * instead of raw dimension data (see mda_get_prod to create scale array) - */ -int -ArrayGetOffset0(int n, const int *tup, const int *scale) -{ - int i, - lin = 0; - - for (i = 0; i < n; i++) - lin += tup[i] * scale[i]; - return lin; -} - /* * Convert array dimensions into number of elements * diff --git a/src/include/utils/array.h b/src/include/utils/array.h index b13dfb34..d49adc38 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -448,7 +448,6 @@ extern void array_free_iterator(ArrayIterator iterator); */ extern int ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx); -extern int ArrayGetOffset0(int n, const int *tup, const int *scale); extern int ArrayGetNItems(int ndim, const int *dims); extern int ArrayGetNItemsSafe(int ndim, const int *dims, struct Node *escontext); -- 2.34.1