From 6bdcaab8667be03127830c56bed0eb31a70ae500 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 8 Jul 2023 20:37:15 +0300 Subject: [PATCH v4 4/7] Extract loop to read array dimensions to subroutine. For clarity. One functional change: this changes from atoi() to strtol() for parsing the dimensions. We now check the result of strtol(). That means that bogus dimensions like select '[1+-:2]={foo,bar}'::text[]; are no longer accepted. --- src/backend/utils/adt/arrayfuncs.c | 229 +++++++++++++++++++---------- 1 file changed, 152 insertions(+), 77 deletions(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index d1b5d48b..7a63db09 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -89,6 +89,9 @@ typedef struct ArrayIteratorData int current_item; /* the item # we're at in the array */ } ArrayIteratorData; +static bool ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext); +static bool ReadArrayDimensions(char **srcptr, int *ndim, int *dim, int *lBound, + const char *origStr, Node *escontext); static int ArrayCount(const char *str, int *dim, char typdelim, Node *escontext); static bool ReadArrayStr(char *arrayStr, const char *origStr, @@ -238,85 +241,10 @@ array_in(PG_FUNCTION_ARGS) * If the input string starts with dimension info, read and use that. * Otherwise, we require the input to be in curly-brace style, and we * prescan the input to determine dimensions. - * - * Dimension info takes the form of one or more [n] or [m:n] items. The - * outer loop iterates once per dimension item. */ p = string_save; - ndim = 0; - for (;;) - { - char *q; - int ub; - - /* - * Note: we currently allow whitespace between, but not within, - * dimension items. 
- */ - while (scanner_isspace(*p)) - p++; - if (*p != '[') - break; /* no more dimension items */ - p++; - if (ndim >= MAXDIM) - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", - ndim + 1, MAXDIM))); - - for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) - /* skip */ ; - if (q == p) /* no digits? */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed array literal: \"%s\"", string), - errdetail("\"[\" must introduce explicitly-specified array dimensions."))); - - if (*q == ':') - { - /* [m:n] format */ - *q = '\0'; - lBound[ndim] = atoi(p); - p = q + 1; - for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) - /* skip */ ; - if (q == p) /* no digits? */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed array literal: \"%s\"", string), - errdetail("Missing array dimension value."))); - } - else - { - /* [n] format */ - lBound[ndim] = 1; - } - if (*q != ']') - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed array literal: \"%s\"", string), - errdetail("Missing \"%s\" after array dimensions.", - "]"))); - - *q = '\0'; - ub = atoi(p); - p = q + 1; - - /* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */ - if (ub == INT_MAX) - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("array upper bound is too large: %d", - ub))); - /* Now it's safe to compute ub + 1 */ - if (ub + 1 < lBound[ndim]) - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("upper bound cannot be less than lower bound minus one"))); - - dim[ndim] = ub - lBound[ndim] + 1; - ndim++; - } + if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext)) + return (Datum) 0; if (ndim == 0) { @@ -441,6 +369,153 @@ 
array_in(PG_FUNCTION_ARGS)
 	PG_RETURN_ARRAYTYPE_P(retval);
 }
 
+
+/*
+ * ReadArrayDimensions
+ *	  parses the array dimensions part of the input into internal format.
+ *
+ * On entry, *srcptr points to the string to parse. On success, it is advanced
+ * to point after the dimension info.
+ *
+ * *ndim_p, *dim, and *lBound are output variables. They are filled with the
+ * number of dimensions (<= MAXDIM), the length of each dimension, and the
+ * lower bounds of the slices, respectively. If there is no dimension
+ * information, *ndim_p is set to 0.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
+ *
+ * Result:
+ *	true for success, false for failure (if escontext is provided).
+ *
+ * Note that dim[] and lBound[] are allocated by the caller, and must have
+ * MAXDIM elements.
+ */
+static bool
+ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
+					const char *origStr, Node *escontext)
+{
+	char	   *p = *srcptr;
+	int			ndim = 0;
+
+	/*
+	 * Dimension info takes the form of one or more [n] or [m:n] items. The
+	 * loop iterates once per dimension item.
+	 */
+	for (;;)
+	{
+		char	   *q;
+		int			ub;
+		int			i;
+
+		/*
+		 * Note: we currently allow whitespace between, but not within,
+		 * dimension items.
+		 */
+		while (scanner_isspace(*p))
+			p++;
+		if (*p != '[')
+			break;				/* no more dimension items */
+		p++;
+		if (ndim >= MAXDIM)
+			ereturn(escontext, false,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+							ndim + 1, MAXDIM)));
+
+		q = p;
+		if (!ReadDimensionInt(&p, &i, origStr, escontext))
+			return false;
+		if (p == q)				/* no digits? */
+			ereturn(escontext, false,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed array literal: \"%s\"", origStr),
+					 errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
+
+		if (*p == ':')
+		{
+			/* [m:n] format */
+			lBound[ndim] = i;
+			p++;
+			q = p;
+			if (!ReadDimensionInt(&p, &ub, origStr, escontext))
+				return false;
+			if (p == q)			/* no digits? */
+				ereturn(escontext, false,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("malformed array literal: \"%s\"", origStr),
+						 errdetail("Missing array dimension value.")));
+		}
+		else
+		{
+			/* [n] format */
+			lBound[ndim] = 1;
+			ub = i;
+		}
+		if (*p != ']')
+			ereturn(escontext, false,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed array literal: \"%s\"", origStr),
+					 errdetail("Missing \"%s\" after array dimensions.", "]")));
+		p++;
+
+		/* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
+		if (ub == INT_MAX)
+			ereturn(escontext, false,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("array upper bound is too large: %d", ub)));
+		/* Now it's safe to compute ub + 1 */
+		if (ub + 1 < lBound[ndim])
+			ereturn(escontext, false,
+					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+					 errmsg("upper bound cannot be less than lower bound minus one")));
+
+		dim[ndim] = ub - lBound[ndim] + 1;
+		ndim++;
+	}
+
+	*srcptr = p;
+	*ndim_p = ndim;
+	return true;
+}
+
+/*
+ * ReadDimensionInt
+ *	 parse an integer (which must fit in an int), for the array dimensions
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced past the
+ * digits of the integer. If there are no digits, sets *result to 0, returns
+ * true and leaves *srcptr unchanged.
+ *
+ * Result:
+ *	true for success, false for failure (if escontext is provided).
+ *	On success, the parsed integer is returned in *result.
+ */
+static bool
+ReadDimensionInt(char **srcptr, int *result, const char *origStr, Node *escontext)
+{
+	char	   *p = *srcptr;
+	long		l;
+
+	/* don't accept leading whitespace */
+	if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
+	{
+		*result = 0;
+		return true;			/* leave *srcptr unmodified */
+	}
+
+	/* strtol() flags ERANGE only for "long", so check the int range too */
+	errno = 0;
+	l = strtol(p, srcptr, 10);
+	if (errno == ERANGE || l > INT_MAX || l < INT_MIN)
+		ereturn(escontext, false,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("array bound is out of range")));
+	*result = (int) l;
+	return true;
+}
+
 /*
  * ArrayCount
  *	  Determines the dimensions for an array string. This includes
-- 
2.34.1