From 2ee4f9a778d4eec41ceb6af054d98dc08afad87c Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Sun, 9 Nov 2025 22:49:03 -0500 Subject: [PATCH v9 8/9] Make pg_ndinstinct a proper adt. Move the in/out/send/recv functions for pg_ndistinct to pg_ndistinct.c, which allows mvdistinct.c to focus on the transformation from sample data to the internal MVDistinct structure. --- src/backend/statistics/mvdistinct.c | 529 -------------------------- src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/meson.build | 1 + src/backend/utils/adt/pg_ndistinct.c | 550 +++++++++++++++++++++++++++ 4 files changed, 552 insertions(+), 529 deletions(-) create mode 100644 src/backend/utils/adt/pg_ndistinct.c diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 09bec8ff718..f6bf68db01a 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -27,16 +27,7 @@ #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" -#include "common/int.h" -#include "common/jsonapi.h" -#include "lib/stringinfo.h" -#include "mb/pg_wchar.h" -#include "nodes/miscnodes.h" -#include "nodes/pg_list.h" #include "statistics/extended_stats_internal.h" -#include "statistics/statistics.h" -#include "utils/builtins.h" -#include "utils/fmgrprotos.h" #include "utils/syscache.h" #include "utils/typcache.h" #include "varatt.h" @@ -334,458 +325,6 @@ statext_ndistinct_deserialize(bytea *data) return ndistinct; } -typedef enum -{ - NDIST_EXPECT_START = 0, - NDIST_EXPECT_ITEM, - NDIST_EXPECT_KEY, - NDIST_EXPECT_ATTNUM_LIST, - NDIST_EXPECT_ATTNUM, - NDIST_EXPECT_NDISTINCT, - NDIST_EXPECT_COMPLETE -} ndistinctSemanticState; - -typedef struct -{ - const char *str; - ndistinctSemanticState state; - - List *distinct_items; /* Accumulated complete MVNDistinctItems */ - Node *escontext; - - bool found_attributes; /* Item has an attributes key */ - bool found_ndistinct; /* Item has ndistinct key */ - List *attnum_list; /* 
Accumulated attributes attnums */ - int64 ndistinct; -} ndistinctParseState; - -/* - * Invoked at the start of each MVNDistinctItem. - * - * The entire JSON document shoul be one array of MVNDistinctItem objects. - * - * If we're anywhere else in the document, it's an error. - */ -static JsonParseErrorType -ndistinct_object_start(void *state) -{ - ndistinctParseState *parse = state; - - if (parse->state != NDIST_EXPECT_ITEM) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Expected Item object"))); - return JSON_SEM_ACTION_FAILED; - } - - /* Now we expect to see attributes/ndistinct keys */ - parse->state = NDIST_EXPECT_KEY; - return JSON_SUCCESS; -} - -/* - * Routine to allow qsorting of AttNumbers - */ -static int -attnum_compare(const void *aptr, const void *bptr) -{ - AttrNumber a = *(const AttrNumber *) aptr; - AttrNumber b = *(const AttrNumber *) bptr; - - return pg_cmp_s16(a, b); -} - - -/* - * Invoked at the end of an object. 
- * - * Check to ensure that it was a complete MVNDistinctItem - * - */ -static JsonParseErrorType -ndistinct_object_end(void *state) -{ - ndistinctParseState *parse = state; - - int natts = 0; - AttrNumber *attrsort; - - MVNDistinctItem *item; - - if (!parse->found_attributes) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item must contain \"attributes\" key"))); - return JSON_SEM_ACTION_FAILED; - } - - if (!parse->found_ndistinct) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item must contain \"ndistinct\" key"))); - return JSON_SEM_ACTION_FAILED; - } - - if (parse->attnum_list == NIL) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("The \"attributes\" key must be an non-empty array"))); - return JSON_SEM_ACTION_FAILED; - } - - /* - * We need at least 2 attnums for a ndistinct item, anything less is - * malformed. 
- */ - natts = parse->attnum_list->length; - if (natts < 2) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("The attributes key must contain an array of at least two attnums"))); - - return JSON_SEM_ACTION_FAILED; - } - attrsort = palloc0(natts * sizeof(AttrNumber)); - - /* Create the MVNDistinctItem */ - item = palloc(sizeof(MVNDistinctItem)); - item->nattributes = natts; - item->attributes = palloc0(natts * sizeof(AttrNumber)); - item->ndistinct = (double) parse->ndistinct; - - /* fill out both attnum list and sortable list */ - for (int i = 0; i < natts; i++) - { - attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value; - item->attributes[i] = attrsort[i]; - } - - /* Check attrsort for uniqueness */ - qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare); - for (int i = 1; i < natts; i++) - if (attrsort[i] == attrsort[i - 1]) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("attnum list duplicate value found: %d", attrsort[i]))); - - return JSON_SEM_ACTION_FAILED; - } - pfree(attrsort); - - parse->distinct_items = lappend(parse->distinct_items, (void *) item); - - /* reset item state vars */ - list_free(parse->attnum_list); - parse->attnum_list = NIL; - parse->ndistinct = 0; - parse->found_attributes = false; - parse->found_ndistinct = false; - - /* Now we are looking for the next MVNDistinctItem */ - parse->state = NDIST_EXPECT_ITEM; - return JSON_SUCCESS; -} - - -/* - * ndsitinct input format has two types of arrays, the outer MVNDistinctItem - * array, and the attnum list array within each MVNDistinctItem. 
- */ -static JsonParseErrorType -ndistinct_array_start(void *state) -{ - ndistinctParseState *parse = state; - - switch (parse->state) - { - case NDIST_EXPECT_ATTNUM_LIST: - parse->state = NDIST_EXPECT_ATTNUM; - break; - - case NDIST_EXPECT_START: - parse->state = NDIST_EXPECT_ITEM; - break; - - default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Array found in unexpected place"))); - return JSON_SEM_ACTION_FAILED; - } - - return JSON_SUCCESS; -} - - -static JsonParseErrorType -ndistinct_array_end(void *state) -{ - ndistinctParseState *parse = state; - - /* The attnum list is complete, look for more MVNDistinctItem keys */ - if (parse->state == NDIST_EXPECT_ATTNUM) - { - parse->state = NDIST_EXPECT_KEY; - return JSON_SUCCESS; - } - - if (parse->state == NDIST_EXPECT_ITEM) - { - parse->state = NDIST_EXPECT_COMPLETE; - return JSON_SUCCESS; - } - - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Array found in unexpected place"))); - return JSON_SEM_ACTION_FAILED; -} - - -/* - * The valid keys for the MVNDistinctItem object are: - * - attributes - * - ndistinct - */ -static JsonParseErrorType -ndistinct_object_field_start(void *state, char *fname, bool isnull) -{ - ndistinctParseState *parse = state; - - const char *attributes = "attributes"; - const char *ndistinct = "ndistinct"; - - if (strcmp(fname, attributes) == 0) - { - parse->found_attributes = true; - parse->state = NDIST_EXPECT_ATTNUM_LIST; - return JSON_SUCCESS; - } - - if (strcmp(fname, ndistinct) == 0) - { - parse->found_ndistinct = true; - parse->state = NDIST_EXPECT_NDISTINCT; - return JSON_SUCCESS; - } - - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Only allowed keys are \%s\" 
and \%s\".", attributes, ndistinct))); - return JSON_SEM_ACTION_FAILED; -} - -/* - * - */ -static JsonParseErrorType -ndistinct_array_element_start(void *state, bool isnull) -{ - ndistinctParseState *parse = state; - - if (parse->state == NDIST_EXPECT_ATTNUM) - { - if (isnull) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Attnum list elements cannot be null."))); - - return JSON_SEM_ACTION_FAILED; - } - return JSON_SUCCESS; - } - - if (parse->state == NDIST_EXPECT_ITEM) - { - if (isnull) - { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item list elements cannot be null."))); - - return JSON_SEM_ACTION_FAILED; - } - - return JSON_SUCCESS; - } - - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Unexpected array element."))); - - return JSON_SEM_ACTION_FAILED; -} - -/* - * Handle scalar events from the ndistinct input parser. - * - */ -static JsonParseErrorType -ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) -{ - ndistinctParseState *parse = state; - - if (parse->state == NDIST_EXPECT_ATTNUM) - { - AttrNumber attnum = pg_strtoint16_safe(token, parse->escontext); - - if (SOFT_ERROR_OCCURRED(parse->escontext)) - return JSON_SEM_ACTION_FAILED; - - parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum); - return JSON_SUCCESS; - } - - if (parse->state == NDIST_EXPECT_NDISTINCT) - { - /* - * While the structure dictates that ndistinct in a double precision - * floating point, in practice it has always been an integer, and it - * is output as such. Therefore, we follow usage precendent over the - * actual storage structure, and read it in as an integer. 
- */ - parse->ndistinct = pg_strtoint64_safe(token, parse->escontext); - - if (SOFT_ERROR_OCCURRED(parse->escontext)) - return JSON_SEM_ACTION_FAILED; - - parse->state = NDIST_EXPECT_KEY; - return JSON_SUCCESS; - } - - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Unexpected scalar."))); - - return JSON_SEM_ACTION_FAILED; -} - -/* - * pg_ndistinct_in - * input routine for type pg_ndistinct - * - * example input: - * [{"attributes": [6, -1], "ndistinct": 14}, - * {"attributes": [6, -2], "ndistinct": 9143}, - * {"attributes": [-1,-2], "ndistinct": 13454}, - * {"attributes": [6, -1, -2], "ndistinct": 14549}] - */ -Datum -pg_ndistinct_in(PG_FUNCTION_ARGS) -{ - char *str = PG_GETARG_CSTRING(0); - - ndistinctParseState parse_state; - JsonParseErrorType result; - JsonLexContext *lex; - JsonSemAction sem_action; - - /* initialize semantic state */ - parse_state.str = str; - parse_state.state = NDIST_EXPECT_START; - parse_state.distinct_items = NIL; - parse_state.escontext = fcinfo->context; - parse_state.found_attributes = false; - parse_state.found_ndistinct = false; - parse_state.attnum_list = NIL; - parse_state.ndistinct = 0; - - /* set callbacks */ - sem_action.semstate = (void *) &parse_state; - sem_action.object_start = ndistinct_object_start; - sem_action.object_end = ndistinct_object_end; - sem_action.array_start = ndistinct_array_start; - sem_action.array_end = ndistinct_array_end; - sem_action.object_field_start = ndistinct_object_field_start; - sem_action.object_field_end = NULL; - sem_action.array_element_start = ndistinct_array_element_start; - sem_action.array_element_end = NULL; - sem_action.scalar = ndistinct_scalar; - - lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), - PG_UTF8, true); - result = pg_parse_json(lex, &sem_action); - freeJsonLexContext(lex); - - if (result == JSON_SUCCESS) - { - MVNDistinct *ndistinct; - int nitems = 
parse_state.distinct_items->length; - bytea *bytes; - - ndistinct = palloc(offsetof(MVNDistinct, items) + - nitems * sizeof(MVNDistinctItem)); - - ndistinct->magic = STATS_NDISTINCT_MAGIC; - ndistinct->type = STATS_NDISTINCT_TYPE_BASIC; - ndistinct->nitems = nitems; - - for (int i = 0; i < nitems; i++) - { - MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value; - - ndistinct->items[i].ndistinct = item->ndistinct; - ndistinct->items[i].nattributes = item->nattributes; - ndistinct->items[i].attributes = item->attributes; - - /* - * free the MVNDistinctItem, but not the attributes we're still - * using - */ - pfree(item); - } - bytes = statext_ndistinct_serialize(ndistinct); - - list_free(parse_state.distinct_items); - for (int i = 0; i < nitems; i++) - pfree(ndistinct->items[i].attributes); - pfree(ndistinct); - - PG_RETURN_BYTEA_P(bytes); - } - else if (result == JSON_SEM_ACTION_FAILED) - PG_RETURN_NULL(); /* escontext already set */ - - /* Anything else is a generic JSON parse error */ - ereturn(parse_state.escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", str), - errdetail("Must be valid JSON."))); - PG_RETURN_NULL(); -} - -/* - * Free allocations of an MVNDistinct - */ -void -free_pg_ndistinct(MVNDistinct *ndistinct) -{ - for (int i = 0; i < ndistinct->nitems; i++) - pfree(ndistinct->items[i].attributes); - - pfree(ndistinct); -} - /* * Validate an MVNDistinct against the extended statistics object definition. * @@ -836,74 +375,6 @@ pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, int num } -/* - * pg_ndistinct - * output routine for type pg_ndistinct - * - * Produces a human-readable representation of the value, in the format: - * [{"attributes": [attnum,. ..], "ndistinct": int}, ...] 
- * - */ -Datum -pg_ndistinct_out(PG_FUNCTION_ARGS) -{ - bytea *data = PG_GETARG_BYTEA_PP(0); - MVNDistinct *ndist = statext_ndistinct_deserialize(data); - int i; - StringInfoData str; - - initStringInfo(&str); - appendStringInfoChar(&str, '['); - - for (i = 0; i < ndist->nitems; i++) - { - MVNDistinctItem item = ndist->items[i]; - - if (i > 0) - appendStringInfoString(&str, ", "); - - if (item.nattributes <= 0) - elog(ERROR, "invalid zero-length attribute array in MVNDistinct"); - - appendStringInfo(&str, "{\"attributes\": [%d", item.attributes[0]); - - for (int j = 1; j < item.nattributes; j++) - appendStringInfo(&str, ", %d", item.attributes[j]); - - appendStringInfo(&str, "], \"ndistinct\": %d}", (int) item.ndistinct); - } - - appendStringInfoChar(&str, ']'); - - PG_RETURN_CSTRING(str.data); -} - -/* - * pg_ndistinct_recv - * binary input routine for type pg_ndistinct - */ -Datum -pg_ndistinct_recv(PG_FUNCTION_ARGS) -{ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot accept a value of type %s", "pg_ndistinct"))); - - PG_RETURN_VOID(); /* keep compiler quiet */ -} - -/* - * pg_ndistinct_send - * binary output routine for type pg_ndistinct - * - * n-distinct is serialized into a bytea value, so let's send that. - */ -Datum -pg_ndistinct_send(PG_FUNCTION_ARGS) -{ - return byteasend(fcinfo); -} - /* * ndistinct_for_combination * Estimates number of distinct values in a combination of columns. 
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index cc68ac545a5..70ff8e45516 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -85,6 +85,7 @@ OBJS = \ pg_locale_icu.o \ pg_locale_libc.o \ pg_lsn.o \ + pg_ndistinct.o \ pg_upgrade_support.o \ pgstatfuncs.o \ pseudorandomfuncs.o \ diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 12fa0c20912..b6b642c77a0 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -81,6 +81,7 @@ backend_sources += files( 'pg_locale_icu.c', 'pg_locale_libc.c', 'pg_lsn.c', + 'pg_ndistinct.c', 'pg_upgrade_support.c', 'pgstatfuncs.c', 'pseudorandomfuncs.c', diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c new file mode 100644 index 00000000000..9dd24f06ecf --- /dev/null +++ b/src/backend/utils/adt/pg_ndistinct.c @@ -0,0 +1,550 @@ +/*------------------------------------------------------------------------- + * + * pg_ndistinct.c + * pg_ndistinct data type support. 
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/pg_ndistinct.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/int.h"
+#include "common/jsonapi.h"
+#include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
+#include "nodes/pg_list.h"
+#include "statistics/statistics.h"
+#include "statistics/extended_stats_internal.h"
+#include "utils/builtins.h"
+#include "utils/syscache.h"
+
+/*
+ * The valid keys for the pg_ndistinct object:
+ *
+ * The same two strings are used to recognize keys while parsing input, to
+ * name the keys in error details, and to label the values on output.
+ */
+static const char *attributes = "attributes";
+static const char *ndistinct = "ndistinct";
+
+/*
+ * States of the input parser.  The JSON semantic callbacks below check the
+ * current state to decide whether an event is legal at this point of the
+ * document, and advance it as the document is consumed.
+ */
+typedef enum
+{
+	/* Initial state: only the opening of the outer array is accepted */
+	NDIST_EXPECT_START = 0,
+	/* Inside the outer array: an item object, or the end of the array */
+	NDIST_EXPECT_ITEM,
+	/* Inside an item object: a key, or the end of the object */
+	NDIST_EXPECT_KEY,
+	/* After the "attributes" key: the opening of the attnum array */
+	NDIST_EXPECT_ATTNUM_LIST,
+	/* Inside the attnum array: an attnum scalar, or the end of the array */
+	NDIST_EXPECT_ATTNUM,
+	/* After the "ndistinct" key: the ndistinct scalar */
+	NDIST_EXPECT_NDISTINCT,
+	/* The outer array has been closed */
+	NDIST_EXPECT_COMPLETE
+} ndistinctSemanticState;
+
+/*
+ * Working state handed to every JSON semantic callback through
+ * JsonSemAction.semstate.
+ */
+typedef struct
+{
+	const char *str;			/* Input text, quoted in error messages */
+	ndistinctSemanticState state;	/* Current position in the document */
+
+	List	   *distinct_items; /* Accumulated complete MVNDistinctItems */
+	Node	   *escontext;		/* Error context taken from fcinfo->context */
+
+	bool		found_attributes;	/* Item has an attributes key */
+	bool		found_ndistinct;	/* Item has ndistinct key */
+	List	   *attnum_list;	/* Accumulated attributes attnums */
+	int64		ndistinct;		/* ndistinct value of the item being read */
+} ndistinctParseState;
+
+/*
+ * Invoked at the start of each MVNDistinctItem.
+ *
+ * The entire JSON document should be one array of MVNDistinctItem objects.
+ *
+ * If we're anywhere else in the document, it's an error.
+ */
+static JsonParseErrorType
+ndistinct_object_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->state != NDIST_EXPECT_ITEM)
+	{
+		/*
+		 * Report with errsave(), not ereturn(): ereturn() itself returns its
+		 * dummy value, and (Datum) 0 read as a JsonParseErrorType is
+		 * JSON_SUCCESS, so a soft error would not stop the parse.
+		 */
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Expected Item object")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/* Now we expect to see attributes/ndistinct keys */
+	parse->state = NDIST_EXPECT_KEY;
+	return JSON_SUCCESS;
+}
+
+/*
+ * qsort() comparator for AttrNumbers, ascending order.
+ */
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+	AttrNumber	a = *(const AttrNumber *) aptr;
+	AttrNumber	b = *(const AttrNumber *) bptr;
+
+	return pg_cmp_s16(a, b);
+}
+
+
+/*
+ * Invoked at the end of an object.
+ *
+ * Check that the object just closed is a complete MVNDistinctItem: both keys
+ * were seen, the attnum list has at least two entries, and no attnum is
+ * repeated.  On success the item is appended to parse->distinct_items and
+ * the per-item fields are reset for the next object.
+ *
+ * Returns JSON_SUCCESS, or JSON_SEM_ACTION_FAILED after reporting the problem
+ * through parse->escontext.  errsave() is used instead of ereturn() because
+ * ereturn() would return its dummy value (0, i.e. JSON_SUCCESS) on a soft
+ * error.
+ */
+static JsonParseErrorType
+ndistinct_object_end(void *state)
+{
+	ndistinctParseState *parse = state;
+	int			natts;
+	int			i;
+	ListCell   *lc;
+	AttrNumber *attrsort;
+	MVNDistinctItem *item;
+
+	if (!parse->found_attributes)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Item must contain \"%s\" key", attributes)));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	if (!parse->found_ndistinct)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Item must contain \"%s\" key", ndistinct)));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	if (parse->attnum_list == NIL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("The \"%s\" key must be an non-empty array", attributes)));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * We need at least 2 attnums for a ndistinct item, anything less is
+	 * malformed.
+	 */
+	natts = list_length(parse->attnum_list);
+	if (natts < 2)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("The \"%s\" key must contain an array of at least two attnums", attributes)));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * Check the attnums for uniqueness on a sorted scratch copy, before the
+	 * item is built, so that the failure path has only the scratch array to
+	 * release.
+	 */
+	attrsort = palloc(natts * sizeof(AttrNumber));
+	i = 0;
+	foreach(lc, parse->attnum_list)
+		attrsort[i++] = (AttrNumber) lfirst_int(lc);
+
+	qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare);
+	for (i = 1; i < natts; i++)
+	{
+		if (attrsort[i] == attrsort[i - 1])
+		{
+			AttrNumber	dup = attrsort[i];
+
+			/* Free first: errsave() does not return for a hard error */
+			pfree(attrsort);
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("attnum list duplicate value found: %d", dup)));
+			return JSON_SEM_ACTION_FAILED;
+		}
+	}
+	pfree(attrsort);
+
+	/* Create the MVNDistinctItem, keeping the attnums in input order */
+	item = palloc(sizeof(MVNDistinctItem));
+	item->nattributes = natts;
+	item->attributes = palloc(natts * sizeof(AttrNumber));
+	item->ndistinct = (double) parse->ndistinct;
+
+	i = 0;
+	foreach(lc, parse->attnum_list)
+		item->attributes[i++] = (AttrNumber) lfirst_int(lc);
+
+	parse->distinct_items = lappend(parse->distinct_items, (void *) item);
+
+	/* reset item state vars */
+	list_free(parse->attnum_list);
+	parse->attnum_list = NIL;
+	parse->ndistinct = 0;
+	parse->found_attributes = false;
+	parse->found_ndistinct = false;
+
+	/* Now we are looking for the next MVNDistinctItem */
+	parse->state = NDIST_EXPECT_ITEM;
+	return JSON_SUCCESS;
+}
+
+
+/*
+ * ndistinct input format has two types of arrays, the outer MVNDistinctItem
+ * array, and the attnum list array within each MVNDistinctItem.
+ */
+static JsonParseErrorType
+ndistinct_array_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case NDIST_EXPECT_ATTNUM_LIST:
+			/* The attnum list following an "attributes" key */
+			parse->state = NDIST_EXPECT_ATTNUM;
+			break;
+
+		case NDIST_EXPECT_START:
+			/* The outer array of items */
+			parse->state = NDIST_EXPECT_ITEM;
+			break;
+
+		default:
+
+			/*
+			 * Report with errsave(), not ereturn(): ereturn() itself returns
+			 * its dummy value, and (Datum) 0 read as a JsonParseErrorType is
+			 * JSON_SUCCESS, so a soft error would not stop the parse.  The
+			 * same applies to every callback below.
+			 */
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Array found in unexpected place")));
+			return JSON_SEM_ACTION_FAILED;
+	}
+
+	return JSON_SUCCESS;
+}
+
+
+/*
+ * Invoked at the end of an array.
+ *
+ * Closing an attnum list returns us to reading item keys; closing the outer
+ * item array completes the document.  Anything else is an error.
+ */
+static JsonParseErrorType
+ndistinct_array_end(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case NDIST_EXPECT_ATTNUM:
+			/* The attnum list is complete, look for more item keys */
+			parse->state = NDIST_EXPECT_KEY;
+			return JSON_SUCCESS;
+
+		case NDIST_EXPECT_ITEM:
+			/* The outer array is complete */
+			parse->state = NDIST_EXPECT_COMPLETE;
+			return JSON_SUCCESS;
+
+		default:
+			break;
+	}
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Array found in unexpected place")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+
+/*
+ * Invoked at the start of each object field.
+ *
+ * Only the "attributes" and "ndistinct" keys are accepted, and each may
+ * appear at most once per item.  Without the latter check a repeated
+ * "attributes" key would have its attnums appended to the earlier list, and
+ * a repeated "ndistinct" key would silently overwrite the earlier value.
+ */
+static JsonParseErrorType
+ndistinct_object_field_start(void *state, char *fname, bool isnull)
+{
+	ndistinctParseState *parse = state;
+
+	if (strcmp(fname, attributes) == 0)
+	{
+		if (parse->found_attributes)
+		{
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Multiple \"%s\" keys are not allowed.", attributes)));
+			return JSON_SEM_ACTION_FAILED;
+		}
+
+		parse->found_attributes = true;
+		parse->state = NDIST_EXPECT_ATTNUM_LIST;
+		return JSON_SUCCESS;
+	}
+
+	if (strcmp(fname, ndistinct) == 0)
+	{
+		if (parse->found_ndistinct)
+		{
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Multiple \"%s\" keys are not allowed.", ndistinct)));
+			return JSON_SEM_ACTION_FAILED;
+		}
+
+		parse->found_ndistinct = true;
+		parse->state = NDIST_EXPECT_NDISTINCT;
+		return JSON_SUCCESS;
+	}
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Only allowed keys are \"%s\" and \"%s\".", attributes, ndistinct)));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Invoked at the start of each array element.
+ *
+ * Elements are expected only in the outer item array and in an attnum list,
+ * and in neither place may an element be null.
+ */
+static JsonParseErrorType
+ndistinct_array_element_start(void *state, bool isnull)
+{
+	ndistinctParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case NDIST_EXPECT_ATTNUM:
+			if (!isnull)
+				return JSON_SUCCESS;
+
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Attnum list elements cannot be null.")));
+			return JSON_SEM_ACTION_FAILED;
+
+		case NDIST_EXPECT_ITEM:
+			if (!isnull)
+				return JSON_SUCCESS;
+
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Item list elements cannot be null.")));
+			return JSON_SEM_ACTION_FAILED;
+
+		default:
+			break;
+	}
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Unexpected array element.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Handle scalar events from the ndistinct input parser.
+ *
+ * A scalar is either one attnum inside an attnum list, or the value of the
+ * "ndistinct" key.  Conversion failures are reported by the
+ * pg_strtoint*_safe() routines through parse->escontext.
+ */
+static JsonParseErrorType
+ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->state == NDIST_EXPECT_ATTNUM)
+	{
+		AttrNumber	attnum = pg_strtoint16_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
+		return JSON_SUCCESS;
+	}
+
+	if (parse->state == NDIST_EXPECT_NDISTINCT)
+	{
+		/*
+		 * While the structure dictates that ndistinct is a double precision
+		 * floating point, in practice it has always been an integer, and it
+		 * is output as such. Therefore, we follow usage precedent over the
+		 * actual storage structure, and read it in as an integer.
+		 */
+		parse->ndistinct = pg_strtoint64_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		parse->state = NDIST_EXPECT_KEY;
+		return JSON_SUCCESS;
+	}
+
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Unexpected scalar.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+
+/*
+ * Free allocations of an MVNDistinct
+ *
+ * Releases the attributes array of every item, then the MVNDistinct itself.
+ */
+void
+free_pg_ndistinct(MVNDistinct *ndistinct)
+{
+	for (int i = 0; i < ndistinct->nitems; i++)
+		pfree(ndistinct->items[i].attributes);
+
+	pfree(ndistinct);
+}
+
+
+/*
+ * pg_ndistinct_in
+ *		input routine for type pg_ndistinct
+ *
+ * example input:
+ *	 [{"attributes": [6, -1], "ndistinct": 14},
+ *	  {"attributes": [6, -2], "ndistinct": 9143},
+ *	  {"attributes": [-1,-2], "ndistinct": 13454},
+ *	  {"attributes": [6, -1, -2], "ndistinct": 14549}]
+ *
+ * The text is parsed as JSON, with the semantic callbacks above validating
+ * the structure and collecting the items; the result is then serialized with
+ * statext_ndistinct_serialize().  Errors are reported through
+ * fcinfo->context.
+ */
+Datum
+pg_ndistinct_in(PG_FUNCTION_ARGS)
+{
+	char	   *str = PG_GETARG_CSTRING(0);
+	ndistinctParseState parse_state;
+	JsonParseErrorType result;
+	JsonLexContext *lex;
+	JsonSemAction sem_action;
+	MVNDistinct *mvnd;
+	ListCell   *lc;
+	bytea	   *bytes;
+	int			nitems;
+	int			i;
+
+	/* initialize semantic state */
+	parse_state.str = str;
+	parse_state.state = NDIST_EXPECT_START;
+	parse_state.distinct_items = NIL;
+	parse_state.escontext = fcinfo->context;
+	parse_state.found_attributes = false;
+	parse_state.found_ndistinct = false;
+	parse_state.attnum_list = NIL;
+	parse_state.ndistinct = 0;
+
+	/* set callbacks */
+	sem_action.semstate = (void *) &parse_state;
+	sem_action.object_start = ndistinct_object_start;
+	sem_action.object_end = ndistinct_object_end;
+	sem_action.array_start = ndistinct_array_start;
+	sem_action.array_end = ndistinct_array_end;
+	sem_action.object_field_start = ndistinct_object_field_start;
+	sem_action.object_field_end = NULL;
+	sem_action.array_element_start = ndistinct_array_element_start;
+	sem_action.array_element_end = NULL;
+	sem_action.scalar = ndistinct_scalar;
+
+	lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
+									   PG_UTF8, true);
+	result = pg_parse_json(lex, &sem_action);
+	freeJsonLexContext(lex);
+
+	/* A callback rejected the input; escontext already set */
+	if (result == JSON_SEM_ACTION_FAILED)
+		PG_RETURN_NULL();
+
+	/* Anything else is a generic JSON parse error */
+	if (result != JSON_SUCCESS)
+		ereturn(parse_state.escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", str),
+				 errdetail("Must be valid JSON.")));
+
+	/*
+	 * "[]" parses successfully but leaves the item list NIL.  Reject it here
+	 * rather than dereferencing the empty list.
+	 */
+	nitems = list_length(parse_state.distinct_items);
+	if (nitems == 0)
+		ereturn(parse_state.escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", str),
+				 errdetail("Must contain at least one item.")));
+
+	mvnd = palloc(offsetof(MVNDistinct, items) +
+				  nitems * sizeof(MVNDistinctItem));
+
+	mvnd->magic = STATS_NDISTINCT_MAGIC;
+	mvnd->type = STATS_NDISTINCT_TYPE_BASIC;
+	mvnd->nitems = nitems;
+
+	i = 0;
+	foreach(lc, parse_state.distinct_items)
+	{
+		MVNDistinctItem *item = lfirst(lc);
+
+		mvnd->items[i].ndistinct = item->ndistinct;
+		mvnd->items[i].nattributes = item->nattributes;
+		mvnd->items[i].attributes = item->attributes;
+		i++;
+
+		/*
+		 * free the MVNDistinctItem, but not the attributes we're still
+		 * using
+		 */
+		pfree(item);
+	}
+	bytes = statext_ndistinct_serialize(mvnd);
+
+	/* The serialized copy is all we return; release the working data */
+	list_free(parse_state.distinct_items);
+	free_pg_ndistinct(mvnd);
+
+	PG_RETURN_BYTEA_P(bytes);
+}
+
+/*
+ * pg_ndistinct_out
+ *		output routine for type pg_ndistinct
+ *
+ * Produces a human-readable representation of the value, in the format:
+ *	 [{"attributes": [attnum, ...], "ndistinct": int}, ...]
+ *
+ */
+Datum
+pg_ndistinct_out(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_PP(0);
+	MVNDistinct *ndist = statext_ndistinct_deserialize(data);
+	StringInfoData str;
+
+	initStringInfo(&str);
+	appendStringInfoChar(&str, '[');
+
+	for (int i = 0; i < ndist->nitems; i++)
+	{
+		const MVNDistinctItem *item = &ndist->items[i];
+
+		if (i > 0)
+			appendStringInfoString(&str, ", ");
+
+		if (item->nattributes <= 0)
+			elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
+
+		appendStringInfo(&str, "{\"%s\": [%d", attributes, item->attributes[0]);
+
+		for (int j = 1; j < item->nattributes; j++)
+			appendStringInfo(&str, ", %d", item->attributes[j]);
+
+		/*
+		 * The input routine reads ndistinct as a 64-bit integer, so print it
+		 * as one.  Narrowing to int would not round-trip values above
+		 * INT_MAX.
+		 */
+		appendStringInfo(&str, "], \"%s\": " INT64_FORMAT "}",
+						 ndistinct, (int64) item->ndistinct);
+	}
+
+	appendStringInfoChar(&str, ']');
+
+	PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_ndistinct_recv
+ *		binary input routine for type pg_ndistinct
+ *
+ * Binary input is not supported; this always raises an error.
+ */
+Datum
+pg_ndistinct_recv(PG_FUNCTION_ARGS)
+{
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_send
+ *		binary output routine for type pg_ndistinct
+ *
+ * n-distinct is serialized into a bytea value, so let's send that.
+ */
+Datum
+pg_ndistinct_send(PG_FUNCTION_ARGS)
+{
+	return byteasend(fcinfo);
+}
-- 
2.51.1