From 624ab4a4fc27984d35d878a6760080322a79dee5 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Thu, 7 Apr 2022 23:36:50 -0400 Subject: [PATCH v3 09/11] Documentation for SQL/JSON features Nikita Glukhov Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zhihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru Discussion: https://postgr.es/m/7e2cb85d-24cf-4abb-30a5-1a33715959bd@postgrespro.ru --- doc/src/sgml/func.sgml | 1061 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 1057 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e09e289a43..fe6007e93b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -17596,7 +17596,937 @@ $.* ? (@ like_regex "^\\d+$") - + + + + SQL/JSON Functions and Expressions + + SQL/JSON + functions and expressions + + + + To provide native support for JSON data types within the SQL environment, + PostgreSQL implements the + SQL/JSON data model. + This model comprises sequences of items. Each item can hold SQL scalar + values, with an additional SQL/JSON null value, and composite data structures + that use JSON arrays and objects. The model is a formalization of the implied + data model in the JSON specification + RFC 7159. + + + + SQL/JSON allows you to handle JSON data alongside regular SQL data, + with transaction support, including: + + + + + + Uploading JSON data into the database and storing it in + regular SQL columns as character or binary strings. + + + + + Generating JSON objects and arrays from relational data. + + + + + Querying JSON data using SQL/JSON query functions and + SQL/JSON path language expressions. + + + + + + There are two groups of SQL/JSON functions. + Constructor functions + generate JSON data from values of SQL types. + Query functions + evaluate SQL/JSON path language expressions against JSON values + and produce values of SQL/JSON types, which are converted to SQL types. + + + + Many SQL/JSON functions have an optional FORMAT + clause. This is provided to conform with the SQL standard, but has no + effect except where noted otherwise. + + + + lists the SQL/JSON + Constructor functions. Each function has a RETURNING + clause specifying the data type returned. For the json and + json_scalar functions, this needs to be either json or + jsonb. For the other constructor functions it must be one of json, + jsonb, bytea, a character string type (text, char, + varchar, or nchar), or a type for which there is a cast + from json to that type. + By default, the json type is returned. + + + + + Many of the results that can be obtained from the SQL/JSON Constructor + functions can also be obtained by calling + PostgreSQL-specific functions detailed in + and + . + + + + + SQL/JSON Constructor Functions + + + + + Function signature + + + Description + + + Example(s) + + + + + + + json constructor + json ( + expression + FORMAT JSON ENCODING UTF8 + { WITH | WITHOUT } UNIQUE KEYS + RETURNING json_data_type ) + + + The expression can be any text type or a + bytea in UTF8 encoding. If the + expression is NULL, an + SQL null value is returned. + If WITH UNIQUE is specified, the + expression must not contain any duplicate + object keys. + + + json('{"a":123, "b":[true,"foo"], "a":"bar"}') + {"a":123, "b":[true,"foo"], "a":"bar"} + + + json('{"a":123,"b":[true,"foo"],"a":"bar"}' returning jsonb) + {"a": "bar", "b": [true, "foo"]} + + + + + json_scalar + json_scalar (expression + RETURNING json_data_type ) + + + Returns a JSON scalar value representing + expression. + If the input is NULL, an SQL NULL is returned. If the input is a number + or a boolean value, a corresponding JSON number or boolean value is + returned. For any other value a JSON string is returned. + + + json_scalar(123.45) + 123.45 + + + json_scalar(CURRENT_TIMESTAMP) + "2022-05-10T10:51:04.62128-04:00" + + + + + json_object + json_object ( + { key_expression { VALUE | ':' } + value_expression FORMAT JSON ENCODING UTF8 }, ... + { NULL | ABSENT } ON NULL + { WITH | WITHOUT } UNIQUE KEYS + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Constructs a JSON object of all the key value pairs given, + or an empty object if none are given. + key_expression is a scalar expression + defining the JSON key, which is + converted to the text type. + It cannot be NULL nor can it + belong to a type that has a cast to the json. + If WITH UNIQUE is specified, there must not + be any duplicate key_expression. + If ABSENT ON NULL is specified, the entire + pair is omitted if the value_expression + is NULL. + + + json_object('code' VALUE 'P123', 'title': 'Jaws') + {"code" : "P123", "title" : "Jaws"} + + + + + json_objectagg + json_objectagg ( + { key_expression { VALUE | ':' } value_expression } + { NULL | ABSENT } ON NULL + { WITH | WITHOUT } UNIQUE KEYS + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Behaves like json_object above, but as an + aggregate function, so it only takes one + key_expression and one + value_expression parameter. + + + SELECT json_objectagg(k:v) FROM (VALUES ('a'::text,current_date),('b',current_date + 1)) AS t(k,v) + { "a" : "2022-05-10", "b" : "2022-05-11" } + + + + + json_array + json_array ( + { value_expression FORMAT JSON } , ... + { NULL | ABSENT } ON NULL + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + json_array ( + query_expression + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Constructs a JSON array from either a series of + value_expression parameters or from the results + of query_expression, + which must be a SELECT query returning a single column. If + ABSENT ON NULL is specified, NULL values are ignored. + This is always the case if a + query_expression is used. + + + json_array(1,true,json '{"a":null}') + [1, true, {"a":null}] + + + json_array(SELECT * FROM (VALUES(1),(2)) t) + [1, 2] + + + + + json_arrayagg + json_arrayagg ( + value_expression + ORDER BY sort_expression + { NULL | ABSENT } ON NULL + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Behaves in the same way as json_array + but as an aggregate function so it only takes one + value_expression parameter. + If ABSENT ON NULL is specified, any NULL + values are omitted. + If ORDER BY is specified, the elements will + appear in the array in that order rather than in the input order. + + + SELECT json_arrayagg(v) FROM (VALUES(2),(1)) t(v) + [2, 1] + + + + +
+ + + details SQL/JSON + facilities for testing and serializing JSON. + + + + SQL/JSON Testing and Serializing Functions + + + + + Function signature + + + Description + + + Example(s) + + + + + + + IS JSON + expression IS NOT JSON + { VALUE | SCALAR | ARRAY | OBJECT } + { WITH | WITHOUT } UNIQUE KEYS + + + This predicate tests whether expression can be + parsed as JSON, possibly of a specified type. + If SCALAR or ARRAY or + OBJECT is specified, the + test is whether or not the JSON is of that particular type. If + WITH UNIQUE is specified, then an any object in the + expression is also tested to see if it + has duplicate keys. + + + +SELECT js, + js IS JSON "json?", + js IS JSON SCALAR "scalar?", + js IS JSON OBJECT "object?", + js IS JSON ARRAY "array?" +FROM +(VALUES ('123'), ('"abc"'), ('{"a": "b"}'), +('[1,2]'),('abc')) foo(js); + js | json? | scalar? | object? | array? +------------+-------+---------+---------+-------- + 123 | t | t | f | f + "abc" | t | t | f | f + {"a": "b"} | t | f | t | f + [1,2] | t | f | f | t + abc | f | f | f | f + + + + + + json_serialize ( + expression FORMAT JSON ENCODING UTF8 + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Transforms an SQL/JSON value into a character or binary string. The + expression can be of any JSON type, any + character string type, or bytea in UTF8 encoding. + The returned type can be any character string type or + bytea. The default is text. + + + json_serialize('{ "a" : 1 } ' RETURNING bytea) + \x7b20226122203a2031207d20 + + + + +
+ + + details the SQL/JSON + functions that can be used to query JSON data, except + for json_table. + + + + + SQL/JSON paths can only be applied to the jsonb type, so it + might be necessary to cast the context_item + argument of these functions to jsonb. + + + + + SQL/JSON Query Functions + + + + + Function signature + + + Description + + + Example(s) + + + + + + + json_exists + json_exists ( + context_item, path_expression PASSING { value AS varname } , ... + RETURNING data_type + { TRUE | FALSE | UNKNOWN | ERROR } ON ERROR ) + + + Returns true if the SQL/JSON path_expression + applied to the context_item using the + values yields any items. + The ON ERROR clause specifies what is returned if + an error occurs. Note that if the path_expression + is strict, an error is generated if it yields no items. + The default value is UNKNOWN which causes a NULL + result. + + + json_exists(jsonb '{"key1": [1,2,3]}', 'strict $.key1[*] ? (@ > 2)') + t + + + json_exists(jsonb '{"a": [1,2,3]}', 'lax $.a[5]' ERROR ON ERROR) + f + + + json_exists(jsonb '{"a": [1,2,3]}', 'strict $.a[5]' ERROR ON ERROR) + ERROR: jsonpath array subscript is out of bounds + + + + + json_value + json_value ( + context_item, path_expression + PASSING { value AS varname } , ... + RETURNING data_type + { ERROR | NULL | DEFAULT expression } ON EMPTY + { ERROR | NULL | DEFAULT expression } ON ERROR ) + + + Returns the result of applying the + path_expression to the + context_item using the + values. The extracted value must be + a single SQL/JSON scalar item. For results that + are objects or arrays, use the json_query + instead. + The returned data_type has the same semantics + as for constructor functions like json_objectagg. + The default returned type is text. + The ON EMPTY clause specifies the behavior if the + path_expression yields no value at all. + The ON ERROR clause specifies the behavior if an + error occurs, as a result of either the evaluation or the application + of the ON EMPTY clause. + + + json_value(jsonb '"123.45"', '$' RETURNING float) + 123.45 + + + json_value(jsonb '"03:04 2015-02-01"', '$.datetime("HH24:MI YYYY-MM-DD")' RETURNING date) + 2015-02-01 + + + json_value(jsonb '[1,2]', 'strict $[*]' DEFAULT 9 ON ERROR) + 9 + + + + + json_query + json_query ( + context_item, path_expression PASSING { value AS varname } , ... + RETURNING data_type FORMAT JSON ENCODING UTF8 + { WITHOUT | WITH { CONDITIONAL | UNCONDITIONAL } } ARRAY WRAPPER + { KEEP | OMIT } QUOTES ON SCALAR STRING + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON EMPTY + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON ERROR ) + + + Returns the result of applying the + path_expression to the + context_item using the + values. + This function must return a JSON string, so if the path expression + returns multiple SQL/JSON items, you must wrap the result using the + WITH WRAPPER clause. If the wrapper is + UNCONDITIONAL, an array wrapper will always + be applied, even if the returned value is already a single JSON object + or array, but if it is CONDITIONAL it will not be + applied to a single array or object. UNCONDITIONAL + is the default. + If the result is a scalar string, by default the value returned will have + surrounding quotes making it a valid JSON value. However, this behavior + is reversed if OMIT QUOTES is specified. + The ON ERROR and ON EMPTY + clauses have similar semantics to those clauses for + json_value. + The returned data_type has the same semantics + as for constructor functions like json_objectagg. + The default returned type is text. + + + json_query(jsonb '[1,[2,3],null]', 'lax $[*][1]' WITH CONDITIONAL WRAPPER) + [3] + + + + +
+ +
+ + + JSON_TABLE + + json_table + + + + json_table is an SQL/JSON function which + queries JSON data + and presents the results as a relational view, which can be accessed as a + regular SQL table. You can only use json_table inside the + FROM clause of a SELECT statement. + + + + Taking JSON data as input, json_table uses + a path expression to extract a part of the provided data that + will be used as a row pattern for the + constructed view. Each SQL/JSON item at the top level of the row pattern serves + as the source for a separate row in the constructed relational view. + + + + To split the row pattern into columns, json_table + provides the COLUMNS clause that defines the + schema of the created view. For each column to be constructed, + this clause provides a separate path expression that evaluates + the row pattern, extracts a JSON item, and returns it as a + separate SQL value for the specified column. If the required value + is stored in a nested level of the row pattern, it can be extracted + using the NESTED PATH subclause. Joining the + columns returned by NESTED PATH can add multiple + new rows to the constructed view. Such rows are called + child rows, as opposed to the parent row + that generates them. + + + + The rows produced by JSON_TABLE are laterally + joined to the row that generated them, so you do not have to explicitly join + the constructed view with the original table holding JSON + data. Optionally, you can specify how to join the columns returned + by NESTED PATH using the PLAN clause. + + + + Each NESTED PATH clause can generate one or more + columns. Columns produced by NESTED PATHs at the + same level are considered to be siblings, + while a column produced by a NESTED PATH is + considered to be a child of the column produced by and + NESTED PATH or row expression at a higher level. + Sibling columns are always joined first. Once they are processed, + the resulting rows are joined to the parent row. + + + + + + context_item, path_expression AS json_path_name PASSING { value AS varname } , ... + + + + The input data to query, the JSON path expression defining the query, + and an optional PASSING clause, which can provide data + values to the path_expression. + The result of the input data + evaluation is called the row pattern. The row + pattern is used as the source for row values in the constructed view. + + + + + + + COLUMNS( json_table_column , ... ) + + + + + The COLUMNS clause defining the schema of the + constructed view. In this clause, you must specify all the columns + to be filled with SQL/JSON items. + The json_table_column + expression has the following syntax variants: + + + + + + name type + PATH json_path_specification + + + + + Inserts a single SQL/JSON item into each row of + the specified column. + + + The provided PATH expression parses the + row pattern defined by json_api_common_syntax + and fills the column with produced SQL/JSON items, one for each row. + If the PATH expression is omitted, + JSON_TABLE uses the + $.name path expression, + where name is the provided column name. + In this case, the column name must correspond to one of the + keys within the SQL/JSON item produced by the row pattern. + + + Optionally, you can add ON EMPTY and + ON ERROR clauses to define how to handle missing values + or structural errors. + WRAPPER and QUOTES clauses can only + be used with JSON, array, and composite types. + These clauses have the same syntax and semantics as for + json_value and json_query. + + + + + + + name type FORMAT json_representation + PATH json_path_specification + + + + + Generates a column and inserts a composite SQL/JSON + item into each row of this column. + + + The provided PATH expression parses the + row pattern defined by json_api_common_syntax + and fills the column with produced SQL/JSON items, one for each row. + If the PATH expression is omitted, + JSON_TABLE uses the + $.name path expression, + where name is the provided column name. + In this case, the column name must correspond to one of the + keys within the SQL/JSON item produced by the row pattern. + + + Optionally, you can add WRAPPER, QUOTES, + ON EMPTY and ON ERROR clauses + to define additional settings for the returned SQL/JSON items. + These clauses have the same syntax and semantics as + for json_query. + + + + + + + name type + EXISTS PATH json_path_specification + + + + + Generates a column and inserts a boolean item into each row of this column. + + + The provided PATH expression parses the + row pattern defined by json_api_common_syntax, + checks whether any SQL/JSON items were returned, and fills the column with + resulting boolean value, one for each row. + The specified type should have cast from + boolean. + If the PATH expression is omitted, + JSON_TABLE uses the + $.name path expression, + where name is the provided column name. + + + Optionally, you can add ON ERROR clause to define + error behavior. This clause has the same syntax and semantics as + for json_exists. + + + + + + + NESTED PATH json_path_specification AS json_path_name + COLUMNS ( json_table_column , ... ) + + + + + Extracts SQL/JSON items from nested levels of the row pattern, + generates one or more columns as defined by the COLUMNS + subclause, and inserts the extracted SQL/JSON items into each row of these columns. + The json_table_column expression in the + COLUMNS subclause uses the same syntax as in the + parent COLUMNS clause. + + + + The NESTED PATH syntax is recursive, + so you can go down multiple nested levels by specifying several + NESTED PATH subclauses within each other. + It allows to unnest the hierarchy of JSON objects and arrays + in a single function invocation rather than chaining several + JSON_TABLE expressions in an SQL statement. + + + + You can use the PLAN clause to define how + to join the columns returned by NESTED PATH clauses. + + + + + + + name FOR ORDINALITY + + + + + Adds an ordinality column that provides sequential row numbering. + You can have only one ordinality column per table. Row numbering + is 1-based. For child rows that result from the NESTED PATH + clauses, the parent row number is repeated. + + + + + + + + + + + AS json_path_name + + + + + The optional json_path_name serves as an + identifier of the provided json_path_specification. + The path name must be unique and distinct from the column names. + When using the PLAN clause, you must specify the names + for all the paths, including the row pattern. Each path name can appear in + the PLAN clause only once. + + + + + + + PLAN ( json_table_plan ) + + + + + Defines how to join the data returned by NESTED PATH + clauses to the constructed view. + + + To join columns with parent/child relationship, you can use: + + + + + INNER + + + + + Use INNER JOIN, so that the parent row + is omitted from the output if it does not have any child rows + after joining the data returned by NESTED PATH. + + + + + + + OUTER + + + + + Use LEFT OUTER JOIN, so that the parent row + is always included into the output even if it does not have any child rows + after joining the data returned by NESTED PATH, with NULL values + inserted into the child columns if the corresponding + values are missing. + + + This is the default option for joining columns with parent/child relationship. + + + + + + + To join sibling columns, you can use: + + + + + + UNION + + + + + Generate one row for each value produced by each of the sibling + columns. The columns from the other siblings are set to null. + + + This is the default option for joining sibling columns. + + + + + + + CROSS + + + + + Generate one row for each combination of values from the sibling columns. + + + + + + + + + + + + PLAN DEFAULT ( OUTER | INNER , UNION | CROSS ) + + + + The terms can also be specified in reverse order. The + INNER or OUTER option defines the + joining plan for parent/child columns, while UNION or + CROSS affects joins of sibling columns. This form + of PLAN overrides the default plan for + all columns at once. Even though the path names are not included in the + PLAN DEFAULT form, to conform to the SQL/JSON standard + they must be provided for all the paths if the PLAN + clause is used. + + + PLAN DEFAULT is simpler than specifying a complete + PLAN, and is often all that is required to get the desired + output. + + + + + + Examples + + + In these examples the following small table storing some JSON data will be used: + +CREATE TABLE my_films ( js jsonb ); + +INSERT INTO my_films VALUES ( +'{ "favorites" : [ + { "kind" : "comedy", "films" : [ + { "title" : "Bananas", + "director" : "Woody Allen"}, + { "title" : "The Dinner Game", + "director" : "Francis Veber" } ] }, + { "kind" : "horror", "films" : [ + { "title" : "Psycho", + "director" : "Alfred Hitchcock" } ] }, + { "kind" : "thriller", "films" : [ + { "title" : "Vertigo", + "director" : "Alfred Hitchcock" } ] }, + { "kind" : "drama", "films" : [ + { "title" : "Yojimbo", + "director" : "Akira Kurosawa" } ] } + ] }'); + + + + Query the my_films table holding + some JSON data about the films and create a view that + distributes the film genre, title, and director between separate columns: + +SELECT jt.* FROM + my_films, + JSON_TABLE ( js, '$.favorites[*]' COLUMNS ( + id FOR ORDINALITY, + kind text PATH '$.kind', + NESTED PATH '$.films[*]' COLUMNS ( + title text PATH '$.title', + director text PATH '$.director'))) AS jt; +----+----------+------------------+------------------- + id | kind | title | director +----+----------+------------------+------------------- + 1 | comedy | Bananas | Woody Allen + 1 | comedy | The Dinner Game | Francis Veber + 2 | horror | Psycho | Alfred Hitchcock + 3 | thriller | Vertigo | Alfred Hitchcock + 4 | drama | Yojimbo | Akira Kurosawa + (5 rows) + + + + + Find a director that has done films in two different genres: + +SELECT + director1 AS director, title1, kind1, title2, kind2 +FROM + my_films, + JSON_TABLE ( js, '$.favorites' AS favs COLUMNS ( + NESTED PATH '$[*]' AS films1 COLUMNS ( + kind1 text PATH '$.kind', + NESTED PATH '$.films[*]' AS film1 COLUMNS ( + title1 text PATH '$.title', + director1 text PATH '$.director') + ), + NESTED PATH '$[*]' AS films2 COLUMNS ( + kind2 text PATH '$.kind', + NESTED PATH '$.films[*]' AS film2 COLUMNS ( + title2 text PATH '$.title', + director2 text PATH '$.director' + ) + ) + ) + PLAN (favs OUTER ((films1 INNER film1) CROSS (films2 INNER film2))) + ) AS jt + WHERE kind1 > kind2 AND director1 = director2; + + director | title1 | kind1 | title2 | kind2 +------------------+---------+----------+--------+-------- + Alfred Hitchcock | Vertigo | thriller | Psycho | horror +(1 row) + + + + @@ -19974,6 +20904,29 @@ SELECT NULLIF(value, '(none)') ... No + + + + json_agg_strict + + json_agg_strict ( anyelement ) + json + + + + jsonb_agg_strict + + jsonb_agg_strict ( anyelement ) + jsonb + + + Collects all the input values, skipping nulls, into a JSON array. + Values are converted to JSON as per to_json + or to_jsonb. + + No + + @@ -19995,9 +20948,97 @@ SELECT NULLIF(value, '(none)') ... Collects all the key/value pairs into a JSON object. Key arguments - are coerced to text; value arguments are converted as - per to_json or to_jsonb. - Values can be null, but not keys. + are coerced to text; value arguments are converted as per + to_json or to_jsonb + Values can be null, but keys cannot. + + No + + + + + + json_object_agg_strict + + json_object_agg_strict ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_strict + + jsonb_object_agg_strict ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + The key can not be null. If the + value is null then the entry is skipped, + + No + + + + + + json_object_agg_unique + + json_object_agg_unique ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_unique + + jsonb_object_agg_unique ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + Values can be null, but keys cannot. + If there is a duplicate key an error is thrown. + + No + + + + + + json_object_agg_unique_strict + + json_object_agg_unique_strict ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_unique_strict + + jsonb_object_agg_unique_strict ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + The key can not be null. If the + value is null then the entry is skipped, + If there is a duplicate key an error is thrown. No @@ -20175,7 +21216,12 @@ SELECT NULLIF(value, '(none)') ... The aggregate functions array_agg, json_agg, jsonb_agg, + json_agg_strict, jsonb_agg_strict, json_object_agg, jsonb_object_agg, + json_object_agg_strict, jsonb_object_agg_strict, + json_object_agg_unique, jsonb_object_agg_unique, + json_object_agg_unique_strict, + jsonb_object_agg_unique_strict, string_agg, and xmlagg, as well as similar user-defined aggregate functions, produce meaningfully different result values @@ -20195,6 +21241,13 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; subquery's output to be reordered before the aggregate is computed. + + + In addition to the JSON aggregates shown here, see the json_objectagg + and json_arrayagg constructors in . + + + ANY -- 2.35.3