diff --git a/contrib/json/Makefile b/contrib/json/Makefile new file mode 100644 index 0000000..d8b5410 --- /dev/null +++ b/contrib/json/Makefile @@ -0,0 +1,18 @@ +MODULE_big = json +OBJS = json.o json_io.o json_op.o compat.o + +EXTENSION = json +DATA = json--0.1.sql json--unpackaged--0.1.sql uninstall_json.sql + +REGRESS = init json validate condense json_stringify escape_unicode relocatable + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/json +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/json/compat.c b/contrib/json/compat.c new file mode 100644 index 0000000..fe6b62c --- /dev/null +++ b/contrib/json/compat.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * compat.c + * Compatibility routines to let the JSON module work in PostgreSQL 8.4 + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * Arranged by Joey Adams . + * + *------------------------------------------------------------------------- + */ + +#include "compat.h" + +#if PG_VERSION_NUM < 80500 + +#include "access/htup.h" +#include "catalog/pg_proc.h" +#include "utils/syscache.h" + +/* + * get_func_namespace (lifted from 9.1devel) + * + * Returns the pg_namespace OID associated with a given function. + */ +Oid +json_compat_get_func_namespace(Oid funcid) +{ + HeapTuple tp; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + Oid result; + + result = functup->pronamespace; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +#endif /* PG_VERSION_NUM < 80500 */ + +#if PG_VERSION_NUM < 90100 + +/* + * Lifted from 9.1devel + * + * Convert a UTF-8 character to a Unicode code point. + * This is a one-character version of pg_utf2wchar_with_len. 
+ *
+ * No error checks here, c must point to a long-enough string.
+ */
+pg_wchar
+json_compat_utf8_to_unicode(const unsigned char *c)
+{
+	if ((*c & 0x80) == 0)
+		return (pg_wchar) c[0];
+	else if ((*c & 0xe0) == 0xc0)
+		return (pg_wchar) (((c[0] & 0x1f) << 6) |
+						   (c[1] & 0x3f));
+	else if ((*c & 0xf0) == 0xe0)
+		return (pg_wchar) (((c[0] & 0x0f) << 12) |
+						   ((c[1] & 0x3f) << 6) |
+						   (c[2] & 0x3f));
+	else if ((*c & 0xf8) == 0xf0)
+		return (pg_wchar) (((c[0] & 0x07) << 18) |
+						   ((c[1] & 0x3f) << 12) |
+						   ((c[2] & 0x3f) << 6) |
+						   (c[3] & 0x3f));
+	else
+		/* that is an invalid code on purpose */
+		return 0xffffffff;
+}
+
+#endif /* PG_VERSION_NUM < 90100 */
diff --git a/contrib/json/compat.h b/contrib/json/compat.h
new file mode 100644
index 0000000..b384120
--- /dev/null
+++ b/contrib/json/compat.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * compat.h
+ *	  Compatibility routines to let the JSON module work in PostgreSQL 8.4
+ *
+ * Copyright (c) 2011, PostgreSQL Global Development Group
+ * Arranged by Joey Adams .
+ * + *------------------------------------------------------------------------- + */ + +#ifndef JSON_COMPAT_H +#define JSON_COMPAT_H + +#include "postgres.h" +#include "mb/pg_wchar.h" + +#ifndef SearchSysCache1 +#define SearchSysCache1(cacheId, key1) \ + SearchSysCache(cacheId, key1, 0, 0, 0) +#endif + +#ifndef SearchSysCacheList1 +#define SearchSysCacheList1(cacheId, key1) \ + SearchSysCacheList(cacheId, 1, key1, 0, 0, 0) +#endif + +#ifndef GetSysCacheOid2 +#define GetSysCacheOid2(cacheId, key1, key2) \ + GetSysCacheOid(cacheId, key1, key2, 0, 0) +#endif + +#if PG_VERSION_NUM < 80500 +#define get_func_namespace json_compat_get_func_namespace +extern Oid json_compat_get_func_namespace(Oid funcid); +#endif + +#if PG_VERSION_NUM < 90100 +#define utf8_to_unicode json_compat_utf8_to_unicode +extern pg_wchar json_compat_utf8_to_unicode(const unsigned char *c); +#endif + +#endif diff --git a/contrib/json/expected/condense.out b/contrib/json/expected/condense.out new file mode 100644 index 0000000..8d8a2d9 --- /dev/null +++ b/contrib/json/expected/condense.out @@ -0,0 +1,83 @@ +SELECT json('"hello"'); + json +--------- + "hello" +(1 row) + +SELECT json($$"hello\u266Bworld"$$); + json +--------------- + "hello♫world" +(1 row) + +SELECT json($$"hello\u266bworld"$$); + json +--------------- + "hello♫world" +(1 row) + +SELECT json($$"hello♫world"$$); + json +--------------- + "hello♫world" +(1 row) + +SELECT json($$ "hello world" $$); + json +--------------- + "hello world" +(1 row) + +SELECT json($$ { "hello" : "world"} $$); + json +------------------- + {"hello":"world"} +(1 row) + +SELECT json($$ { "hello" : "world", "bye": 0.0001 } $$); + json +-------------------------------- + {"hello":"world","bye":0.0001} +(1 row) + +SELECT json($$ { "hello" : "world", + "bye": 0.0000001 +} $$); + json +----------------------------------- + {"hello":"world","bye":0.0000001} +(1 row) + +SELECT json($$ { "hello" : "world" +, +"bye" +: [-0.1234e1, 12345e0] } $$); + json 
+--------------------------------------------- + {"hello":"world","bye":[-0.1234e1,12345e0]} +(1 row) + +SELECT json($$"\u007E\u007F\u0080"$$); + json +----------------- + "~\u007F\u0080" +(1 row) + +SELECT json($$"\u00FE\u00FF\u0100"$$); + json +------- + "þÿĀ" +(1 row) + +SELECT json($$"\uD835\uDD4E"$$); + json +------ + "𝕎" +(1 row) + +SELECT json($$"\uD835\uD835"$$); + json +---------------- + "\uD835\uD835" +(1 row) + diff --git a/contrib/json/expected/escape_unicode.out b/contrib/json/expected/escape_unicode.out new file mode 100644 index 0000000..43affa2 --- /dev/null +++ b/contrib/json/expected/escape_unicode.out @@ -0,0 +1,80 @@ +SET client_encoding TO UTF8; +CREATE TABLE escape_unicode_test (json JSON); +INSERT INTO escape_unicode_test VALUES ($$"\u266b\uD835\uDD4E"$$); +-- Output should not be escaped. +SELECT json FROM escape_unicode_test; + json +------ + "♫𝕎" +(1 row) + +SELECT json::TEXT FROM escape_unicode_test; + json +------ + "♫𝕎" +(1 row) + +SELECT json_stringify(json) FROM escape_unicode_test; + json_stringify +---------------- + "♫𝕎" +(1 row) + +SELECT json_stringify(json, '') FROM escape_unicode_test; + json_stringify +---------------- + "♫𝕎" +(1 row) + +SET client_encoding TO SQL_ASCII; +-- Output should still not be escaped. +SELECT json FROM escape_unicode_test; + json +----------- + "♫𝕎" +(1 row) + +SELECT json::TEXT FROM escape_unicode_test; + json +----------- + "♫𝕎" +(1 row) + +SELECT json_stringify(json) FROM escape_unicode_test; + json_stringify +---------------- + "♫𝕎" +(1 row) + +SELECT json_stringify(json, '') FROM escape_unicode_test; + json_stringify +---------------- + "♫𝕎" +(1 row) + +SET client_encoding TO LATIN1; +-- Output should be escaped now. 
+SELECT json FROM escape_unicode_test; + json +---------------------- + "\u266B\uD835\uDD4E" +(1 row) + +SELECT json::TEXT FROM escape_unicode_test; + json +---------------------- + "\u266B\uD835\uDD4E" +(1 row) + +SELECT json_stringify(json) FROM escape_unicode_test; + json_stringify +---------------------- + "\u266B\uD835\uDD4E" +(1 row) + +SELECT json_stringify(json, '') FROM escape_unicode_test; + json_stringify +---------------------- + "\u266B\uD835\uDD4E" +(1 row) + diff --git a/contrib/json/expected/init.out b/contrib/json/expected/init.out new file mode 100644 index 0000000..25fdbb1 --- /dev/null +++ b/contrib/json/expected/init.out @@ -0,0 +1 @@ +\set ECHO none diff --git a/contrib/json/expected/json.out b/contrib/json/expected/json.out new file mode 100644 index 0000000..722788b --- /dev/null +++ b/contrib/json/expected/json.out @@ -0,0 +1,167 @@ +SELECT '[]'::JSON; + json +------ + [] +(1 row) + +SELECT '['::JSON; +ERROR: invalid input syntax for JSON +LINE 1: SELECT '['::JSON; + ^ +SELECT '[1,2,3]'::JSON; + json +--------- + [1,2,3] +(1 row) + +SELECT '[1,2,3]'::JSON::TEXT; + text +--------- + [1,2,3] +(1 row) + +SELECT '[1,2,3 ]'::JSON; + json +--------- + [1,2,3] +(1 row) + +SELECT '[1,2,3 ,4]'::JSON; + json +----------- + [1,2,3,4] +(1 row) + +SELECT '[1,2,3 ,4.0]'::JSON; + json +------------- + [1,2,3,4.0] +(1 row) + +SELECT '[1,2,3 ,4]'::JSON; + json +----------- + [1,2,3,4] +(1 row) + +SELECT 'true'::JSON; + json +------ + true +(1 row) + +SELECT 'true'::TEXT::JSON; + json +------ + true +(1 row) + +SELECT 'false'::JSON; + json +------- + false +(1 row) + +SELECT 'null'::JSON; + json +------ + null +(1 row) + +SELECT '1.1'::JSON; + json +------ + 1.1 +(1 row) + +SELECT '"string"'::JSON; + json +---------- + "string" +(1 row) + +SELECT '{"key1":"value1", "key2":"value2"}'::JSON; + json +----------------------------------- + {"key1":"value1","key2":"value2"} +(1 row) + +SELECT 15::JSON; + json +------ + 15 +(1 row) + +SELECT json_get_type('[]'); + 
json_get_type +--------------- + array +(1 row) + +SELECT json_get_type('{}'); + json_get_type +--------------- + object +(1 row) + +SELECT json_get_type('true'); + json_get_type +--------------- + bool +(1 row) + +SELECT json_get_type('false'); + json_get_type +--------------- + bool +(1 row) + +SELECT json_get_type('null'); + json_get_type +--------------- + null +(1 row) + +CREATE TABLE testjson (j JSON); +INSERT INTO testjson VALUES ('[1,2,3,4]'); +INSERT INTO testjson VALUES ('{"key":"value"}'); +INSERT INTO testjson VALUES ('{"key":"value"'); +ERROR: invalid input syntax for JSON +LINE 1: INSERT INTO testjson VALUES ('{"key":"value"'); + ^ +INSERT INTO testjson VALUES (''); +ERROR: invalid input syntax for JSON +LINE 1: INSERT INTO testjson VALUES (''); + ^ +INSERT INTO testjson VALUES ('""'); +INSERT INTO testjson VALUES ('true'); +INSERT INTO testjson VALUES ('false'); +INSERT INTO testjson VALUES ('null'); +INSERT INTO testjson VALUES ('[]'); +INSERT INTO testjson VALUES ('{}'); +SELECT * FROM testjson; + j +----------------- + [1,2,3,4] + {"key":"value"} + "" + true + false + null + [] + {} +(8 rows) + +SELECT json_get_type(j) FROM testjson; + json_get_type +--------------- + array + object + string + bool + bool + null + array + object +(8 rows) + diff --git a/contrib/json/expected/json_stringify.out b/contrib/json/expected/json_stringify.out new file mode 100644 index 0000000..6b39fb3 --- /dev/null +++ b/contrib/json/expected/json_stringify.out @@ -0,0 +1,162 @@ +-- Use unaligned output so results are consistent between PostgreSQL 8 and 9. 
+\a +SELECT json_stringify('false', ' '); +json_stringify +false +(1 row) +SELECT json_stringify('true', ' '); +json_stringify +true +(1 row) +SELECT json_stringify('null', ' '); +json_stringify +null +(1 row) +SELECT json_stringify('""', ' '); +json_stringify +"" +(1 row) +SELECT json_stringify('[]', ' '); +json_stringify +[ +] +(1 row) +SELECT json_stringify('[1]', ' '); +json_stringify +[ + 1 +] +(1 row) +SELECT json_stringify('[1,2]', ' '); +json_stringify +[ + 1, + 2 +] +(1 row) +SELECT json_stringify('{}', ' '); +json_stringify +{ +} +(1 row) +SELECT json_stringify('{"k":"v"}', ' '); +json_stringify +{ + "k": "v" +} +(1 row) +SELECT json_stringify('{"null":null, "boolean":true, "boolean" :false,"array":[1,2,3], "empty array":[], "empty object":{}}', ' '); +json_stringify +{ + "null": null, + "boolean": true, + "boolean": false, + "array": [ + 1, + 2, + 3 + ], + "empty array": [ + ], + "empty object": { + } +} +(1 row) +SELECT json_stringify(json(string)) FROM test_strings WHERE json_validate(string); +json_stringify +"\uD800\uD800" +"\uD800\uDBFF" +"\uDB00" +"\uDB00\uDBFF" +"￾" +"￿" +"" +[] +{} +0.5 +{"1":{}} +{"1":2} +{"1":2,"2.5":[3,4,{},{"5":["6"]}]} +{"1":2,"2.5":[3,4,{},{"5":["6"],"7":[8]}]} +1234 +-1234 +{"1":2,"3":4} +123.456e14234 +123.456e-14234 +123.456e+14234 +"1 " +"1 1" +2 +3 +"3" +[3] +3.2e+1 +[3,[4]] +[3,[4,[5]]] +[3,[4,[5],6]] +[3,[4,[5],6],7] +[3,[4,[5],6],7,8] +[3,[4,[5],6],7,8,9] +3e+1 +[7] +[7] +[7] +"hello" +["hello"] +["hello","bye"] +["hello","bye\n"] +["hello","bye\n\r\t"] +["hello","bye\n\r\t\b"] +["hello","bye\n\r\t\b",true] +["hello","bye\n\r\t\b",true,false] +["hello","bye\n\r\t\b",true,false,null] +{"hello":true} +{"hello":true,"bye":false} +{"hello":true,"bye":false,"foo":["one","two","three"]} +"hi" +["hi"] +["hi","bye"] +{"hi":"bye"} +["hi","bye",3] +["hi","bye[",3] +"\u0007" +"\u0008" +"\u0009" +"\u0010" +" " +"က0" +"ሴ" +"香9" +"𐀀" +"𐀀" +"𝄞" +"" +"�" +"￿" +[32,1] +"𐀀" +"\n" +"hello" +"hello\u0009world" +"hello" +"hello\n" 
+"hello" +3 +0.3 +0.3e+5 +0.3e-5 +0.3e5 +"hello" +-3 +-3.1 +0.5 +"\"" +"\"3.5" +"\"." +"\"....." +["\"\"\"\"",0.5] +["\".5",".5\"",0.5] +"'" +(93 rows) +-- Turn aligned output back on. +\a diff --git a/contrib/json/expected/relocatable.out b/contrib/json/expected/relocatable.out new file mode 100644 index 0000000..3af3f02 --- /dev/null +++ b/contrib/json/expected/relocatable.out @@ -0,0 +1,127 @@ +-- This test needs to be run last, and will only pass on PostgreSQL 9.1 and up. +DROP EXTENSION json CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table testjson column j +drop cascades to table escape_unicode_test column json +CREATE SCHEMA othernamespace; +CREATE EXTENSION json WITH SCHEMA othernamespace; +/* + * json_get_type uses its own OID to figure out what schema the type + * json_type is in so it can look up its enum label OIDs. + */ +SELECT othernamespace.json_get_type('[]'); + json_get_type +--------------- + array +(1 row) + +SELECT othernamespace.json_get_type('{}'); + json_get_type +--------------- + object +(1 row) + +SELECT othernamespace.json_get_type('"string"'); + json_get_type +--------------- + string +(1 row) + +SELECT othernamespace.json_get_type('3.14'); + json_get_type +--------------- + number +(1 row) + +SELECT othernamespace.json_get_type('true'); + json_get_type +--------------- + bool +(1 row) + +SELECT othernamespace.json_get_type('false'); + json_get_type +--------------- + bool +(1 row) + +SELECT othernamespace.json_get_type('null'); + json_get_type +--------------- + null +(1 row) + +CREATE TABLE temp (json othernamespace.JSON); +INSERT INTO temp VALUES ('[]'); +INSERT INTO temp VALUES ('{}'); +INSERT INTO temp VALUES ('"string"'); +INSERT INTO temp VALUES ('3.14'); +INSERT INTO temp VALUES ('true'); +INSERT INTO temp VALUES ('null'); +SELECT othernamespace.json_get_type(json) FROM temp; + json_get_type +--------------- + array + object + string + number + bool + null +(6 rows) + +ALTER EXTENSION json SET 
SCHEMA public; +SELECT json_get_type('[]'); + json_get_type +--------------- + array +(1 row) + +SELECT json_get_type('{}'); + json_get_type +--------------- + object +(1 row) + +SELECT json_get_type('"string"'); + json_get_type +--------------- + string +(1 row) + +SELECT json_get_type('3.14'); + json_get_type +--------------- + number +(1 row) + +SELECT json_get_type('true'); + json_get_type +--------------- + bool +(1 row) + +SELECT json_get_type('false'); + json_get_type +--------------- + bool +(1 row) + +SELECT json_get_type('null'); + json_get_type +--------------- + null +(1 row) + +SELECT json_get_type(json) FROM temp; + json_get_type +--------------- + array + object + string + number + bool + null +(6 rows) + +DROP TABLE temp; +DROP SCHEMA othernamespace CASCADE; diff --git a/contrib/json/expected/validate.out b/contrib/json/expected/validate.out new file mode 100644 index 0000000..9750cba --- /dev/null +++ b/contrib/json/expected/validate.out @@ -0,0 +1,220 @@ +SELECT json_validate(string), string FROM test_strings; + json_validate | string +---------------+---------------------------------------------------------- + f | + f | + f | " + f | [,] + f | [) + f | []] + f | [} + f | {,} + f | {] + f | ["1":2] + f | [1,2,] + f | [1:2} + f | {"1":2,} + f | {1:2} + f | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7 ]}]} + f | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7]}]} + f | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}] + f | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]] + f | {"1":2, "3":4 + f | "1\u2" + f | [,2] + f | "3 + f | "3" "4" + f | [3[4] + f | [3[4]] + f | [3, [4, [5], 6] 7, 8 9] + f | [3, [4, [5], 6] 7, 8, 9] + f | [3, [4, [5], 6], 7, 8 9] + f | {"hello":true, "bye":false, null} + f | {"hello":true, "bye":false, null:null} + f | "hi + f | "hi""" + f | {"hi": "bye"] + t | "\uD800\uD800" + t | "\uD800\uDBFF" + f | "\UD834\UDD1E" + t | "\uDB00" + t | "\uDB00\uDBFF" + t | "\uFFFE" + t | "\uFFFF" + f | . + t | "" + t | [] + t | {} + f | +. 
+ t | 0.5 + f | 0.e1 + t | {"1":{}} + t | {"1":2} + t | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"]}]} + t | {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]} + t | 1234 + t | -1234 + t | {"1":2, "3":4} + f | +1234 + f | ++1234 + t | 123.456e14234 + t | 123.456e-14234 + t | 123.456e+14234 + f | 123.e-14234 + t | "1\u2000" + t | "1\u20001" + t | 2 + f | .246e-14234 + f | .2e-14234 + t | 3 + f | .3 + t | "3" + t | [3] + f | +3. + t | 3.2e+1 + t | [3, [4]] + t | [3, [4, [5]]] + t | [3, [4, [5], 6]] + t | [3, [4, [5], 6], 7] + t | [3, [4, [5], 6], 7, 8] + t | [3, [4, [5], 6], 7, 8, 9] + f | +3.5 + f | .3e + f | .3e1 + f | .3e-1 + f | .3e+1 + f | 3.e1 + f | 3.e+1 + t | 3e+1 + f | .5 + f | +.5 + f | .5e+1 + t | [ 7] + t | [7 ] + t | [7] + f | .e-14234 + t | "hello" + t | ["hello"] + t | ["hello", "bye"] + t | ["hello", "bye\n"] + t | ["hello", "bye\n\r\t"] + t | ["hello", "bye\n\r\t\b"] + t | ["hello", "bye\n\r\t\b",true] + t | ["hello", "bye\n\r\t\b",true , false] + t | ["hello", "bye\n\r\t\b",true , false, null] + f | ["hello", "bye\n\r\t\v"] + t | {"hello":true} + t | {"hello":true, "bye":false} + t | {"hello":true, "bye":false, "foo":["one","two","three"]} + t | "hi" + t | ["hi"] + t | ["hi", "bye"] + t | {"hi": "bye"} + t | ["hi", "bye", 3] + t | ["hi", "bye[", 3] + t | "\u0007" + t | "\u0008" + t | "\u0009" + t | "\u0010" + t | "\u0020" + t | "\u10000" + t | "\u1234" + t | "\u99999" + t | "\ud800\udc00" + t | "\uD800\uDC00" + t | "\uD834\uDD1E" + t | "\uDBFF\uDFFF" + t | "\uFFFD" + t | "\uFFFF" + f | hello + t | [32, 1] + f | [32, + t | "\uD800\uDC00" + t | "\n" + t | "hello" + t | "hello\u0009world" + t | "hello" + t | "hello\n" + t | "hello" + t | 3 + f | 3. + f | .3 + t | 0.3 + f | 0.3e + f | 0.3e+ + t | 0.3e+5 + t | 0.3e-5 + t | 0.3e5 + t | "hello" + f | +3 + t | -3 + f | -3. + t | -3.1 + f | .5 + f | 5. + f | 5.e1 + t | 0.5 + f | .3e1 + f | .3e+1 + f | .3e-1 + f | .3e-1 .5 + f | .3e-1.5 + f | .3e+1.5 + f | .3e+. 
+ f | .3e+.5 + f | .3e+1.5 + f | 9.3e+1.5 + f | 9.e+1.5 + f | 9.e+ + f | 9.e+1 + t | "\"" + t | "\"3.5" + t | "\"." + f | "\".". + t | "\"....." + f | "\"\"\"\""" + f | ["\"\"\"\"", .5] + f | [.5] + t | ["\"\"\"\"", 0.5] + f | ["\"\"\"\"", .5] + f | ["\"\"\"\"",.5] + f | ["\"",.5] + f | ["\".5",.5] + f | ["\".5",".5\"".5] + f | ["\".5",".5\"", .5] + f | ["\".5",".5\"",.5] + t | ["\".5",".5\"",0.5] + f | {"key":/*comment*/"value"} + f | {"key":/*comment"value"} + f | {"key":"value"}/* + f | {"key":"value"}/**/ + f | {"key":"value"}/***/ + f | {"key":"value"}/**// + f | {"key":"value"}/**/// + f | {"key":"value"}/**///---- + f | {"key":"value"}# + f | {"key":"value"}#{ + f | {"key":"value"}#{} + f | {"key":"value"}#, + f | {"key":"value"/**/, "k2":"v2"} + t | "\u0027" + f | "hello\'" + f | 'hello\'' + f | 'hello' + f | 'hell\'o' + f | '\'hello' + f | '\'hello\'' + f | \'hello\' + f | 'hello\' + f | ['hello\'] + f | ['hello\''] + f | ['hello"'] + f | ['hello\"'] + f | ['hello"o'] + f | ['"'] + f | '"' + f | '"hello"' + f | '"hello' + f | '"hi"' +(215 rows) + diff --git a/contrib/json/json--0.1.sql b/contrib/json/json--0.1.sql new file mode 100644 index 0000000..9a54e2e --- /dev/null +++ b/contrib/json/json--0.1.sql @@ -0,0 +1,45 @@ +CREATE TYPE json; + +CREATE OR REPLACE FUNCTION json_in(cstring) +RETURNS json +AS '$libdir/json' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION json_out(json) +RETURNS cstring +AS '$libdir/json' +LANGUAGE C STRICT IMMUTABLE; + +CREATE TYPE json ( + INPUT = json_in, + OUTPUT = json_out, + INTERNALLENGTH = VARIABLE, + STORAGE = extended, + + -- make it a non-preferred member of string type category, as citext does + CATEGORY = 'S', + PREFERRED = false +); + +-- Keep the labels of this enum in sync with enum_type_names[] in json_ops.c . 
+CREATE TYPE json_type AS ENUM ('null', 'string', 'number', 'bool', 'object', 'array'); + +CREATE OR REPLACE FUNCTION json_get_type(json) +RETURNS json_type +AS '$libdir/json','json_get_type_f' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION json_validate(text) +RETURNS boolean +AS '$libdir/json','json_validate_f' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION json_stringify(json) +RETURNS text +AS '$libdir/json','json_stringify_f' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION json_stringify(json, text) +RETURNS text +AS '$libdir/json','json_stringify_space' +LANGUAGE C STRICT IMMUTABLE; diff --git a/contrib/json/json--unpackaged--0.1.sql b/contrib/json/json--unpackaged--0.1.sql new file mode 100644 index 0000000..b4479f9 --- /dev/null +++ b/contrib/json/json--unpackaged--0.1.sql @@ -0,0 +1,8 @@ +ALTER EXTENSION json ADD type json; +ALTER EXTENSION json ADD function json_in(cstring); +ALTER EXTENSION json ADD function json_out(json); +ALTER EXTENSION json ADD type json_type; +ALTER EXTENSION json ADD function json_get_type(json); +ALTER EXTENSION json ADD function json_validate(text); +ALTER EXTENSION json ADD function json_stringify(json); +ALTER EXTENSION json ADD function json_stringify(json, text); diff --git a/contrib/json/json.c b/contrib/json/json.c new file mode 100644 index 0000000..b753c1b --- /dev/null +++ b/contrib/json/json.c @@ -0,0 +1,848 @@ +/*------------------------------------------------------------------------- + * + * json.c + * Core JSON manipulation routines used by JSON data type support. + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * Written by Joey Adams . 
+ * + *------------------------------------------------------------------------- + */ + +#include "json.h" +#include "compat.h" + +#define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ') +#define is_digit(c) ((c) >= '0' && (c) <= '9') +#define is_hex_digit(c) (((c) >= '0' && (c) <= '9') || \ + ((c) >= 'A' && (c) <= 'F') || \ + ((c) >= 'a' && (c) <= 'f')) + +static unsigned int read_hex16(const char *in); +static void write_hex16(char *out, unsigned int val); +static pg_wchar from_surrogate_pair(unsigned int uc, unsigned int lc); +static void to_surrogate_pair(pg_wchar unicode, unsigned int *uc, unsigned int *lc); +static void appendStringInfoUtf8(StringInfo str, pg_wchar unicode); +static void appendStringInfoEscape(StringInfo str, unsigned int c); + +static const char *stringify_value(StringInfo buf, const char *s, const char *e, + const char *space, size_t space_length, + int indent); +static void append_indent(StringInfo buf, const char *space, size_t space_length, + int indent); + +/* + * Parsing functions and macros + * + * The functions and macros that follow are used to simplify the implementation + * of the recursive descent parser used for JSON validation. See the + * implementation of expect_object() for a good example of them in action. + * + * These functions/macros use a few unifying concepts: + * + * * const char *s and const char *e form a "slice" within a string, + * where s points to the first character and e points after the last + * character. Hence, the length of a slice is e - s. Although it would be + * simpler to just get rid of const char *e and rely on strings being + * null-terminated, varlena texts are not guaranteed to be null-terminated, + * meaning we would have to copy them to parse them. + * + * * Every expect_* function sees if the beginning of a slice matches what it + * expects, and returns the end of the slice on success. 
To illustrate: + * + * s expect_number(s, e) e + * | | | + * {"pi": 3.14159265, "e": 2.71828183, "phi": 1.61803399} + * + * * When a parse error occurs, s is set to NULL. Moreover, the parser + * functions and macros check to see if s is NULL before using it. + * This means parser functions built entirely of parser functions can proceed + * with the illusion that the input will always be valid, rather than having + * to do a NULL check on every line (see expect_number, which has no explicit + * checks). However, one must ensure that the parser will always halt, + * even in the NULL case. + * + * Bear in mind that while pop_*, optional_*, and skip_* update s, + * the expect_* functions do not. Example: + * + * s = expect_char(s, e, '{'); + * s = expect_space(s, e); + * + * if (optional_char(s, e, '}')) + * return s; + * + * Also, note that functions traversing an already-validated JSON text + * can take advantage of the assumption that the input is valid. + * For example, stringify_value does not perform NULL checks, nor does it + * check if s < e before dereferencing s. + */ + +static const char *expect_value(const char *s, const char *e); +static const char *expect_object(const char *s, const char *e); +static const char *expect_array(const char *s, const char *e); +static const char *expect_string(const char *s, const char *e); +static const char *expect_number(const char *s, const char *e); + +static const char *expect_literal(const char *s, const char *e, const char *literal); +static const char *expect_space(const char *s, const char *e); + +/* + * All of these macros evaluate s multiple times. + * + * Macros ending in _pred take a function or macro of the form: + * + * bool pred(char c); + * + * Macros ending in _cond take an expression, where *s is the character in question. + */ + +/* + * expect_char: Expect the next character to be @c, and consume it. + * expect_char_pred: Expect pred(next character) to hold, and consume it. 
+ * expect_char_cond: Expect a character to be available and cond to hold, and consume. + * expect_eof: Expect there to be no more input left. + * + * These macros, like any expect_ macros/functions, return a new pointer + * rather than updating @s. + */ +#define expect_char(s, e, c) expect_char_cond(s, e, *(s) == (c)) +#define expect_char_pred(s, e, pred) expect_char_cond(s, e, pred(*(s))) +#define expect_char_cond(s, e, cond) \ + ((s) != NULL && (s) < (e) && (cond) ? (s) + 1 : NULL) +#define expect_eof(s, e) ((s) != NULL && (s) == (e) ? (s) : NULL) + +/* + * next_char: Get the next character, but do not consume it. + * next_char_pred: Apply pred to the next character. + * next_char_cond: Evaluate cond if a character is available. + * + * On EOF or error, next_char returns EOF, and + * next_char_pred and next_char_cond return false. + */ +#define next_char(s, e) \ + ((s) != NULL && (s) < (e) ? (int)(unsigned char) *(s) : (int) EOF) +#define next_char_pred(s, e, pred) next_char_cond(s, e, pred(*(s))) +#define next_char_cond(s, e, cond) ((s) != NULL && (s) < (e) ? (cond) : false) + +/* + * pop_char: Consume the next character, and return it. + * pop_char_pred: Consume the next character, and apply pred to it. + * pop_char_cond is impossible to implement portably. + * + * On EOF or error, these macros do nothing, + * pop_char returns EOF, and pop_char_cond returns false. + */ +#define pop_char(s, e) ((s) != NULL && (s) < (e) ? (int)(unsigned char) *(s)++ : (int) EOF) +#define pop_char_pred(s, e) \ + ((s) != NULL && (s) < (e) ? (s)++, pred((s)[-1]) : false) + +/* + * optional_char: If the next character is @c, consume it. + * optional_char_pred: If pred(next character) holds, consume it. + * optional_char_cond: If a character is available, and cond holds, consume. + * + * These macros, when they consume, update @s and return true. + * Otherwise, they do nothing and return false. 
+ */ +#define optional_char(s, e, c) optional_char_cond(s, e, *(s) == (c)) +#define optional_char_pred(s, e, pred) optional_char_cond(s, e, pred(*(s))) +#define optional_char_cond(s, e, cond) \ + ((s) != NULL && (s) < (e) && (cond) ? (s)++, true : false) + +/* + * skip_pred: Skip zero or more characters matching pred. + * skip1_pred: Skip one or more characters matching pred. + * skip_cond: Skip zero or more characters where cond holds. + * skip1_cond: Skip one or more characters where cond holds. + */ +#define skip_pred(s, e, pred) skip_cond(s, e, pred(*(s))) +#define skip1_pred(s, e, pred) skip1_cond(s, e, pred(*(s))) +#define skip_cond(s, e, cond) do { \ + while (next_char_cond(s, e, cond)) \ + (s)++; \ + } while (0) +#define skip1_cond(s, e, cond) do { \ + if (next_char_cond(s, e, cond)) \ + { \ + (s)++; \ + while (next_char_cond(s, e, cond)) \ + (s)++; \ + } \ + else \ + { \ + (s) = NULL; \ + } \ + } while (0) + +/* + * json_validate - Test if text is valid JSON. + * + * Note: scalar values (strings, numbers, booleans, and nulls) + * are considered valid by this function, and by the JSON datatype. + */ +bool +json_validate(const char *str, size_t length) +{ + const char *s = str; + const char *e = str + length; + + s = expect_space(s, e); + s = expect_value(s, e); + s = expect_space(s, e); + s = expect_eof(s, e); + + return s != NULL; +} + +/* + * json_validate_nospace - Test if text is valid JSON and has no whitespace around tokens. + * + * JSON data is condensed on input, meaning spaces around tokens are removed. + * The fact that these spaces are gone is exploited in functions that + * traverse and manipulate JSON. 
+ */ +bool json_validate_nospace(const char *str, size_t length) +{ + const char *s = str; + const char *e = str + length; + + if (!json_validate(str, length)) + return false; + + while (s < e) + { + if (*s == '"') + { + s = expect_string(s, e); + if (s == NULL) /* should never happen */ + return false; + } + else if (is_space(*s)) + { + return false; + } + s++; + } + + return true; +} + +/* + * json_condense - Make JSON content shorter by removing spaces + * and unescaping characters. + */ +char * +json_condense(const char *json_str, size_t length, size_t *out_length) +{ + const char *s = json_str; + const char *e = s + length; + StringInfoData buf; + bool inside_string = false; + bool server_encoding_is_utf8 = GetDatabaseEncoding() == PG_UTF8; + + Assert(json_validate(json_str, length)); + + initStringInfo(&buf); + + while (s < e) + { + /* + * To make sense of this procedural mess, think of it as a flow chart + * that branches based on the characters that follow. + * + * When the algorithm wants to unescape a character, + * it will append the unescaped character, advance s, + * then continue. Otherwise, it will perform the default + * behavior and emit the character as is. + */ + if (inside_string) + { + /* When we are inside a string literal, convert escapes + * to the characters they represent when possible. */ + if (*s == '\\' && s+1 < e) + { + /* Change \/ to / */ + if (s[1] == '/') + { + appendStringInfoChar(&buf, '/'); + s += 2; + continue; + } + + /* Emit single-character escape as is now + * to avoid getting mixed up by \\ and \" */ + if (s[1] != 'u') + { + appendStringInfoChar(&buf, s[0]); + appendStringInfoChar(&buf, s[1]); + s += 2; + continue; + } + + /* Unescape \uXXXX if it is possible and feasible. */ + if (s+5 < e && s[1] == 'u') + { + unsigned int uc = read_hex16(s+2); + + /* If a \uXXXX escape stands for a non-control ASCII + * character, unescape it. 
*/ + if (uc >= 0x20 && uc <= 0x7E) + { + s += 6; + appendStringInfoChar(&buf, uc); + continue; + } + + /* Do not unescape 0x7F (DEL) because, although + * the JSON RFC does not mention it, it is in fact + * a control character. */ + + /* Unescape Unicode characters only if + * the server encoding is UTF-8. */ + if (uc > 0x7F && server_encoding_is_utf8) + { + if (uc >= 0xD800 && uc <= 0xDFFF) + { + /* Unescape a UTF-16 surrogate pair, + * but only if it's present and valid. */ + if (s+11 < e && s[6] == '\\' && s[7] == 'u') + { + unsigned int lc = read_hex16(s+8); + + if (uc >= 0xD800 && uc <= 0xDBFF && + lc >= 0xDC00 && lc <= 0xDFFF) + { + s += 12; + appendStringInfoUtf8(&buf, from_surrogate_pair(uc, lc)); + continue; + } + } + } + else + { + s += 6; + appendStringInfoUtf8(&buf, uc); + continue; + } + } + } + } + } + else + { + /* When we are not in a string literal, remove spaces. */ + if (is_space(*s)) + { + do s++; while (s < e && is_space(*s)); + continue; + } + } + + /* If we get here, it means we want to emit this character as is. */ + appendStringInfoChar(&buf, *s); + if (*s++ == '"') + inside_string = !inside_string; + } + + if (out_length != NULL) + *out_length = buf.len; + return buf.data; +} + +/* + * json_need_to_escape_unicode + * Determine whether we need to convert non-ASCII characters + * to \uXXXX escapes to prevent transcoding errors. + * + * If any of the following hold, no escaping needs to be done: + * + * * The client encoding is UTF-8. Escaping is not necessary because + * the client can encode all Unicode codepoints. + * + * * The client encoding and the server encoding are the same. + * Escaping is not necessary because the client can encode all + * codepoints the server can encode. + * + * * The server encoding is SQL_ASCII. This encoding tells PostgreSQL + * to shirk transcoding in favor of speed. It wasn't unescaped on input, + * so don't worry about escaping on output. + * + * * The client encoding is SQL_ASCII. 
This encoding tells PostgreSQL + * to not perform encoding conversion. + * + * Otherwise, (no matter how expensive it is) all non-ASCII characters are escaped. + */ +bool json_need_to_escape_unicode(void) +{ + int server_encoding = GetDatabaseEncoding(); + int client_encoding = pg_get_client_encoding(); + + if (client_encoding == PG_UTF8 || client_encoding == server_encoding || + server_encoding == PG_SQL_ASCII || client_encoding == PG_SQL_ASCII) + return false; + + return true; +} + +/* + * json_escape_unicode - Convert non-ASCII characters to \uXXXX escapes. + */ +char * +json_escape_unicode(const char *json, size_t length, size_t *out_length) +{ + const char *s; + const char *e; + StringInfoData buf; + + /* Convert to UTF-8, if necessary. */ + { + const char *orig = json; + json = (const char *) + pg_do_encoding_conversion((unsigned char *) json, length, + GetDatabaseEncoding(), PG_UTF8); + if (json != orig) + length = strlen(json); + } + + Assert(json_validate(json, length)); + s = json; + e = json + length; + initStringInfo(&buf); + + while (s < e) + { + if ((unsigned char) *s > 0x7F) + { + int len; + pg_wchar u; + + len = pg_utf_mblen((const unsigned char *) s); + if (s + len > e) + { + Assert(false); + appendStringInfoChar(&buf, *s); + s++; + continue; + } + + u = utf8_to_unicode((const unsigned char *) s); + s += len; + + if (u <= 0xFFFF) + { + appendStringInfoEscape(&buf, u); + } + else + { + unsigned int uc, lc; + to_surrogate_pair(u, &uc, &lc); + appendStringInfoEscape(&buf, uc); + appendStringInfoEscape(&buf, lc); + } + } + else + { + appendStringInfoChar(&buf, *s); + s++; + } + } + + if (out_length != NULL) + *out_length = buf.len; + return buf.data; +} + +/* + * json_stringify - Format JSON into text with indentation. + * + * Input must be valid, condensed JSON. 
+ */ +char * +json_stringify(const char *json, size_t length, + const char *space, size_t space_length, + size_t *out_length) +{ + const char *s = json; + const char *e = json + length; + StringInfoData buf; + + if (!json_validate_nospace(json, length)) + report_corrupt_json(); + + initStringInfo(&buf); + s = stringify_value(&buf, s, e, space, space_length, 0); + Assert(s == e); + + if (out_length != NULL) + *out_length = buf.len; + return buf.data; +} + +static const char * +stringify_value(StringInfo buf, const char *s, const char *e, + const char *space, size_t space_length, int indent) +{ + const char *s2; + + Assert(s < e); + + switch (*s) + { + case '[': + appendStringInfoString(buf, "[\n"); + s++; + if (*s != ']') + { + for (;;) + { + append_indent(buf, space, space_length, indent + 1); + s = stringify_value(buf, s, e, space, space_length, indent + 1); + Assert(s < e && (*s == ',' || *s == ']')); + if (*s == ']') + break; + appendStringInfoString(buf, ",\n"); + s++; + } + appendStringInfoChar(buf, '\n'); + } + append_indent(buf, space, space_length, indent); + appendStringInfoChar(buf, ']'); + return s + 1; + + case '{': + appendStringInfoString(buf, "{\n"); + s++; + if (*s != '}') + { + for (;;) + { + append_indent(buf, space, space_length, indent + 1); + s2 = expect_string(s, e); + appendBinaryStringInfo(buf, s, s2 - s); + s = s2; + Assert(s < e && *s == ':'); + appendStringInfoString(buf, ": "); + s++; + + s = stringify_value(buf, s, e, space, space_length, indent + 1); + Assert(s < e && (*s == ',' || *s == '}')); + if (*s == '}') + break; + appendStringInfoString(buf, ",\n"); + s++; + } + appendStringInfoChar(buf, '\n'); + } + append_indent(buf, space, space_length, indent); + appendStringInfoChar(buf, '}'); + return s + 1; + + default: + s2 = expect_value(s, e); + appendBinaryStringInfo(buf, s, s2 - s); + return s2; + } +} + +static void +append_indent(StringInfo buf, const char *space, size_t space_length, int indent) +{ + int i; + + for (i = 0; i < 
indent; i++) + appendBinaryStringInfo(buf, space, space_length); +} + +/* + * json_get_type - Determine the type of JSON content + * given the first non-space character. + * + * Return JSON_INVALID if the first character is not recognized. + */ +JsonType +json_get_type(int c) +{ + switch (c) + { + case 'n': + return JSON_NULL; + case '"': + return JSON_STRING; + case '-': + return JSON_NUMBER; + case 'f': + case 't': + return JSON_BOOL; + case '{': + return JSON_OBJECT; + case '[': + return JSON_ARRAY; + default: + if (is_digit(c)) + return JSON_NUMBER; + + return JSON_INVALID; + } +} + +/* + * Reads exactly 4 hex characters (capital or lowercase). + * Expects in[0..3] to be in bounds, and expects them to be hexadecimal characters. + */ +static unsigned int +read_hex16(const char *in) +{ + unsigned int i; + unsigned int tmp; + unsigned int ret = 0; + + for (i = 0; i < 4; i++) + { + char c = *in++; + + Assert(is_hex_digit(c)); + + if (c >= '0' && c <= '9') + tmp = c - '0'; + else if (c >= 'A' && c <= 'F') + tmp = c - 'A' + 10; + else /* if (c >= 'a' && c <= 'f') */ + tmp = c - 'a' + 10; + + ret <<= 4; + ret += tmp; + } + + return ret; +} + +/* + * Encodes a 16-bit number in hexadecimal, writing exactly 4 hex characters. + */ +static void +write_hex16(char *out, unsigned int val) +{ + const char *hex = "0123456789ABCDEF"; + + *out++ = hex[(val >> 12) & 0xF]; + *out++ = hex[(val >> 8) & 0xF]; + *out++ = hex[(val >> 4) & 0xF]; + *out++ = hex[val & 0xF]; +} + +/* Compute the Unicode codepoint of a UTF-16 surrogate pair. */ +static pg_wchar +from_surrogate_pair(unsigned int uc, unsigned int lc) +{ + Assert(uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF); + return 0x10000 + ((((pg_wchar)uc & 0x3FF) << 10) | (lc & 0x3FF)); +} + +/* Construct a UTF-16 surrogate pair given a Unicode codepoint. 
*/ +static void +to_surrogate_pair(pg_wchar unicode, unsigned int *uc, unsigned int *lc) +{ + pg_wchar n = unicode - 0x10000; + *uc = ((n >> 10) & 0x3FF) | 0xD800; + *lc = (n & 0x3FF) | 0xDC00; +} + +/* Append a Unicode character by converting it to UTF-8. */ +static void +appendStringInfoUtf8(StringInfo str, pg_wchar unicode) +{ + if (str->len + 4 >= str->maxlen) + enlargeStringInfo(str, 4); + + unicode_to_utf8(unicode, (unsigned char *) &str->data[str->len]); + str->len += pg_utf_mblen((const unsigned char *) &str->data[str->len]); + str->data[str->len] = '\0'; +} + +static void +appendStringInfoEscape(StringInfo str, unsigned int c) +{ + if (str->len + 6 >= str->maxlen) + enlargeStringInfo(str, 6); + + str->data[str->len++] = '\\'; + str->data[str->len++] = 'u'; + write_hex16(str->data + str->len, c); + str->len += 4; + str->data[str->len] = '\0'; +} + +static const char * +expect_value(const char *s, const char *e) +{ + int c = next_char(s, e); + + switch (c) + { + case '{': + return expect_object(s, e); + case '[': + return expect_array(s, e); + case '"': + return expect_string(s, e); + case '-': + return expect_number(s, e); + case 'n': + return expect_literal(s, e, "null"); + case 'f': + return expect_literal(s, e, "false"); + case 't': + return expect_literal(s, e, "true"); + default: + if (is_digit(c)) + return expect_number(s, e); + return NULL; + } +} + +static const char * +expect_object(const char *s, const char *e) +{ + s = expect_char(s, e, '{'); + s = expect_space(s, e); + + if (optional_char(s, e, '}')) + return s; + + while (s != NULL) + { + s = expect_string(s, e); + s = expect_space(s, e); + s = expect_char(s, e, ':'); + s = expect_space(s, e); + s = expect_value(s, e); + s = expect_space(s, e); + + if (optional_char(s, e, '}')) + return s; + + s = expect_char(s, e, ','); + s = expect_space(s, e); + } + + return NULL; +} + +static const char * +expect_array(const char *s, const char *e) +{ + s = expect_char(s, e, '['); + s = expect_space(s, e); 
+ + if (optional_char(s, e, ']')) + return s; + + while (s != NULL) + { + s = expect_value(s, e); + s = expect_space(s, e); + + if (optional_char(s, e, ']')) + return s; + + s = expect_char(s, e, ','); + s = expect_space(s, e); + } + + return NULL; +} + +static const char * +expect_string(const char *s, const char *e) +{ + s = expect_char(s, e, '"'); + + for (;;) + { + int c = pop_char(s, e); + + if (c <= 0x1F) /* Control character, EOF, or error */ + return NULL; + + if (c == '"') + return s; + + if (c == '\\') + { + switch (pop_char(s, e)) + { + case '"': + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + break; + + case 'u': + { + int i; + + for (i = 0; i < 4; i++) + { + c = pop_char(s, e); + if (!is_hex_digit(c)) + return NULL; + } + } + break; + + default: + return NULL; + } + } + } +} + +static const char * +expect_number(const char *s, const char *e) +{ + optional_char(s, e, '-'); + + if (!optional_char(s, e, '0')) + skip1_pred(s, e, is_digit); + + if (optional_char(s, e, '.')) + skip1_pred(s, e, is_digit); + + if (optional_char_cond(s, e, *s == 'E' || *s == 'e')) + { + optional_char_cond(s, e, *s == '+' || *s == '-'); + skip1_pred(s, e, is_digit); + } + + return s; +} + +static const char * +expect_literal(const char *s, const char *e, const char *literal) +{ + if (s == NULL) + return NULL; + + while (*literal != '\0') + if (s >= e || *s++ != *literal++) + return NULL; + + return s; +} + +/* Accepts *zero* or more spaces. 
*/ +static const char * +expect_space(const char *s, const char *e) +{ + if (s == NULL) + return NULL; + + for (; s < e && is_space(*s); s++) + {} + + return s; +} diff --git a/contrib/json/json.control b/contrib/json/json.control new file mode 100644 index 0000000..60e5c75 --- /dev/null +++ b/contrib/json/json.control @@ -0,0 +1,4 @@ +# json extension +comment = 'data type for storing and manipulating JSON content' +default_version = '0.1' +relocatable = true diff --git a/contrib/json/json.h b/contrib/json/json.h new file mode 100644 index 0000000..a4c0822 --- /dev/null +++ b/contrib/json/json.h @@ -0,0 +1,49 @@ +#ifndef JSON2_H +#define JSON2_H + +#include "postgres.h" + +#include "lib/stringinfo.h" +#include "mb/pg_wchar.h" + +typedef struct varlena json_varlena; + +#define DatumGetJSONPP(X) ((json_varlena *) PG_DETOAST_DATUM_PACKED(X)) +#define JSONPGetDatum(X) PointerGetDatum(X) + +#define PG_GETARG_JSON_PP(n) DatumGetJSONPP(PG_GETARG_DATUM(n)) +#define PG_RETURN_JSON_P(x) PG_RETURN_POINTER(x) + +/* Keep the order of these enum entries in sync with + * enum_type_names[] in json_op.c . 
*/ +typedef enum +{ + JSON_NULL, + JSON_STRING, + JSON_NUMBER, + JSON_BOOL, + JSON_OBJECT, + JSON_ARRAY, + JSON_TYPE_COUNT = JSON_ARRAY + 1, + + JSON_INVALID +} JsonType; + +#define json_type_is_valid(type) ((type) >= 0 && (type) < JSON_TYPE_COUNT) + +bool json_validate(const char *str, size_t length); +bool json_validate_nospace(const char *str, size_t length); +char *json_condense(const char *json_str, size_t length, size_t *out_length); + +bool json_need_to_escape_unicode(void); +char *json_escape_unicode(const char *json, size_t length, size_t *out_length); + +char *json_stringify(const char *json, size_t length, + const char *space, size_t space_length, + size_t *out_length); + +JsonType json_get_type(int c); + +void report_corrupt_json(void); + +#endif diff --git a/contrib/json/json_io.c b/contrib/json/json_io.c new file mode 100644 index 0000000..29a5aaa --- /dev/null +++ b/contrib/json/json_io.c @@ -0,0 +1,123 @@ +/*------------------------------------------------------------------------- + * + * json_io.c + * Primary input/output and conversion procedures + * for JSON data type. + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * Written by Joey Adams . 
+ * + *------------------------------------------------------------------------- + */ + +#include "json.h" + +#include "funcapi.h" +#include "utils/builtins.h" + +PG_MODULE_MAGIC; + +void +report_corrupt_json(void) +{ + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted JSON value"))); +} + +PG_FUNCTION_INFO_V1(json_in); +Datum json_in(PG_FUNCTION_ARGS); +Datum +json_in(PG_FUNCTION_ARGS) +{ + char *string = PG_GETARG_CSTRING(0); + size_t length = strlen(string); + char *condensed; + size_t condensed_length; + + if (!json_validate(string, length)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for JSON"))); + + condensed = json_condense(string, length, &condensed_length); + + PG_RETURN_JSON_P(cstring_to_text_with_len(condensed, condensed_length)); +} + +PG_FUNCTION_INFO_V1(json_out); +Datum json_out(PG_FUNCTION_ARGS); +Datum +json_out(PG_FUNCTION_ARGS) +{ + char *string = TextDatumGetCString(PG_GETARG_DATUM(0)); + + Assert(json_validate(string, strlen(string))); + + if (json_need_to_escape_unicode()) + string = json_escape_unicode(string, strlen(string), NULL); + + PG_RETURN_CSTRING(string); +} + +/* json_stringify(json). Renamed to avoid clashing with C function. */ +PG_FUNCTION_INFO_V1(json_stringify_f); +Datum json_stringify_f(PG_FUNCTION_ARGS); +Datum +json_stringify_f(PG_FUNCTION_ARGS) +{ + if (json_need_to_escape_unicode()) + { + json_varlena *json = PG_GETARG_JSON_PP(0); + char *escaped; + size_t escaped_length; + + escaped = json_escape_unicode(VARDATA_ANY(json), + VARSIZE_ANY_EXHDR(json), &escaped_length); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(escaped, escaped_length)); + } + else + { + /* text and json_varlena are binary-compatible */ + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); + } +} + +/* + * json_stringify(json, space) - Format a JSON value into text with indentation. 
+ */ +PG_FUNCTION_INFO_V1(json_stringify_space); +Datum json_stringify_space(PG_FUNCTION_ARGS); +Datum +json_stringify_space(PG_FUNCTION_ARGS) +{ + json_varlena *json = PG_GETARG_JSON_PP(0); + text *space = PG_GETARG_TEXT_PP(1); + char *stringified; + size_t stringified_length; + + if (json_need_to_escape_unicode()) + { + char *escaped; + size_t escaped_length; + + escaped = json_escape_unicode(VARDATA_ANY(json), + VARSIZE_ANY_EXHDR(json), &escaped_length); + stringified = json_stringify(escaped, escaped_length, + VARDATA_ANY(space), VARSIZE_ANY_EXHDR(space), + &stringified_length); + pfree(escaped); + } + else + { + stringified = json_stringify(VARDATA_ANY(json), VARSIZE_ANY_EXHDR(json), + VARDATA_ANY(space), VARSIZE_ANY_EXHDR(space), + &stringified_length); + } + + if (stringified == NULL) + report_corrupt_json(); + + PG_RETURN_TEXT_P(cstring_to_text_with_len(stringified, stringified_length)); +} diff --git a/contrib/json/json_op.c b/contrib/json/json_op.c new file mode 100644 index 0000000..545c2a7 --- /dev/null +++ b/contrib/json/json_op.c @@ -0,0 +1,221 @@ +/*------------------------------------------------------------------------- + * + * json_op.c + * Manipulation procedures for JSON data type. + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * Written by Joey Adams . 
+ * + *------------------------------------------------------------------------- + */ + +#include "json.h" +#include "compat.h" + +#include "catalog/namespace.h" +#include "catalog/pg_enum.h" +#include "funcapi.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +#define PG_DETOAST_DATUM_FIRST_CHAR(datum) \ + pg_detoast_datum_first_char((struct varlena *) DatumGetPointer(datum)) + +typedef struct +{ + int index; + const char *label; +} EnumLabel; + +static int pg_detoast_datum_first_char(struct varlena * datum); +static bool getEnumLabelOids(const char *typname, Oid typnamespace, + EnumLabel labels[], Oid oid_out[], int count); +static int enum_label_cmp(const void *left, const void *right); + +/* Keep the order of these entries in sync with the enum in json.h . */ +static EnumLabel enum_labels[JSON_TYPE_COUNT] = +{ + {JSON_NULL, "null"}, + {JSON_STRING, "string"}, + {JSON_NUMBER, "number"}, + {JSON_BOOL, "bool"}, + {JSON_OBJECT, "object"}, + {JSON_ARRAY, "array"} +}; + +/* json_validate(text). Renamed to avoid clashing + * with the C function json_validate. */ +PG_FUNCTION_INFO_V1(json_validate_f); +Datum json_validate_f(PG_FUNCTION_ARGS); +Datum +json_validate_f(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + + PG_RETURN_BOOL(json_validate(VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt))); +} + +/* json_get_type(json). Renamed to avoid clashing + * with the C function json_get_type. */ +PG_FUNCTION_INFO_V1(json_get_type_f); +Datum json_get_type_f(PG_FUNCTION_ARGS); +Datum +json_get_type_f(PG_FUNCTION_ARGS) +{ + int first_char; + JsonType type; + Oid *label_oids; + + /* Because JSON is condensed on input, leading spaces are removed, + * meaning we can determine the type merely by looking at the + * first character. 
*/ + first_char = PG_DETOAST_DATUM_FIRST_CHAR(PG_GETARG_DATUM(0)); + type = json_get_type(first_char); + + if (!json_type_is_valid(type)) + report_corrupt_json(); + + label_oids = fcinfo->flinfo->fn_extra; + if (label_oids == NULL) + { + label_oids = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + JSON_TYPE_COUNT * sizeof(Oid)); + if (!getEnumLabelOids("json_type", + get_func_namespace(fcinfo->flinfo->fn_oid), + enum_labels, label_oids, JSON_TYPE_COUNT)) + { + /* This should never happen, but if it does... */ + Oid namespace_oid = get_func_namespace(fcinfo->flinfo->fn_oid); + const char *namespace = get_namespace_name(namespace_oid); + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not read enum %s.json_type", + namespace ? namespace : "(missing namespace)"))); + } + + fcinfo->flinfo->fn_extra = label_oids; + } + + PG_RETURN_OID(label_oids[type]); +} + +/* + * pg_detoast_datum_first_char - Efficiently get the first character of a varlena. + * + * Return -1 if the varlena is empty. + * Otherwise, return the first character casted to an unsigned char. + */ +static int +pg_detoast_datum_first_char(struct varlena * datum) +{ + struct varlena *slice; + + if (VARATT_IS_COMPRESSED(datum) || VARATT_IS_EXTERNAL(datum)) + slice = pg_detoast_datum_slice(datum, 0, 1); + else + slice = datum; + + if (VARSIZE_ANY_EXHDR(slice) < 1) + return -1; + + return (unsigned char) VARDATA_ANY(slice)[0]; +} + +/* + * getEnumLabelOids + * Look up the OIDs of enum labels. Enum label OIDs are needed to + * return values of a custom enum type from a C function. + * + * Callers should typically cache the OIDs produced by this function + * using fn_extra, as retrieving enum label OIDs is somewhat expensive. + * + * Every labels[i].index must be between 0 and count, and oid_out + * must be allocated to hold count items. Note that getEnumLabelOids + * sorts the labels[] array passed to it. 
+ * + * Any labels not found in the enum will have their corresponding + * oid_out entries set to InvalidOid. + * + * If the entire operation fails (most likely when the type does not exist), + * this function will return false. + * + * Sample usage: + * + * -- SQL -- + * CREATE TYPE colors AS ENUM ('red', 'green', 'blue'); + * + * -- C -- + * enum Colors {RED, GREEN, BLUE, COLOR_COUNT}; + * + * static EnumLabel enum_labels[COLOR_COUNT] = + * { + * {RED, "red"}, + * {GREEN, "green"}, + * {BLUE, "blue"} + * }; + * + * Oid *label_oids = palloc(COLOR_COUNT * sizeof(Oid)); + * if (!getEnumLabelOids("colors", PG_PUBLIC_NAMESPACE, + * enum_labels, label_oids, COLOR_COUNT)) + * elog(ERROR, "could not read enum colors"); + * + * PG_RETURN_OID(label_oids[GREEN]); + */ +static bool +getEnumLabelOids(const char *typname, Oid typnamespace, + EnumLabel labels[], Oid oid_out[], int count) +{ + CatCList *list; + Oid enumtypoid; + int total; + int i; + EnumLabel key; + EnumLabel *found; + + if (!OidIsValid(typnamespace)) + return false; + enumtypoid = GetSysCacheOid2(TYPENAMENSP, PointerGetDatum(typname), + ObjectIdGetDatum(typnamespace)); + if (!OidIsValid(enumtypoid)) + return false; + + qsort(labels, count, sizeof(EnumLabel), enum_label_cmp); + + for (i = 0; i < count; i++) + { + /* Initialize oid_out items to InvalidOid. */ + oid_out[i] = InvalidOid; + + /* Make sure EnumLabel indices are in range. 
*/ + Assert(labels[i].index >= 0 && labels[i].index < count); + } + + list = SearchSysCacheList1(ENUMTYPOIDNAME, + ObjectIdGetDatum(enumtypoid)); + total = list->n_members; + + for (i = 0; i < total; i++) + { + HeapTuple tup = &list->members[i]->tuple; + Oid oid = HeapTupleGetOid(tup); + Form_pg_enum en = (Form_pg_enum) GETSTRUCT(tup); + + key.label = NameStr(en->enumlabel); + found = bsearch(&key, labels, count, sizeof(EnumLabel), enum_label_cmp); + if (found != NULL) + oid_out[found->index] = oid; + } + + ReleaseCatCacheList(list); + return true; +} + +static int +enum_label_cmp(const void *left, const void *right) +{ + const char *l = ((EnumLabel *) left)->label; + const char *r = ((EnumLabel *) right)->label; + + return strcmp(l, r); +} diff --git a/contrib/json/sql/condense.sql b/contrib/json/sql/condense.sql new file mode 100644 index 0000000..4117ab8 --- /dev/null +++ b/contrib/json/sql/condense.sql @@ -0,0 +1,18 @@ +SELECT json('"hello"'); +SELECT json($$"hello\u266Bworld"$$); +SELECT json($$"hello\u266bworld"$$); +SELECT json($$"hello♫world"$$); +SELECT json($$ "hello world" $$); +SELECT json($$ { "hello" : "world"} $$); +SELECT json($$ { "hello" : "world", "bye": 0.0001 } $$); +SELECT json($$ { "hello" : "world", + "bye": 0.0000001 +} $$); +SELECT json($$ { "hello" : "world" +, +"bye" +: [-0.1234e1, 12345e0] } $$); +SELECT json($$"\u007E\u007F\u0080"$$); +SELECT json($$"\u00FE\u00FF\u0100"$$); +SELECT json($$"\uD835\uDD4E"$$); +SELECT json($$"\uD835\uD835"$$); diff --git a/contrib/json/sql/escape_unicode.sql b/contrib/json/sql/escape_unicode.sql new file mode 100644 index 0000000..345713b --- /dev/null +++ b/contrib/json/sql/escape_unicode.sql @@ -0,0 +1,26 @@ +SET client_encoding TO UTF8; + +CREATE TABLE escape_unicode_test (json JSON); +INSERT INTO escape_unicode_test VALUES ($$"\u266b\uD835\uDD4E"$$); + +-- Output should not be escaped. 
+SELECT json FROM escape_unicode_test; +SELECT json::TEXT FROM escape_unicode_test; +SELECT json_stringify(json) FROM escape_unicode_test; +SELECT json_stringify(json, '') FROM escape_unicode_test; + +SET client_encoding TO SQL_ASCII; + +-- Output should still not be escaped. +SELECT json FROM escape_unicode_test; +SELECT json::TEXT FROM escape_unicode_test; +SELECT json_stringify(json) FROM escape_unicode_test; +SELECT json_stringify(json, '') FROM escape_unicode_test; + +SET client_encoding TO LATIN1; + +-- Output should be escaped now. +SELECT json FROM escape_unicode_test; +SELECT json::TEXT FROM escape_unicode_test; +SELECT json_stringify(json) FROM escape_unicode_test; +SELECT json_stringify(json, '') FROM escape_unicode_test; diff --git a/contrib/json/sql/init.sql b/contrib/json/sql/init.sql new file mode 100644 index 0000000..446437d --- /dev/null +++ b/contrib/json/sql/init.sql @@ -0,0 +1,14 @@ +\set ECHO none +SET client_min_messages = fatal; + +CREATE EXTENSION json; +\i json--0.1.sql + +SET client_min_messages = warning; + +\i sql/test_strings.sql +CREATE TABLE valid_test_strings AS + SELECT string FROM test_strings WHERE json_validate(string); + +RESET client_min_messages; +\set ECHO all diff --git a/contrib/json/sql/json.sql b/contrib/json/sql/json.sql new file mode 100644 index 0000000..4e3e0be --- /dev/null +++ b/contrib/json/sql/json.sql @@ -0,0 +1,38 @@ +SELECT '[]'::JSON; +SELECT '['::JSON; +SELECT '[1,2,3]'::JSON; +SELECT '[1,2,3]'::JSON::TEXT; +SELECT '[1,2,3 ]'::JSON; +SELECT '[1,2,3 ,4]'::JSON; +SELECT '[1,2,3 ,4.0]'::JSON; +SELECT '[1,2,3 ,4]'::JSON; +SELECT 'true'::JSON; +SELECT 'true'::TEXT::JSON; +SELECT 'false'::JSON; +SELECT 'null'::JSON; +SELECT '1.1'::JSON; +SELECT '"string"'::JSON; +SELECT '{"key1":"value1", "key2":"value2"}'::JSON; +SELECT 15::JSON; + +SELECT json_get_type('[]'); +SELECT json_get_type('{}'); +SELECT json_get_type('true'); +SELECT json_get_type('false'); +SELECT json_get_type('null'); + +CREATE TABLE testjson (j 
JSON); +INSERT INTO testjson VALUES ('[1,2,3,4]'); +INSERT INTO testjson VALUES ('{"key":"value"}'); +INSERT INTO testjson VALUES ('{"key":"value"'); +INSERT INTO testjson VALUES (''); +INSERT INTO testjson VALUES ('""'); +INSERT INTO testjson VALUES ('true'); +INSERT INTO testjson VALUES ('false'); +INSERT INTO testjson VALUES ('null'); +INSERT INTO testjson VALUES ('[]'); +INSERT INTO testjson VALUES ('{}'); + +SELECT * FROM testjson; + +SELECT json_get_type(j) FROM testjson; diff --git a/contrib/json/sql/json_stringify.sql b/contrib/json/sql/json_stringify.sql new file mode 100644 index 0000000..e475638 --- /dev/null +++ b/contrib/json/sql/json_stringify.sql @@ -0,0 +1,18 @@ +-- Use unaligned output so results are consistent between PostgreSQL 8 and 9. +\a + +SELECT json_stringify('false', ' '); +SELECT json_stringify('true', ' '); +SELECT json_stringify('null', ' '); +SELECT json_stringify('""', ' '); +SELECT json_stringify('[]', ' '); +SELECT json_stringify('[1]', ' '); +SELECT json_stringify('[1,2]', ' '); +SELECT json_stringify('{}', ' '); +SELECT json_stringify('{"k":"v"}', ' '); +SELECT json_stringify('{"null":null, "boolean":true, "boolean" :false,"array":[1,2,3], "empty array":[], "empty object":{}}', ' '); + +SELECT json_stringify(json(string)) FROM test_strings WHERE json_validate(string); + +-- Turn aligned output back on. +\a diff --git a/contrib/json/sql/relocatable.sql b/contrib/json/sql/relocatable.sql new file mode 100644 index 0000000..722d8a2 --- /dev/null +++ b/contrib/json/sql/relocatable.sql @@ -0,0 +1,41 @@ +-- This test needs to be run last, and will only pass on PostgreSQL 9.1 and up. + +DROP EXTENSION json CASCADE; + +CREATE SCHEMA othernamespace; +CREATE EXTENSION json WITH SCHEMA othernamespace; + +/* + * json_get_type uses its own OID to figure out what schema the type + * json_type is in so it can look up its enum label OIDs. 
+ */ +SELECT othernamespace.json_get_type('[]'); +SELECT othernamespace.json_get_type('{}'); +SELECT othernamespace.json_get_type('"string"'); +SELECT othernamespace.json_get_type('3.14'); +SELECT othernamespace.json_get_type('true'); +SELECT othernamespace.json_get_type('false'); +SELECT othernamespace.json_get_type('null'); + +CREATE TABLE temp (json othernamespace.JSON); +INSERT INTO temp VALUES ('[]'); +INSERT INTO temp VALUES ('{}'); +INSERT INTO temp VALUES ('"string"'); +INSERT INTO temp VALUES ('3.14'); +INSERT INTO temp VALUES ('true'); +INSERT INTO temp VALUES ('null'); +SELECT othernamespace.json_get_type(json) FROM temp; + +ALTER EXTENSION json SET SCHEMA public; + +SELECT json_get_type('[]'); +SELECT json_get_type('{}'); +SELECT json_get_type('"string"'); +SELECT json_get_type('3.14'); +SELECT json_get_type('true'); +SELECT json_get_type('false'); +SELECT json_get_type('null'); +SELECT json_get_type(json) FROM temp; + +DROP TABLE temp; +DROP SCHEMA othernamespace CASCADE; diff --git a/contrib/json/sql/test_strings.sql b/contrib/json/sql/test_strings.sql new file mode 100644 index 0000000..a477209 --- /dev/null +++ b/contrib/json/sql/test_strings.sql @@ -0,0 +1,220 @@ +BEGIN; + +CREATE TABLE test_strings (string TEXT); +INSERT INTO test_strings VALUES ($$$$); +INSERT INTO test_strings VALUES ($$ $$); +INSERT INTO test_strings VALUES ($$"$$); +INSERT INTO test_strings VALUES ($$[,]$$); +INSERT INTO test_strings VALUES ($$[)$$); +INSERT INTO test_strings VALUES ($$[]]$$); +INSERT INTO test_strings VALUES ($$[}$$); +INSERT INTO test_strings VALUES ($${,}$$); +INSERT INTO test_strings VALUES ($${]$$); +INSERT INTO test_strings VALUES ($$["1":2]$$); +INSERT INTO test_strings VALUES ($$[1,2,]$$); +INSERT INTO test_strings VALUES ($$[1:2}$$); +INSERT INTO test_strings VALUES ($${"1":2,}$$); +INSERT INTO test_strings VALUES ($${1:2}$$); +INSERT INTO test_strings VALUES ($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7 ]}]}$$); +INSERT INTO test_strings VALUES 
($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7]}]}$$); +INSERT INTO test_strings VALUES ($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]$$); +INSERT INTO test_strings VALUES ($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]]$$); +INSERT INTO test_strings VALUES ($${"1":2, "3":4$$); +INSERT INTO test_strings VALUES ($$"1\u2"$$); +INSERT INTO test_strings VALUES ($$[,2]$$); +INSERT INTO test_strings VALUES ($$"3$$); +INSERT INTO test_strings VALUES ($$"3" "4"$$); +INSERT INTO test_strings VALUES ($$[3[4]$$); +INSERT INTO test_strings VALUES ($$[3[4]]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6] 7, 8 9]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6] 7, 8, 9]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6], 7, 8 9]$$); +INSERT INTO test_strings VALUES ($${"hello":true, "bye":false, null}$$); +INSERT INTO test_strings VALUES ($${"hello":true, "bye":false, null:null}$$); +INSERT INTO test_strings VALUES ($$"hi$$); +INSERT INTO test_strings VALUES ($$"hi"""$$); +INSERT INTO test_strings VALUES ($${"hi": "bye"]$$); +INSERT INTO test_strings VALUES ($$"\uD800\uD800"$$); +INSERT INTO test_strings VALUES ($$"\uD800\uDBFF"$$); +INSERT INTO test_strings VALUES ($$"\UD834\UDD1E"$$); +INSERT INTO test_strings VALUES ($$"\uDB00"$$); +INSERT INTO test_strings VALUES ($$"\uDB00\uDBFF"$$); +INSERT INTO test_strings VALUES ($$"\uFFFE"$$); +INSERT INTO test_strings VALUES ($$"\uFFFF"$$); +INSERT INTO test_strings VALUES ($$.$$); +INSERT INTO test_strings VALUES ($$""$$); +INSERT INTO test_strings VALUES ($$[]$$); +INSERT INTO test_strings VALUES ($${}$$); +INSERT INTO test_strings VALUES ($$+.$$); +INSERT INTO test_strings VALUES ($$0.5$$); +INSERT INTO test_strings VALUES ($$0.e1$$); +INSERT INTO test_strings VALUES ($${"1":{}}$$); +INSERT INTO test_strings VALUES ($${"1":2}$$); +INSERT INTO test_strings VALUES ($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"]}]}$$); +INSERT INTO test_strings VALUES ($${"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 
]}]}$$); +INSERT INTO test_strings VALUES ($$1234$$); +INSERT INTO test_strings VALUES ($$-1234$$); +INSERT INTO test_strings VALUES ($${"1":2, "3":4}$$); +INSERT INTO test_strings VALUES ($$+1234$$); +INSERT INTO test_strings VALUES ($$++1234$$); +INSERT INTO test_strings VALUES ($$123.456e14234$$); +INSERT INTO test_strings VALUES ($$123.456e-14234$$); +INSERT INTO test_strings VALUES ($$123.456e+14234$$); +INSERT INTO test_strings VALUES ($$123.e-14234$$); +INSERT INTO test_strings VALUES ($$"1\u2000"$$); +INSERT INTO test_strings VALUES ($$"1\u20001"$$); +INSERT INTO test_strings VALUES ($$2$$); +INSERT INTO test_strings VALUES ($$.246e-14234$$); +INSERT INTO test_strings VALUES ($$.2e-14234$$); +INSERT INTO test_strings VALUES ($$3$$); +INSERT INTO test_strings VALUES ($$.3$$); +INSERT INTO test_strings VALUES ($$"3"$$); +INSERT INTO test_strings VALUES ($$[3]$$); +INSERT INTO test_strings VALUES ($$+3.$$); +INSERT INTO test_strings VALUES ($$3.2e+1$$); +INSERT INTO test_strings VALUES ($$[3, [4]]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5]]]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6]]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6], 7]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6], 7, 8]$$); +INSERT INTO test_strings VALUES ($$[3, [4, [5], 6], 7, 8, 9]$$); +INSERT INTO test_strings VALUES ($$+3.5$$); +INSERT INTO test_strings VALUES ($$.3e$$); +INSERT INTO test_strings VALUES ($$.3e1$$); +INSERT INTO test_strings VALUES ($$.3e-1$$); +INSERT INTO test_strings VALUES ($$.3e+1$$); +INSERT INTO test_strings VALUES ($$3.e1$$); +INSERT INTO test_strings VALUES ($$3.e+1$$); +INSERT INTO test_strings VALUES ($$3e+1$$); +INSERT INTO test_strings VALUES ($$.5$$); +INSERT INTO test_strings VALUES ($$+.5$$); +INSERT INTO test_strings VALUES ($$.5e+1$$); +INSERT INTO test_strings VALUES ($$[ 7]$$); +INSERT INTO test_strings VALUES ($$[7 ]$$); +INSERT INTO test_strings VALUES ($$[7]$$); +INSERT INTO test_strings VALUES ($$.e-14234$$); 
+INSERT INTO test_strings VALUES ($$"hello"$$); +INSERT INTO test_strings VALUES ($$["hello"]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye"]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n"]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t"]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t\b"]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t\b",true]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t\b",true , false]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t\b",true , false, null]$$); +INSERT INTO test_strings VALUES ($$["hello", "bye\n\r\t\v"]$$); +INSERT INTO test_strings VALUES ($${"hello":true}$$); +INSERT INTO test_strings VALUES ($${"hello":true, "bye":false}$$); +INSERT INTO test_strings VALUES ($${"hello":true, "bye":false, "foo":["one","two","three"]}$$); +INSERT INTO test_strings VALUES ($$"hi"$$); +INSERT INTO test_strings VALUES ($$["hi"]$$); +INSERT INTO test_strings VALUES ($$["hi", "bye"]$$); +INSERT INTO test_strings VALUES ($${"hi": "bye"}$$); +INSERT INTO test_strings VALUES ($$["hi", "bye", 3]$$); +INSERT INTO test_strings VALUES ($$["hi", "bye[", 3]$$); +INSERT INTO test_strings VALUES ($$"\u0007"$$); +INSERT INTO test_strings VALUES ($$"\u0008"$$); +INSERT INTO test_strings VALUES ($$"\u0009"$$); +INSERT INTO test_strings VALUES ($$"\u0010"$$); +INSERT INTO test_strings VALUES ($$"\u0020"$$); +INSERT INTO test_strings VALUES ($$"\u10000"$$); +INSERT INTO test_strings VALUES ($$"\u1234"$$); +INSERT INTO test_strings VALUES ($$"\u99999"$$); +INSERT INTO test_strings VALUES ($$"\ud800\udc00"$$); +INSERT INTO test_strings VALUES ($$"\uD800\uDC00"$$); +INSERT INTO test_strings VALUES ($$"\uD834\uDD1E"$$); +INSERT INTO test_strings VALUES ($$"\uDBFF\uDFFF"$$); +INSERT INTO test_strings VALUES ($$"\uFFFD"$$); +INSERT INTO test_strings VALUES ($$"\uFFFF"$$); +INSERT INTO test_strings VALUES ($$hello$$); +INSERT INTO test_strings VALUES ($$[32, 1]$$); +INSERT INTO 
test_strings VALUES ($$[32, $$); +INSERT INTO test_strings VALUES ($$"\uD800\uDC00"$$); +INSERT INTO test_strings VALUES ($$"\n"$$); +INSERT INTO test_strings VALUES ($$"hello"$$); +INSERT INTO test_strings VALUES ($$"hello\u0009world"$$); +INSERT INTO test_strings VALUES ($$"hello"$$); +INSERT INTO test_strings VALUES ($$"hello\n"$$); +INSERT INTO test_strings VALUES ($$"hello"$$); +INSERT INTO test_strings VALUES ($$3$$); +INSERT INTO test_strings VALUES ($$3.$$); +INSERT INTO test_strings VALUES ($$.3$$); +INSERT INTO test_strings VALUES ($$0.3$$); +INSERT INTO test_strings VALUES ($$0.3e$$); +INSERT INTO test_strings VALUES ($$0.3e+$$); +INSERT INTO test_strings VALUES ($$0.3e+5$$); +INSERT INTO test_strings VALUES ($$0.3e-5$$); +INSERT INTO test_strings VALUES ($$0.3e5$$); +INSERT INTO test_strings VALUES ($$"hello"$$); +INSERT INTO test_strings VALUES ($$+3$$); +INSERT INTO test_strings VALUES ($$-3$$); +INSERT INTO test_strings VALUES ($$-3.$$); +INSERT INTO test_strings VALUES ($$-3.1$$); +INSERT INTO test_strings VALUES ($$.5$$); +INSERT INTO test_strings VALUES ($$5.$$); +INSERT INTO test_strings VALUES ($$5.e1$$); +INSERT INTO test_strings VALUES ($$0.5$$); +INSERT INTO test_strings VALUES ($$.3e1$$); +INSERT INTO test_strings VALUES ($$.3e+1$$); +INSERT INTO test_strings VALUES ($$.3e-1$$); +INSERT INTO test_strings VALUES ($$.3e-1 .5$$); +INSERT INTO test_strings VALUES ($$.3e-1.5$$); +INSERT INTO test_strings VALUES ($$.3e+1.5$$); +INSERT INTO test_strings VALUES ($$.3e+.$$); +INSERT INTO test_strings VALUES ($$.3e+.5$$); +INSERT INTO test_strings VALUES ($$.3e+1.5$$); +INSERT INTO test_strings VALUES ($$9.3e+1.5$$); +INSERT INTO test_strings VALUES ($$9.e+1.5$$); +INSERT INTO test_strings VALUES ($$9.e+$$); +INSERT INTO test_strings VALUES ($$9.e+1$$); +INSERT INTO test_strings VALUES ($$"\""$$); +INSERT INTO test_strings VALUES ($$"\"3.5"$$); +INSERT INTO test_strings VALUES ($$"\"."$$); +INSERT INTO test_strings VALUES ($$"\".".$$); +INSERT INTO 
test_strings VALUES ($$"\"....."$$); +INSERT INTO test_strings VALUES ($$"\"\"\"\"""$$); +INSERT INTO test_strings VALUES ($$["\"\"\"\"", .5]$$); +INSERT INTO test_strings VALUES ($$[.5]$$); +INSERT INTO test_strings VALUES ($$["\"\"\"\"", 0.5]$$); +INSERT INTO test_strings VALUES ($$["\"\"\"\"", .5]$$); +INSERT INTO test_strings VALUES ($$["\"\"\"\"",.5]$$); +INSERT INTO test_strings VALUES ($$["\"",.5]$$); +INSERT INTO test_strings VALUES ($$["\".5",.5]$$); +INSERT INTO test_strings VALUES ($$["\".5",".5\"".5]$$); +INSERT INTO test_strings VALUES ($$["\".5",".5\"", .5]$$); +INSERT INTO test_strings VALUES ($$["\".5",".5\"",.5]$$); +INSERT INTO test_strings VALUES ($$["\".5",".5\"",0.5]$$); +INSERT INTO test_strings VALUES ($${"key":/*comment*/"value"}$$); +INSERT INTO test_strings VALUES ($${"key":/*comment"value"}$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/*$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/**/$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/***/$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/**//$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/**///$$); +INSERT INTO test_strings VALUES ($${"key":"value"}/**///----$$); +INSERT INTO test_strings VALUES ($${"key":"value"}#$$); +INSERT INTO test_strings VALUES ($${"key":"value"}#{$$); +INSERT INTO test_strings VALUES ($${"key":"value"}#{}$$); +INSERT INTO test_strings VALUES ($${"key":"value"}#,$$); +INSERT INTO test_strings VALUES ($${"key":"value"/**/, "k2":"v2"}$$); +INSERT INTO test_strings VALUES ($$"\u0027"$$); +INSERT INTO test_strings VALUES ($$"hello\'"$$); +INSERT INTO test_strings VALUES ($$'hello\''$$); +INSERT INTO test_strings VALUES ($$'hello'$$); +INSERT INTO test_strings VALUES ($$'hell\'o'$$); +INSERT INTO test_strings VALUES ($$'\'hello'$$); +INSERT INTO test_strings VALUES ($$'\'hello\''$$); +INSERT INTO test_strings VALUES ($$\'hello\'$$); +INSERT INTO test_strings VALUES ($$'hello\'$$); +INSERT INTO test_strings VALUES 
($$['hello\']$$); +INSERT INTO test_strings VALUES ($$['hello\'']$$); +INSERT INTO test_strings VALUES ($$['hello"']$$); +INSERT INTO test_strings VALUES ($$['hello\"']$$); +INSERT INTO test_strings VALUES ($$['hello"o']$$); +INSERT INTO test_strings VALUES ($$['"']$$); +INSERT INTO test_strings VALUES ($$'"'$$); +INSERT INTO test_strings VALUES ($$'"hello"'$$); +INSERT INTO test_strings VALUES ($$'"hello'$$); +INSERT INTO test_strings VALUES ($$'"hi"'$$); + +COMMIT; diff --git a/contrib/json/sql/validate.sql b/contrib/json/sql/validate.sql new file mode 100644 index 0000000..32da7f7 --- /dev/null +++ b/contrib/json/sql/validate.sql @@ -0,0 +1 @@ +SELECT json_validate(string), string FROM test_strings; diff --git a/contrib/json/uninstall_json.sql b/contrib/json/uninstall_json.sql new file mode 100644 index 0000000..46cf213 --- /dev/null +++ b/contrib/json/uninstall_json.sql @@ -0,0 +1,7 @@ +-- Adjust this setting to control where the objects get dropped. +SET search_path = public; + +DROP TYPE json CASCADE; +DROP TYPE json_type; + +DROP FUNCTION json_validate(text); diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index adf09ca..a126e6d 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -106,6 +106,7 @@ CREATE EXTENSION module_name FROM unpackaged; &intagg; &intarray; &isn; + &json; &lo; <ree; &oid2name; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 1a60796..7d62811 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -112,6 +112,7 @@ + diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml new file mode 100644 index 0000000..0fb8c24 --- /dev/null +++ b/doc/src/sgml/json.sgml @@ -0,0 +1,136 @@ + + json + + + json + + + + This module implements the json data type for storing + JSON data in PostgreSQL. 
+ The advantage of using the json type over storing JSON data in a + text field is that it performs JSON validation automatically, and + there will be several type-safe functions for manipulating JSON content. + + + + The json type stores valid JSON "values" as defined by + json.org. That is, a json + field can hold a string, number, object, array, 'true', + 'false', or 'null'. Be warned, though, + that the JSON standard + defines a top-level JSON text as an object or array, and many JSON libraries + will only accept an object or array. + + + + The json datatype is stored internally as JSON-formatted text and + condensed, on input, to the smallest size possible + If the server encoding is not UTF-8, escapes representing non-ASCII + characters (e.g. "\u266B") are not converted to their + respective characters (even when the server encoding has them) because it + would introduce a performance penalty. + . + For example, SELECT ' "json\u0020string" '::json; will + yield '"json string"'. Also, bear in mind that JSON null + ('null'::json) and SQL NULL (NULL::json) + are two different things. + + + + The json module is currently under development. + + + + <type>json</> Functions + + + <type>json</type> Functions + + + + + Function + Return Type + Description + Example + Result + + + + + + json_validate(text) + boolean + Determine if text is valid JSON. + json_validate('{key: "value"}') + false + + + json_validate('{"key": "value"}') + true + + + json_get_type(json) + json_type -  one of: + +'null' +'string' +'number' +'bool' +'object' +'array' + + + Get the type of a json value. + json_get_type('{"pi": "3.14159", "e": "2.71828"}') + 'object' + + + json_stringify(json) + text + Convert json to text. Currently, + json_stringify(x) is equivalent to + x::text. + + json_stringify('{"key":"value","array":[1,2,3]}') + {"key":"value","array":[1,2,3]} + + + json_stringify(json, indent text) + text + Convert json to text, adding spaces and indentation for readability. 
+ json_stringify('{"a":true,"b":false,"array":[1,2,3]}', ' ') + + +{ + "a": true, + "b": false, + "array": [ + 1, + 2, + 3 + ] +} + + + + + +
+ +
+ + + Author + + + Joey Adams joeyadams3.14159@gmail.com + + + + Development of this module was sponsored by Google through its Google Summer of Code program (code.google.com/soc). + + + +