commit 1b540015f48a05680666b81129aba468a5118ff9 Author: Alexander Korotkov Date: Thu Mar 21 16:21:44 2019 +0300 Get rid of backtracking in jsonpath_scan.l Non-backtracking flex parsers work faster than backtracking ones. So, this commit gets rid of backtracking in jsonpath_scan.l. That required explicit handling of some cases as well as manual backtracking for some cases. More regression tests for numerics are added. Discussion: https://mail.google.com/mail/u/0?ik=a20b091faa&view=om&permmsgid=msg-f%3A1628425344167939063 Author: John Naylor, Nikita Glukhov, Alexander Korotkov diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index b64ab4ed88a..4ef769749da 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -34,6 +34,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ windowfuncs.o xid.o xml.o jsonpath_scan.c: FLEXFLAGS = -CF -p -p +jsonpath_scan.c: FLEX_NO_BACKUP=yes # Force these dependencies to be known even without dependency info built: jsonpath_gram.o: jsonpath_scan.c diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 590517cafb2..844ea5e7cde 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -75,9 +75,20 @@ fprintf_to_ereport(const char *fmt, const char *msg) special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] blank [ \t\n\r\f] + +digit [0-9] +integer {digit}+ +decimal {digit}*\.{digit}+ +decimalfail {digit}+\. 
+real ({integer}|{decimal})[Ee][-+]?{digit}+ +realfail1 ({integer}|{decimal})[Ee] +realfail2 ({integer}|{decimal})[Ee][-+] + hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) +unicodefail \\u({hex_dig}{0,3}|\{{hex_dig}{0,6}) hex_char \\x{hex_dig}{2} +hex_fail \\x{hex_dig}{0,1} %% @@ -128,9 +139,21 @@ hex_char \\x{hex_dig}{2} {hex_char}+ { parseHexChars(yytext, yyleng); } -\\x { yyerror(NULL, "Hex character sequence is invalid"); } +{unicode}*{unicodefail} { yyerror(NULL, "Unicode sequence is invalid"); } -\\u { yyerror(NULL, "Unicode sequence is invalid"); } +{hex_char}*{hex_fail} { yyerror(NULL, "Hex character sequence is invalid"); } + +{unicode}+\\ { + /* throw back the \\, and treat as unicode */ + yyless(yyleng - 1); + parseUnicode(yytext, yyleng); + } + +{hex_char}+\\ { + /* throw back the \\, and treat as hex */ + yyless(yyleng - 1); + parseHexChars(yytext, yyleng); + } \\. { yyerror(NULL, "Escape sequence is invalid"); } @@ -211,34 +234,38 @@ hex_char \\x{hex_dig}{2} BEGIN xc; } -[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ +{real} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -\.[0-9]+[eE][+-]?[0-9]+ { /* float */ +{decimal} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -([0-9]+)?\.[0-9]+ { +{integer} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; - return NUMERIC_P; + return INT_P; } -[0-9]+ { +{decimalfail} { + /* throw back the ., and treat as integer */ + yyless(yyleng - 1); addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return INT_P; } +({realfail1}|{realfail2}) { yyerror(NULL, "Floating point number is invalid"); } + {any}+ { addstring(true, yytext, yyleng); BEGIN xnq; @@ -567,7 +594,7 @@ addUnicode(int ch, int *hi_surrogate) static void parseUnicode(char *s, int l) { - int i; + int i = 2; int hi_surrogate = -1; for (i = 2; i < l; i += 2) /* skip '\u' 
*/ diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index e604bae6a3f..4a84d9157fa 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -1297,7 +1297,7 @@ select jsonb_path_query('null', 'true.type()'); "boolean" (1 row) -select jsonb_path_query('null', '123.type()'); +select jsonb_path_query('null', '(123).type()'); jsonb_path_query ------------------ "number" diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index baaf9e36670..b7de4915038 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -365,6 +365,18 @@ select '1.type()'::jsonpath; 1.type() (1 row) +select '(1).type()'::jsonpath; + jsonpath +---------- + 1.type() +(1 row) + +select '1.2.type()'::jsonpath; + jsonpath +------------ + 1.2.type() +(1 row) + select '"aaa".type()'::jsonpath; jsonpath -------------- @@ -804,3 +816,159 @@ select '$ ? 
(@.a < +10.1e+1)'::jsonpath; $?(@."a" < 101) (1 row) +select '0'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '00'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '0.0'::jsonpath; + jsonpath +---------- + 0.0 +(1 row) + +select '0.000'::jsonpath; + jsonpath +---------- + 0.000 +(1 row) + +select '0.000e1'::jsonpath; + jsonpath +---------- + 0.00 +(1 row) + +select '0.000e2'::jsonpath; + jsonpath +---------- + 0.0 +(1 row) + +select '0.000e3'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '0.0010'::jsonpath; + jsonpath +---------- + 0.0010 +(1 row) + +select '0.0010e-1'::jsonpath; + jsonpath +---------- + 0.00010 +(1 row) + +select '0.0010e+1'::jsonpath; + jsonpath +---------- + 0.010 +(1 row) + +select '0.0010e+2'::jsonpath; + jsonpath +---------- + 0.10 +(1 row) + +select '1e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1e'::jsonpath; + ^ +DETAIL: Floating point number is invalid at or near "1e" +select '1.e'::jsonpath; + jsonpath +---------- + 1."e" +(1 row) + +select '1.2e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1.2e'::jsonpath; + ^ +DETAIL: Floating point number is invalid at or near "1.2e" +select '1.2.e'::jsonpath; + jsonpath +---------- + 1.2."e" +(1 row) + +select '(1.2).e'::jsonpath; + jsonpath +---------- + 1.2."e" +(1 row) + +select '1e3'::jsonpath; + jsonpath +---------- + 1000 +(1 row) + +select '1.e3'::jsonpath; + jsonpath +---------- + 1."e3" +(1 row) + +select '1.e3.e'::jsonpath; + jsonpath +------------ + 1."e3"."e" +(1 row) + +select '1.e3.e4'::jsonpath; + jsonpath +------------- + 1."e3"."e4" +(1 row) + +select '1.2e3'::jsonpath; + jsonpath +---------- + 1200 +(1 row) + +select '1.2.e3'::jsonpath; + jsonpath +---------- + 1.2."e3" +(1 row) + +select '(1.2).e3'::jsonpath; + jsonpath +---------- + 1.2."e3" +(1 row) + +select '1..e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1..e'::jsonpath; + ^ +DETAIL: syntax error, unexpected '.' at or near "." 
+select '1..e3'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1..e3'::jsonpath; + ^ +DETAIL: syntax error, unexpected '.' at or near "." +select '(1.).e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '(1.).e'::jsonpath; + ^ +DETAIL: syntax error, unexpected ')' at or near ")" +select '(1.).e3'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '(1.).e3'::jsonpath; + ^ +DETAIL: syntax error, unexpected ')' at or near ")" diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index 41b346b2d4d..28c861bb179 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -269,7 +269,7 @@ select jsonb_path_query('[null,1,true,"a",[],{}]', 'lax $.type()'); select jsonb_path_query('[null,1,true,"a",[],{}]', '$[*].type()'); select jsonb_path_query('null', 'null.type()'); select jsonb_path_query('null', 'true.type()'); -select jsonb_path_query('null', '123.type()'); +select jsonb_path_query('null', '(123).type()'); select jsonb_path_query('null', '"123".type()'); select jsonb_path_query('{"a": 2}', '($.a - 5).abs() + 10'); diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index e5f3391a666..9171ddbc6cd 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -66,6 +66,8 @@ select '$[$[0] ? (last > 0)]'::jsonpath; select 'null.type()'::jsonpath; select '1.type()'::jsonpath; +select '(1).type()'::jsonpath; +select '1.2.type()'::jsonpath; select '"aaa".type()'::jsonpath; select 'true.type()'::jsonpath; select '$.double().floor().ceiling().abs()'::jsonpath; @@ -145,3 +147,31 @@ select '$ ? (@.a < +0.1e+1)'::jsonpath; select '$ ? (@.a < 10.1e+1)'::jsonpath; select '$ ? (@.a < -10.1e+1)'::jsonpath; select '$ ? 
(@.a < +10.1e+1)'::jsonpath; + +select '0'::jsonpath; +select '00'::jsonpath; +select '0.0'::jsonpath; +select '0.000'::jsonpath; +select '0.000e1'::jsonpath; +select '0.000e2'::jsonpath; +select '0.000e3'::jsonpath; +select '0.0010'::jsonpath; +select '0.0010e-1'::jsonpath; +select '0.0010e+1'::jsonpath; +select '0.0010e+2'::jsonpath; +select '1e'::jsonpath; +select '1.e'::jsonpath; +select '1.2e'::jsonpath; +select '1.2.e'::jsonpath; +select '(1.2).e'::jsonpath; +select '1e3'::jsonpath; +select '1.e3'::jsonpath; +select '1.e3.e'::jsonpath; +select '1.e3.e4'::jsonpath; +select '1.2e3'::jsonpath; +select '1.2.e3'::jsonpath; +select '(1.2).e3'::jsonpath; +select '1..e'::jsonpath; +select '1..e3'::jsonpath; +select '(1.).e'::jsonpath; +select '(1.).e3'::jsonpath;