From 2387de10cf241f86f6987ba310f59594cae6b64f Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Fri, 22 Mar 2019 15:15:38 +0300 Subject: [PATCH] Fix parsing of identifiers in jsonpath --- src/backend/utils/adt/jsonpath.c | 11 ++- src/backend/utils/adt/jsonpath_gram.y | 6 +- src/backend/utils/adt/jsonpath_scan.l | 125 +++++++++++++++-------------- src/test/regress/expected/jsonpath.out | 138 +++++++++++++++++++++++++++++---- src/test/regress/sql/jsonpath.sql | 21 +++++ 5 files changed, 216 insertions(+), 85 deletions(-) diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c index 7f32248..f43aeef 100644 --- a/src/backend/utils/adt/jsonpath.c +++ b/src/backend/utils/adt/jsonpath.c @@ -494,9 +494,14 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, escape_json(buf, jspGetString(v, NULL)); break; case jpiVariable: - appendStringInfoChar(buf, '$'); - escape_json(buf, jspGetString(v, NULL)); - break; + { + int32 len; + char *name = jspGetString(v, &len); + + appendStringInfoChar(buf, '$'); + appendBinaryStringInfo(buf, name, len); + break; + } case jpiNumeric: appendStringInfoString(buf, DatumGetCString(DirectFunctionCall1(numeric_out, diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 1725502..196a191 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -334,8 +334,10 @@ makeItemVariable(JsonPathString *s) JsonPathParseItem *v; v = makeItemType(jpiVariable); - v->value.string.val = s->val; - v->value.string.len = s->len; + + /* skip leading '$' */ + v->value.string.val = &s->val[1]; + v->value.string.len = s->len - 1; return v; } diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 2165ffc..2c79bf0 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -20,6 +20,8 @@ #include "mb/pg_wchar.h" #include "nodes/pg_list.h" +#define JSONPATH_SPECIAL_CHARS "?%.[]{}()|&!=<>@#,*:-+/~`;\\\"' \b\f\n\r\t\v" + static JsonPathString scanstring; /* Handles to the buffer that the lexer uses internally */ @@ -63,7 +65,7 @@ fprintf_to_ereport(const char *fmt, const char *msg) * quoted variable names and C-tyle comments. * Exclusive states: * - quoted strings - * - non-quoted strings + * - non-quoted identifiers * - quoted variable names * - single-quoted strings * - C-style comment @@ -75,9 +77,12 @@ fprintf_to_ereport(const char *fmt, const char *msg) %x xsq %x xc -special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] -any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] -blank [ \t\n\r\f] +special [\?\%\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\~\`\;] +id_start [^\?\%\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\~\`\;\\\"\' \b\f\n\r\t\v(0-9)] +blank [ \b\f\n\r\t\v] + +id_cont ({id_start}|[0-9]) +id {id_start}{id_cont}* digit [0-9] integer (0|[1-9]{digit}*) @@ -95,68 +100,60 @@ hex_fail \\x{hex_dig}{0,1} %% -{any}+ { - addstring(false, yytext, yyleng); - } +\\[\"\'\\] { addchar(false, yytext[1]); } -{blank}+ { - yylval->str = scanstring; - BEGIN INITIAL; - return checkKeyword(); - } +\\b { addchar(false, '\b'); } +\\f { addchar(false, '\f'); } -\/\* { - yylval->str = scanstring; - BEGIN xc; - } +\\n { addchar(false, '\n'); } -({special}|\"|\') { - yylval->str = scanstring; - yyless(0); - BEGIN INITIAL; - return checkKeyword(); - } +\\r { addchar(false, '\r'); } -<> { - yylval->str = scanstring; - BEGIN INITIAL; - return checkKeyword(); - } +\\t { addchar(false, '\t'); } -\\[\"\'\\] { addchar(false, yytext[1]); } +\\v { addchar(false, '\v'); } -\\b { addchar(false, '\b'); } +{unicode}+ { parseUnicode(yytext, yyleng); } -\\f { addchar(false, '\f'); } +{hex_char} { parseHexChar(yytext); } -\\n { addchar(false, '\n'); } +{hex_fail} { yyerror(NULL, "invalid hex character sequence"); } -\\r { addchar(false, '\r'); } +{unicode}*{unicodefail} { + yyerror(NULL, "invalid unicode sequence"); + } -\\t { addchar(false, '\t'); } +{unicode}+\\ { + /* throw back the \\, and treat as unicode */ + yyless(yyleng - 1); + parseUnicode(yytext, yyleng); + } -\\v { addchar(false, '\v'); } +\\. { yyerror(NULL, "escape sequence is invalid"); } -{unicode}+ { parseUnicode(yytext, yyleng); } +\\ { yyerror(NULL, "unexpected end after backslash"); } -{hex_char} { parseHexChar(yytext); } +<> { yyerror(NULL, "unexpected end of quoted string"); } -{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); } -{hex_fail} { yyerror(NULL, "invalid hex character sequence"); } +{id_cont}+ { addstring(false, yytext, yyleng); } -{unicode}+\\ { - /* throw back the \\, and treat as unicode */ +({special}|{blank}|[\'\"]) { + /* throw back the special symbol and return id */ yyless(yyleng - 1); - parseUnicode(yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); } -\\. { yyerror(NULL, "escape sequence is invalid"); } - -\\ { yyerror(NULL, "unexpected end after backslash"); } - -<> { yyerror(NULL, "unexpected end of quoted string"); } +<> { + addchar(false, '\0'); + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } \" { yylval->str = scanstring; @@ -210,18 +207,6 @@ hex_fail \\x{hex_dig}{0,1} \> { return GREATER_P; } -\${any}+ { - addstring(true, yytext + 1, yyleng - 1); - addchar(false, '\0'); - yylval->str = scanstring; - return VARIABLE_P; - } - -\$\" { - addchar(true, '\0'); - BEGIN xvq; - } - {special} { return *yytext; } {blank}+ { /* ignore */ } @@ -263,11 +248,18 @@ hex_fail \\x{hex_dig}{0,1} ({realfail1}|{realfail2}) { yyerror(NULL, "invalid floating point number"); } -{any}+ { + +{id_start}{id_cont}* { addstring(true, yytext, yyleng); BEGIN xnq; } +{unicode}+ { + addstring(true, "", 0); + parseUnicode(yytext, yyleng); + BEGIN xnq; + } + \" { addchar(true, '\0'); BEGIN xq; @@ -278,11 +270,7 @@ hex_fail \\x{hex_dig}{0,1} BEGIN xsq; } -\\ { - yyless(0); - addchar(true, '\0'); - BEGIN xnq; - } +\\ { yyerror(NULL, "invalid escape sequence"); } <> { yyterminate(); } @@ -354,6 +342,12 @@ checkKeyword() *StopHigh = keywords + lengthof(keywords), *StopMiddle; + if (strcspn(scanstring.val, JSONPATH_SPECIAL_CHARS) < scanstring.len) + jsonpath_yyerror(NULL, "invalid characters in identifier"); + + if (scanstring.val[0] == '$') + return scanstring.len == 1 ? '$' : VARIABLE_P; + if (scanstring.len > keywords[lengthof(keywords) - 1].len) return res; @@ -604,6 +598,9 @@ parseUnicode(char *s, int l) while (s[++i] != '}' && i < l) ch = (ch << 4) | hexval(s[i]); i++; /* skip '}' */ + + if (ch > 0x10FFFF) + jsonpath_yyerror(NULL, "invalid Unicode escape value"); } else /* parse '\uXXXX' */ { diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index ea42ae3..14221d4 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -190,11 +190,75 @@ select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath; (1 row) select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath; +ERROR: escape sequence is invalid at or near "\x" of jsonpath input +LINE 1: select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::json... + ^ +select '$."foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar"'::jsonpath; jsonpath --------------------- $."fooPgSQL\t\"bar" (1 row) +select '$.\u12345.a\u1234.b\u{12}34'::jsonpath; + jsonpath +------------------------- + $."ሴ5"."aሴ"."b\u001234" +(1 row) + +select '$.\u123'::jsonpath; +ERROR: invalid unicode sequence at or near "\u123" of jsonpath input +LINE 1: select '$.\u123'::jsonpath; + ^ +select '$.\u{}'::jsonpath; +ERROR: invalid unicode sequence at or near "\u{" of jsonpath input +LINE 1: select '$.\u{}'::jsonpath; + ^ +select '$.\u{1}'::jsonpath; + jsonpath +------------ + $."\u0001" +(1 row) + +select '$.\u{20}'::jsonpath; +ERROR: invalid characters in identifier at end of jsonpath input +LINE 1: select '$.\u{20}'::jsonpath; + ^ +select '$."\u{20}"'::jsonpath; + jsonpath +---------- + $." " +(1 row) + +select '$.\u{21}'::jsonpath; +ERROR: invalid characters in identifier at end of jsonpath input +LINE 1: select '$.\u{21}'::jsonpath; + ^ +select '$."\u{21}"'::jsonpath; + jsonpath +---------- + $."!" +(1 row) + +select '$.\u{12345}'::jsonpath; + jsonpath +---------- + $."𒍅" +(1 row) + +select '$.\u{10FFFF}'::jsonpath; + jsonpath +---------- + $."" +(1 row) + +select '$.\u{110000}'::jsonpath; +ERROR: invalid Unicode escape value at or near "\u{110000}" of jsonpath input +LINE 1: select '$.\u{110000}'::jsonpath; + ^ +select '$.\x123'::jsonpath; +ERROR: invalid escape sequence at or near "\" of jsonpath input +LINE 1: select '$.\x123'::jsonpath; + ^ select '$.g ? ($.a == 1)'::jsonpath; jsonpath -------------------- @@ -282,25 +346,59 @@ select '$.g ? (+@.x >= +-(+@.a + 2))'::jsonpath; select '$a'::jsonpath; jsonpath ---------- - $"a" + $a +(1 row) + +select '$_'::jsonpath; + jsonpath +---------- + $_ +(1 row) + +select '$123'::jsonpath; + jsonpath +---------- + $123 +(1 row) + +select '$$$'::jsonpath; + jsonpath +---------- + $$$ +(1 row) + +select '$_$$1_3a'::jsonpath; + jsonpath +---------- + $_$$1_3a +(1 row) + +select '$\u12345'::jsonpath; + jsonpath +---------- + $ሴ5 (1 row) +select '$\u{20}'::jsonpath; +ERROR: invalid characters in identifier at end of jsonpath input +LINE 1: select '$\u{20}'::jsonpath; + ^ select '$a.b'::jsonpath; jsonpath ---------- - $"a"."b" + $a."b" (1 row) select '$a[*]'::jsonpath; jsonpath ---------- - $"a"[*] + $a[*] (1 row) select '$.g ? (@.zip == $zip)'::jsonpath; - jsonpath ---------------------------- - $."g"?(@."zip" == $"zip") + jsonpath +------------------------- + $."g"?(@."zip" == $zip) (1 row) select '$.a[1,2, 3 to 16]'::jsonpath; @@ -310,9 +408,9 @@ select '$.a[1,2, 3 to 16]'::jsonpath; (1 row) select '$.a[$a + 1, ($b[*]) to -($[0] * 2)]'::jsonpath; - jsonpath ----------------------------------------- - $."a"[$"a" + 1,$"b"[*] to -($[0] * 2)] + jsonpath +------------------------------------ + $."a"[$a + 1,$b[*] to -($[0] * 2)] (1 row) select '$.a[$.a.size() - 3]'::jsonpath; @@ -408,9 +506,9 @@ select '$ ? (@ starts with "abc")'::jsonpath; (1 row) select '$ ? (@ starts with $var)'::jsonpath; - jsonpath --------------------------- - $?(@ starts with $"var") + jsonpath +------------------------ + $?(@ starts with $var) (1 row) select '$ ? (@ like_regex "(invalid pattern")'::jsonpath; @@ -481,9 +579,9 @@ select '$ < 1'::jsonpath; (1 row) select '($ < 1) || $.a.b <= $x'::jsonpath; - jsonpath ------------------------------- - ($ < 1 || $."a"."b" <= $"x") + jsonpath +---------------------------- + ($ < 1 || $."a"."b" <= $x) (1 row) select '@ + 1'::jsonpath; @@ -815,9 +913,17 @@ select '0'::jsonpath; (1 row) select '00'::jsonpath; -ERROR: syntax error, unexpected IDENT_P at end of jsonpath input +ERROR: syntax error, unexpected INT_P, expecting $end at or near "0" of jsonpath input LINE 1: select '00'::jsonpath; ^ +select '$.00'::jsonpath; +ERROR: syntax error, unexpected INT_P at or near "0" of jsonpath input +LINE 1: select '$.00'::jsonpath; + ^ +select '$.0a'::jsonpath; +ERROR: syntax error, unexpected INT_P at or near "0" of jsonpath input +LINE 1: select '$.0a'::jsonpath; + ^ select '0.0'::jsonpath; jsonpath ---------- diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index 29ea77a..9e44f1d 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -34,6 +34,19 @@ select '''\b\f\r\n\t\v\"\''\\'''::jsonpath; select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath; select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath; select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath; +select '$."foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar"'::jsonpath; +select '$.\u12345.a\u1234.b\u{12}34'::jsonpath; +select '$.\u123'::jsonpath; +select '$.\u{}'::jsonpath; +select '$.\u{1}'::jsonpath; +select '$.\u{20}'::jsonpath; +select '$."\u{20}"'::jsonpath; +select '$.\u{21}'::jsonpath; +select '$."\u{21}"'::jsonpath; +select '$.\u{12345}'::jsonpath; +select '$.\u{10FFFF}'::jsonpath; +select '$.\u{110000}'::jsonpath; +select '$.\x123'::jsonpath; select '$.g ? ($.a == 1)'::jsonpath; select '$.g ? (@ == 1)'::jsonpath; @@ -51,6 +64,12 @@ select '$.g ? ((@.x >= 123 || @.a == 4) && exists (@.x ? (@ == 14)))'::jsonpath; select '$.g ? (+@.x >= +-(+@.a + 2))'::jsonpath; select '$a'::jsonpath; +select '$_'::jsonpath; +select '$123'::jsonpath; +select '$$$'::jsonpath; +select '$_$$1_3a'::jsonpath; +select '$\u12345'::jsonpath; +select '$\u{20}'::jsonpath; select '$a.b'::jsonpath; select '$a[*]'::jsonpath; select '$.g ? (@.zip == $zip)'::jsonpath; @@ -153,6 +172,8 @@ select '$ ? (@.a < +10.1e+1)'::jsonpath; select '0'::jsonpath; select '00'::jsonpath; +select '$.00'::jsonpath; +select '$.0a'::jsonpath; select '0.0'::jsonpath; select '0.000'::jsonpath; select '0.000e1'::jsonpath; -- 2.7.4