Scanner patch: detect continued string literals in a dedicated <xqs> state.

After the closing quote seen in the xb, xh, xq, xe or xus state, the scanner
records the current start condition in state_before and enters <xqs>.  A
following quote preceded by whitespace that contains a newline resumes the
literal; a quote on the same line raises a syntax error; anything else (or
EOF) is thrown back and the finished literal is returned (for xus the scanner
moves on to xusend instead).  This replaces the {quotestop}, {quotefail} and
{quotecontinue} patterns.  The Makefile hunk drops FLEX_NO_BACKUP=yes for
scan.c.

NOTE(review): state_before is a file-level static, while other per-scan state
used here (e.g. saw_non_ascii) is reached through yyextra -- confirm that a
single shared variable is acceptable for this scanner.

diff --git a/src/backend/parser/Makefile b/src/backend/parser/Makefile
index f14febdbda..3a2459cb72 100644
--- a/src/backend/parser/Makefile
+++ b/src/backend/parser/Makefile
@@ -40,7 +40,6 @@ gram.c: BISON_CHECK_CMD = $(PERL) $(srcdir)/check_keywords.pl $< $(top_srcdir)/s
 
 
 scan.c: FLEXFLAGS = -CF -p -p
-scan.c: FLEX_NO_BACKUP=yes
 scan.c: FLEX_FIX_WARNING=yes
 
 
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index e1cae859e8..24f351229b 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -56,6 +56,8 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 	ereport(ERROR, (errmsg_internal("%s", msg)));
 }
 
+static int	state_before;
+
 /*
  * GUC variables.  This is a DIRECT violation of the warning given at the
  * head of gram.y, ie flex/bison code must not depend on any GUC variables;
@@ -168,6 +170,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
  *  <xd> delimited identifiers (double-quoted identifiers)
  *  <xh> hexadecimal numeric string
  *  <xq> standard quoted strings
+ *  <xqs> quote stop (detect continued strings)
  *  <xe> extended quoted strings (support backslash escape sequences)
  *  <xdolq> $foo$ quoted strings
  *  <xui> quoted identifier with Unicode escapes
@@ -185,6 +188,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 %x xd
 %x xh
 %x xq
+%x xqs
 %x xe
 %x xdolq
 %x xui
@@ -231,19 +235,7 @@ special_whitespace ({space}+|{comment}{newline})
 horiz_whitespace		({horiz_space}|{comment})
 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
 
-/*
- * To ensure that {quotecontinue} can be scanned without having to back up
- * if the full pattern isn't matched, we include trailing whitespace in
- * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
- * except for {quote} followed by whitespace and just one "-" (not two,
- * which would start a {comment}).  To cover that we have {quotefail}.
- * The actions for {quotestop} and {quotefail} must throw back characters
- * beyond the quote proper.
- */
 quote			'
-quotestop		{quote}{whitespace}*
-quotecontinue	{quote}{whitespace_with_newline}{quote}
-quotefail		{quote}{whitespace}*"-"
 
 /* Bit string
  * It is tempting to scan the string for only those characters
@@ -476,21 +468,10 @@ other .
 					startlit();
 					addlitchar('b', yyscanner);
 				}
-<xb>{quotestop}	|
-<xb>{quotefail} {
-					yyless(1);
-					BEGIN(INITIAL);
-					yylval->str = litbufdup(yyscanner);
-					return BCONST;
-				}
 <xh>{xhinside}	|
 <xb>{xbinside}	{
 					addlit(yytext, yyleng, yyscanner);
 				}
-<xh>{quotecontinue}	|
-<xb>{quotecontinue}	{
-					/* ignore */
-				}
 <xb><<EOF>>		{ yyerror("unterminated bit string literal"); }
 
 {xhstart}		{
@@ -505,13 +486,6 @@ other .
 					startlit();
 					addlitchar('x', yyscanner);
 				}
-<xh>{quotestop}	|
-<xh>{quotefail} {
-					yyless(1);
-					BEGIN(INITIAL);
-					yylval->str = litbufdup(yyscanner);
-					return XCONST;
-				}
 <xh><<EOF>>		{ yyerror("unterminated hexadecimal string literal"); }
 
 {xnstart}		{
@@ -568,28 +542,65 @@ other .
 					BEGIN(xus);
 					startlit();
 				}
-<xq,xe>{quotestop}	|
-<xq,xe>{quotefail} {
-					yyless(1);
-					BEGIN(INITIAL);
+
+<xb,xh,xq,xe,xus>{quote} {
+					state_before = YYSTATE;
+					BEGIN(xqs);
+				}
+<xqs>{whitespace_with_newline}{quote} {
+					/* resume scanning string that started on a previous line */
+					BEGIN(state_before);
+				}
+<xqs>{whitespace}*{quote} {
 					/*
-					 * check that the data remains valid if it might have been
-					 * made invalid by unescaping any chars.
+					 * SQL requires at least one newline in the whitespace separating
+					 * string literals that are to be concatenated, so throw an error
+					 * if we see the start of a new string on the same line.
 					 */
-					if (yyextra->saw_non_ascii)
-						pg_verifymbstr(yyextra->literalbuf,
-									   yyextra->literallen,
-									   false);
-					yylval->str = litbufdup(yyscanner);
-					return SCONST;
+					SET_YYLLOC();
+					ADVANCE_YYLLOC(yyleng - 1);
+					yyerror("syntax error");
 				}
-<xus>{quotestop} |
-<xus>{quotefail} {
-					/* throw back all but the quote */
-					yyless(1);
-					/* xusend state looks for possible UESCAPE */
-					BEGIN(xusend);
+<xqs><<EOF>> |
+<xqs>{whitespace}*[^'] {
+					/* throw back everything and handle the string we just scanned */
+					yyless(0);
+
+					switch (state_before)
+					{
+						case xb:
+							BEGIN(INITIAL);
+							yylval->str = litbufdup(yyscanner);
+							return BCONST;
+						case xh:
+							BEGIN(INITIAL);
+							yylval->str = litbufdup(yyscanner);
+							return XCONST;
+						case xe:
+							/* fallthrough */
+						case xq:
+							BEGIN(INITIAL);
+
+							/*
+							 * check that the data remains valid if it might have been
+							 * made invalid by unescaping any chars.
+							 */
+							if (yyextra->saw_non_ascii)
+								pg_verifymbstr(yyextra->literalbuf,
+											   yyextra->literallen,
+											   false);
+							yylval->str = litbufdup(yyscanner);
+							return SCONST;
+						case xus:
+							/* xusend state looks for possible UESCAPE */
+							BEGIN(xusend);
+							break;
+						default:
+							yyerror("unhandled previous state in quote continuation");
+					}
 				}
+
 <xusend>{whitespace} {
 					/* stay in xusend state over whitespace */
 				}
@@ -693,9 +704,6 @@ other .
 					if (c == '\0' || IS_HIGHBIT_SET(c))
 						yyextra->saw_non_ascii = true;
 				}
-<xq,xe,xus>{quotecontinue} {
-					/* ignore */
-				}
 <xe>.			{
 					/* This is only needed for \ just before EOF */
 					addlitchar(yytext[0], yyscanner);