From 107e3c8a0b65b0196ea4370a724c8b2a1b0fdf79 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 30 Sep 2018 12:51:41 +0700 Subject: [PATCH v1 1/4] First pass at syncing ECPG scanner with the core scanner. Adjust whitespace and formatting, clean up some comments, and move the block of whitespace rules. --- src/backend/parser/scan.l | 2 +- src/fe_utils/psqlscan.l | 2 +- src/interfaces/ecpg/preproc/pgc.l | 773 ++++++++++++++++-------------- 3 files changed, 408 insertions(+), 369 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 950b8b8591..a2454732a1 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -192,7 +192,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); * XXX perhaps \f (formfeed) should be treated as a newline as well? * * XXX if you change the set of whitespace characters, fix scanner_isspace() - * to agree, and see also the plpgsql lexer. + * to agree. */ space [ \t\n\r\f] diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index fdf49875a7..25253b54ea 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -151,7 +151,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); * XXX perhaps \f (formfeed) should be treated as a newline as well? * * XXX if you change the set of whitespace characters, fix scanner_isspace() - * to agree, and see also the plpgsql lexer. + * to agree. */ space [ \t\n\r\f] diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 0792118cfe..b96f17ca20 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -108,16 +108,19 @@ static struct _if_value * We use exclusive states for quoted strings, extended comments, * and to eliminate parsing troubles for numeric strings. * Exclusive states: - * bit string literal - * extended C-style comments in C - * extended C-style comments in SQL - * delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 - * hexadecimal numeric string - thomas 1997-11-16 - * standard quoted strings - thomas 1997-07-30 - * standard quoted strings in C - michael - * extended quoted strings (support backslash escape sequences) - * national character quoted strings + * bit string literal + * extended C-style comments in C + * extended C-style comments in SQL + * delimited identifiers (double-quoted identifiers) + * + * hexadecimal numeric string + * standard quoted strings + * extended quoted strings (support backslash escape sequences) + * national character quoted strings + * standard quoted strings in C * $foo$ quoted strings + * + * * quoted identifier with Unicode escapes * quoted string with Unicode escapes */ @@ -138,6 +141,48 @@ static struct _if_value %x xui %x xus +/* + * In order to make the world safe for Windows and Mac clients as well as + * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n + * sequence will be seen as two successive newlines, but that doesn't cause + * any problems. SQL-style comments, which start with -- and extend to the + * next newline, are treated as equivalent to a single whitespace character. + * + * NOTE a fine point: if there is no newline following --, we will absorb + * everything to the end of the input as a comment. This is correct. Older + * versions of Postgres failed to recognize -- as a comment if the input + * did not end with a newline. + * + * XXX perhaps \f (formfeed) should be treated as a newline as well? + * + * XXX if you change the set of whitespace characters, fix ecpg_isspace() + * to agree. + */ + +space [ \t\n\r\f] +horiz_space [ \t\f] +newline [\n\r] +non_newline [^\n\r] + +comment ("--"{non_newline}*) + +whitespace ({space}+|{comment}) + +/* + * SQL requires at least one newline in the whitespace separating + * string literals that are to be concatenated. Silly, but who are we + * to argue? Note that {whitespace_with_newline} should not have * after + * it, whereas {whitespace} should generally have a * after it... + */ + +horiz_whitespace ({horiz_space}|{comment}) +whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) + +quote ' +quotestop {quote}{whitespace}* +quotecontinue {quote}{whitespace_with_newline}{quote} +quotefail {quote}{whitespace}*"-" + /* Bit string */ xbstart [bB]{quote} @@ -216,17 +261,17 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) * The "extended comment" syntax closely resembles allowable operator syntax. * The tricky part here is to get lex to recognize a string starting with * slash-star as a comment, when interpreting it as an operator would produce - * a longer match --- remember lex will prefer a longer match! Also, if we + * a longer match --- remember lex will prefer a longer match! Also, if we * have something like plus-slash-star, lex will think this is a 3-character * operator whereas we want to see it as a + operator and a comment start. * The solution is two-fold: * 1. append {op_chars}* to xcstart so that it matches as much text as - * {operator} would. Then the tie-breaker (first matching rule of same - * length) ensures xcstart wins. We put back the extra stuff with yyless() - * in case it contains a star-slash that should terminate the comment. + * {operator} would. Then the tie-breaker (first matching rule of same + * length) ensures xcstart wins. We put back the extra stuff with yyless() + * in case it contains a star-slash that should terminate the comment. * 2. In the operator rule, check for slash-star within the operator, and - * if found throw it back with yyless(). This handles the plus-slash-star - * problem. + * if found throw it back with yyless(). This handles the plus-slash-star + * problem. * Dash-dash comments have similar interactions with the operator rule. */ xcstart \/\*{op_chars}* @@ -262,7 +307,7 @@ not_equals "!=" /* * "self" is the set of chars that should be returned as single-character - * tokens. "op_chars" is the set of chars that can make up "Op" tokens, + * tokens. "op_chars" is the set of chars that can make up "Op" tokens, * which can be one or more characters long (but if a single-char token * appears in the "self" set, it is not to be returned as an Op). Note * that the sets overlap, but each has some chars that are not in the other. @@ -290,50 +335,6 @@ realfail2 ({integer}|{decimal})[Ee][-+] param \${integer} -/* - * In order to make the world safe for Windows and Mac clients as well as - * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n - * sequence will be seen as two successive newlines, but that doesn't cause - * any problems. SQL-style comments, which start with -- and extend to the - * next newline, are treated as equivalent to a single whitespace character. - * - * NOTE a fine point: if there is no newline following --, we will absorb - * everything to the end of the input as a comment. This is correct. Older - * versions of Postgres failed to recognize -- as a comment if the input - * did not end with a newline. - * - * XXX perhaps \f (formfeed) should be treated as a newline as well? - * - * XXX if you change the set of whitespace characters, fix ecpg_isspace() - * to agree. - */ - -ccomment "//".*\n - -space [ \t\n\r\f] -horiz_space [ \t\f] -newline [\n\r] -non_newline [^\n\r] - -comment ("--"{non_newline}*) - -whitespace ({space}+|{comment}) - -/* - * SQL requires at least one newline in the whitespace separating - * string literals that are to be concatenated. Silly, but who are we - * to argue? Note that {whitespace_with_newline} should not have * after - * it, whereas {whitespace} should generally have a * after it... - */ - -horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) - -quote ' -quotestop {quote}{whitespace}* -quotecontinue {quote}{whitespace_with_newline}{quote} -quotefail {quote}{whitespace}*"-" - /* special characters for other dbms */ /* we have to react differently in compat mode */ informix_special [\$] @@ -349,6 +350,8 @@ include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT] import [iI][mM][pP][oO][rR][tT] undef [uU][nN][dD][eE][fF] +ccomment "//".*\n + if [iI][fF] ifdef [iI][fF][dD][eE][fF] ifndef [iI][fF][nN][dD][eE][fF] @@ -375,14 +378,14 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ /* * Dollar quoted strings are totally opaque, and no escaping is done on them. * Other quoted strings must allow some special characters such as single-quote - * and newline. + * and newline. * Embedded single-quotes are implemented both in the SQL standard - * style of two adjacent single quotes "''" and in the Postgres/Java style - * of escaped-quote "\'". + * style of two adjacent single quotes "''" and in the Postgres/Java style + * of escaped-quote "\'". * Other embedded escaped characters are matched explicitly and the leading - * backslash is dropped from the string. - thomas 1997-09-24 + * backslash is dropped from the string. * Note that xcstart must appear before operator, as explained above! - * Also whitespace (comment) must appear before operator. + * Also whitespace (comment) must appear before operator. */ %% @@ -392,7 +395,9 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ token_start = NULL; %} -{whitespace} { /* ignore */ } +{whitespace} { + /* ignore */ + } {xcstart} { token_start = yytext; @@ -437,9 +442,16 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ BEGIN(state_before); token_start = NULL; } -{xcinside} { ECHO; } -{op_chars} { ECHO; } -\*+ { ECHO; } + +{xcinside} { + ECHO; + } +{op_chars} { + ECHO; + } +\*+ { + ECHO; + } <> { mmfatal(PARSE_ERROR, "unterminated /* comment"); } @@ -449,8 +461,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ startlit(); addlitchar('b'); } -{quotestop} | -{quotefail} { +{quotestop} | +{quotefail} { yyless(1); BEGIN(SQL); if (literalbuf[strspn(literalbuf, "01") + 1] != '\0') @@ -460,9 +472,13 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ } {xhinside} | -{xbinside} { addlit(yytext, yyleng); } +{xbinside} { + addlit(yytext, yyleng); + } {quotecontinue} | -{quotecontinue} { /* ignore */ } +{quotecontinue} { + /* ignore */ + } <> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); } {xhstart} { @@ -472,176 +488,194 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ addlitchar('x'); } {quotestop} | -{quotefail} { - yyless(1); - BEGIN(SQL); - base_yylval.str = mm_strdup(literalbuf); - return XCONST; - } +{quotefail} { + yyless(1); + BEGIN(SQL); + base_yylval.str = mm_strdup(literalbuf); + return XCONST; + } <> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); } {xnstart} { - /* National character. - * Transfer it as-is to the backend. - */ - token_start = yytext; - state_before = YYSTATE; - BEGIN(xn); - startlit(); - } + /* National character. + * Transfer it as-is to the backend. + */ + token_start = yytext; + state_before = YYSTATE; + BEGIN(xn); + startlit(); + } {xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xqc); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xqc); + startlit(); + } {xqstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xq); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xq); + startlit(); + } {xestart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xe); - startlit(); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xe); + startlit(); + } {xusstart} { - token_start = yytext; - state_before = YYSTATE; - BEGIN(xus); - startlit(); - addlit(yytext, yyleng); - } + token_start = yytext; + state_before = YYSTATE; + BEGIN(xus); + startlit(); + addlit(yytext, yyleng); + } {quotestop} | {quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return SCONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return SCONST; + } {quotestop} | {quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return ECONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return ECONST; + } {quotestop} | {quotefail} { - yyless(1); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return NCONST; - } + yyless(1); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return NCONST; + } {xusstop} { - addlit(yytext, yyleng); - BEGIN(state_before); - base_yylval.str = mm_strdup(literalbuf); - return UCONST; - } + addlit(yytext, yyleng); + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return UCONST; + } {xqdouble} { addlitchar('\''); } -{xqcquote} { - addlitchar('\\'); - addlitchar('\''); - } +{xqcquote} { + addlitchar('\\'); + addlitchar('\''); + } {xqinside} { addlit(yytext, yyleng); } -{xeinside} { addlit(yytext, yyleng); } -{xeunicode} { addlit(yytext, yyleng); } -{xeescape} { addlit(yytext, yyleng); } -{xeoctesc} { addlit(yytext, yyleng); } -{xehexesc} { addlit(yytext, yyleng); } +{xeinside} { + addlit(yytext, yyleng); + } +{xeunicode} { + addlit(yytext, yyleng); + } +{xeescape} { + addlit(yytext, yyleng); + } +{xeoctesc} { + addlit(yytext, yyleng); + } +{xehexesc} { + addlit(yytext, yyleng); + } {quotecontinue} { /* ignore */ } -. { - /* This is only needed for \ just before EOF */ - addlitchar(yytext[0]); - } +. { + /* This is only needed for \ just before EOF */ + addlitchar(yytext[0]); + } <> { mmfatal(PARSE_ERROR, "unterminated quoted string"); } {dolqfailed} { - /* throw back all but the initial "$" */ - yyless(1); - /* and treat it as {other} */ - return yytext[0]; - } + /* throw back all but the initial "$" */ + yyless(1); + /* and treat it as {other} */ + return yytext[0]; + } {dolqdelim} { - token_start = yytext; - if (dolqstart) - free(dolqstart); - dolqstart = mm_strdup(yytext); - BEGIN(xdolq); - startlit(); - addlit(yytext, yyleng); - } -{dolqdelim} { - if (strcmp(yytext, dolqstart) == 0) - { + token_start = yytext; + if (dolqstart) + free(dolqstart); + dolqstart = mm_strdup(yytext); + BEGIN(xdolq); + startlit(); addlit(yytext, yyleng); - free(dolqstart); - dolqstart = NULL; - BEGIN(SQL); - base_yylval.str = mm_strdup(literalbuf); - return DOLCONST; - } - else - { - /* - * When we fail to match $...$ to dolqstart, transfer - * the $... part to the output, but put back the final - * $ for rescanning. Consider $delim$...$junk$delim$ - */ - addlit(yytext, yyleng-1); - yyless(yyleng-1); } - } -{dolqinside} { addlit(yytext, yyleng); } -{dolqfailed} { addlit(yytext, yyleng); } -{other} { - /* single quote or dollar sign */ - addlitchar(yytext[0]); - } -<> { base_yyerror("unterminated dollar-quoted string"); } -{xdstart} { - state_before = YYSTATE; - BEGIN(xd); - startlit(); - } -{xuistart} { - state_before = YYSTATE; - BEGIN(xui); - startlit(); +{dolqdelim} { + if (strcmp(yytext, dolqstart) == 0) + { addlit(yytext, yyleng); - } -{xdstop} { - BEGIN(state_before); - if (literallen == 0) - mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); - /* The backend will truncate the identifier here. We do not as it does not change the result. */ - base_yylval.str = mm_strdup(literalbuf); - return CSTRING; - } -{xdstop} { - BEGIN(state_before); + free(dolqstart); + dolqstart = NULL; + BEGIN(SQL); base_yylval.str = mm_strdup(literalbuf); - return CSTRING; + return DOLCONST; } -{xuistop} { - BEGIN(state_before); - if (literallen == 2) /* "U&" */ - mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); - /* The backend will truncate the identifier here. We do not as it does not change the result. */ - addlit(yytext, yyleng); - base_yylval.str = mm_strdup(literalbuf); - return UIDENT; + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + addlit(yytext, yyleng-1); + yyless(yyleng-1); } -{xddouble} { addlitchar('"'); } -{xdinside} { addlit(yytext, yyleng); } -<> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); } + } +{dolqinside} { + addlit(yytext, yyleng); + } +{dolqfailed} { + addlit(yytext, yyleng); + } +. { + /* single quote or dollar sign */ + addlitchar(yytext[0]); + } +<> { base_yyerror("unterminated dollar-quoted string"); } +{xdstart} { + state_before = YYSTATE; + BEGIN(xd); + startlit(); + } +{xuistart} { + state_before = YYSTATE; + BEGIN(xui); + startlit(); + addlit(yytext, yyleng); + } +{xdstop} { + BEGIN(state_before); + if (literallen == 0) + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +{xdstop} { + BEGIN(state_before); + base_yylval.str = mm_strdup(literalbuf); + return CSTRING; + } +{xuistop} { + BEGIN(state_before); + if (literallen == 2) /* "U&" */ + mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier"); + /* The backend will truncate the identifier here. We do not as it does not change the result. */ + addlit(yytext, yyleng); + base_yylval.str = mm_strdup(literalbuf); + return UIDENT; + } +{xddouble} { + addlitchar('"'); + } +{xdinside} { + addlit(yytext, yyleng); + } +<> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); } {xdstart} { - state_before = YYSTATE; - BEGIN(xdc); - startlit(); - } + state_before = YYSTATE; + BEGIN(xdc); + startlit(); + } {xdcinside} { addlit(yytext, yyleng); } {typecast} { return TYPECAST; } {dot_dot} { return DOT_DOT; } @@ -660,184 +694,189 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ else return yytext[0]; } -{self} { /* - * We may find a ';' inside a structure - * definition in a TYPE or VAR statement. - * This is not an EOL marker. - */ - if (yytext[0] == ';' && struct_level == 0) - BEGIN(C); - return yytext[0]; - } -{operator} { - /* - * Check for embedded slash-star or dash-dash; those - * are comment starts, so operator must stop there. - * Note that slash-star or dash-dash at the first - * character will match a prior rule, not this one. - */ - int nchars = yyleng; - char *slashstar = strstr(yytext, "/*"); - char *dashdash = strstr(yytext, "--"); +{self} { + /* + * We may find a ';' inside a structure + * definition in a TYPE or VAR statement. + * This is not an EOL marker. + */ + if (yytext[0] == ';' && struct_level == 0) + BEGIN(C); + return yytext[0]; + } +{operator} { + /* + * Check for embedded slash-star or dash-dash; those + * are comment starts, so operator must stop there. + * Note that slash-star or dash-dash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *dashdash = strstr(yytext, "--"); - if (slashstar && dashdash) - { - /* if both appear, take the first one */ - if (slashstar > dashdash) - slashstar = dashdash; - } - else if (!slashstar) + if (slashstar && dashdash) + { + /* if both appear, take the first one */ + if (slashstar > dashdash) slashstar = dashdash; - if (slashstar) - nchars = slashstar - yytext; + } + else if (!slashstar) + slashstar = dashdash; + if (slashstar) + nchars = slashstar - yytext; - /* - * For SQL compatibility, '+' and '-' cannot be the - * last char of a multi-char operator unless the operator - * contains chars that are not in SQL operators. - * The idea is to lex '=-' as two operators, but not - * to forbid operator names like '?-' that could not be - * sequences of SQL operators. - */ - if (nchars > 1 && - (yytext[nchars - 1] == '+' || - yytext[nchars - 1] == '-')) - { - int ic; + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. + */ + if (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')) + { + int ic; - for (ic = nchars - 2; ic >= 0; ic--) - { - char c = yytext[ic]; - if (c == '~' || c == '!' || c == '@' || - c == '#' || c == '^' || c == '&' || - c == '|' || c == '`' || c == '?' || - c == '%') - break; - } - if (ic < 0) - { - /* - * didn't find a qualifying character, so remove - * all trailing [+-] - */ - do { - nchars--; - } while (nchars > 1 && - (yytext[nchars - 1] == '+' || - yytext[nchars - 1] == '-')); - } + for (ic = nchars - 2; ic >= 0; ic--) + { + char c = yytext[ic]; + if (c == '~' || c == '!' || c == '@' || + c == '#' || c == '^' || c == '&' || + c == '|' || c == '`' || c == '?' || + c == '%') + break; } - - if (nchars < yyleng) + if (ic < 0) { - /* Strip the unwanted chars from the token */ - yyless(nchars); - /* - * If what we have left is only one char, and it's - * one of the characters matching "self", then - * return it as a character token the same way - * that the "self" rule would have. - */ - if (nchars == 1 && - strchr(",()[].;:+-*/%^<>=", yytext[0])) - return yytext[0]; /* - * Likewise, if what we have left is two chars, and - * those match the tokens ">=", "<=", "=>", "<>" or - * "!=", then we must return the appropriate token - * rather than the generic Op. + * didn't find a qualifying character, so remove + * all trailing [+-] */ - if (nchars == 2) - { - if (yytext[0] == '=' && yytext[1] == '>') - return EQUALS_GREATER; - if (yytext[0] == '>' && yytext[1] == '=') - return GREATER_EQUALS; - if (yytext[0] == '<' && yytext[1] == '=') - return LESS_EQUALS; - if (yytext[0] == '<' && yytext[1] == '>') - return NOT_EQUALS; - if (yytext[0] == '!' && yytext[1] == '=') - return NOT_EQUALS; - } + do { + nchars--; + } while (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')); } - - base_yylval.str = mm_strdup(yytext); - return Op; } -{param} { - base_yylval.ival = atol(yytext+1); - return PARAM; - } -{integer} { - int val; - char* endptr; - errno = 0; - val = strtoint(yytext, &endptr, 10); - if (*endptr != '\0' || errno == ERANGE) + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + /* + * If what we have left is only one char, and it's + * one of the characters matching "self", then + * return it as a character token the same way + * that the "self" rule would have. + */ + if (nchars == 1 && + strchr(",()[].;:+-*/%^<>=", yytext[0])) + return yytext[0]; + /* + * Likewise, if what we have left is two chars, and + * those match the tokens ">=", "<=", "=>", "<>" or + * "!=", then we must return the appropriate token + * rather than the generic Op. + */ + if (nchars == 2) { - errno = 0; - base_yylval.str = mm_strdup(yytext); - return FCONST; + if (yytext[0] == '=' && yytext[1] == '>') + return EQUALS_GREATER; + if (yytext[0] == '>' && yytext[1] == '=') + return GREATER_EQUALS; + if (yytext[0] == '<' && yytext[1] == '=') + return LESS_EQUALS; + if (yytext[0] == '<' && yytext[1] == '>') + return NOT_EQUALS; + if (yytext[0] == '!' && yytext[1] == '=') + return NOT_EQUALS; } - base_yylval.ival = val; - return ICONST; } -{ip} { - base_yylval.str = mm_strdup(yytext); - return IP; + + base_yylval.str = mm_strdup(yytext); + return Op; } -{decimal} { +{param} { + base_yylval.ival = atol(yytext+1); + return PARAM; + } +{integer} { + int val; + char* endptr; + + errno = 0; + val = strtoint(yytext, &endptr, 10); + if (*endptr != '\0' || errno == ERANGE) + { + errno = 0; base_yylval.str = mm_strdup(yytext); return FCONST; - } -{real} { + } + base_yylval.ival = val; + return ICONST; + } +{ip} { + base_yylval.str = mm_strdup(yytext); + return IP; + } +{decimal} { + base_yylval.str = mm_strdup(yytext); + return FCONST; + } +{real} { base_yylval.str = mm_strdup(yytext); return FCONST; - } + } {realfail1} { - yyless(yyleng-1); - base_yylval.str = mm_strdup(yytext); - return FCONST; - } + yyless(yyleng-1); + base_yylval.str = mm_strdup(yytext); + return FCONST; + } {realfail2} { - yyless(yyleng-2); - base_yylval.str = mm_strdup(yytext); - return FCONST; - } + yyless(yyleng-2); + base_yylval.str = mm_strdup(yytext); + return FCONST; + } :{identifier}((("->"|\.){identifier})|(\[{array}\]))* { - base_yylval.str = mm_strdup(yytext+1); - return CVARIABLE; - } + base_yylval.str = mm_strdup(yytext+1); + return CVARIABLE; + } {identifier} { - const ScanKeyword *keyword; + const ScanKeyword *keyword; - if (!isdefine()) - { - /* Is it an SQL/ECPG keyword? */ - keyword = ScanECPGKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + if (!isdefine()) + { + /* Is it an SQL/ECPG keyword? */ + keyword = ScanECPGKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; - /* Is it a C keyword? */ - keyword = ScanCKeywordLookup(yytext); - if (keyword != NULL) - return keyword->value; + /* Is it a C keyword? */ + keyword = ScanCKeywordLookup(yytext); + if (keyword != NULL) + return keyword->value; - /* - * None of the above. Return it as an identifier. - * - * The backend will attempt to truncate and case-fold - * the identifier, but I see no good reason for ecpg - * to do so; that's just another way that ecpg could get - * out of step with the backend. - */ - base_yylval.str = mm_strdup(yytext); - return IDENT; - } + /* + * None of the above. Return it as an identifier. + * + * The backend will attempt to truncate and case-fold + * the identifier, but I see no good reason for ecpg + * to do so; that's just another way that ecpg could get + * out of step with the backend. + */ + base_yylval.str = mm_strdup(yytext); + return IDENT; } -{other} { return yytext[0]; } + } + +{other} { + return yytext[0]; + } + {exec_sql} { BEGIN(SQL); return SQL_START; } {informix_special} { /* are we simulating Informix? */ -- 2.17.1