From 107e3c8a0b65b0196ea4370a724c8b2a1b0fdf79 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Sun, 30 Sep 2018 12:51:41 +0700
Subject: [PATCH v1 1/4] First pass at syncing ECPG scanner with the core
 scanner.

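Adjust whitespace and formatting, clean up some comments, and move
the block of whitespace rules.  Also drop the references to the
plpgsql lexer from the whitespace comments in scan.l and psqlscan.l.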
---
 src/backend/parser/scan.l         |   2 +-
 src/fe_utils/psqlscan.l           |   2 +-
 src/interfaces/ecpg/preproc/pgc.l | 773 ++++++++++++++++--------------
 3 files changed, 408 insertions(+), 369 deletions(-)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 950b8b8591..a2454732a1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -192,7 +192,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
  *
  * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
  */
 
 space			[ \t\n\r\f]
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index fdf49875a7..25253b54ea 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -151,7 +151,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
  *
  * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
  */
 
 space			[ \t\n\r\f]
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 0792118cfe..b96f17ca20 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -108,16 +108,19 @@ static struct _if_value
  * We use exclusive states for quoted strings, extended comments,
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
- *	<xb> bit string literal
- *	<xcc> extended C-style comments in C
- *	<xcsql> extended C-style comments in SQL
- *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
- *	<xh> hexadecimal numeric string - thomas 1997-11-16
- *	<xq> standard quoted strings - thomas 1997-07-30
- *	<xqc> standard quoted strings in C - michael
- *	<xe> extended quoted strings (support backslash escape sequences)
- *	<xn> national character quoted strings
+ *  <xb> bit string literal
+ *  <xcc> extended C-style comments in C
+ *  <xcsql> extended C-style comments in SQL
+ *  <xd> delimited identifiers (double-quoted identifiers)
+ *  <xdc> double-quoted strings in C
+ *  <xh> hexadecimal numeric string
+ *  <xq> standard quoted strings
+ *  <xe> extended quoted strings (support backslash escape sequences)
+ *  <xn> national character quoted strings
+ *  <xqc> standard quoted strings in C
  *  <xdolq> $foo$ quoted strings
+ *  <xcond>
+ *  <xskip>
  *  <xui> quoted identifier with Unicode escapes
  *  <xus> quoted string with Unicode escapes
  */
@@ -138,6 +141,48 @@ static struct _if_value
 %x xui
 %x xus
 
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems.  SQL-style comments, which start with -- and extend to the
+ * next newline, are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment.  This is correct.  Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix ecpg_isspace()
+ * to agree.
+ */
+
+space			[ \t\n\r\f]
+horiz_space		[ \t\f]
+newline			[\n\r]
+non_newline		[^\n\r]
+
+comment			("--"{non_newline}*)
+
+whitespace		({space}+|{comment})
+
+/*
+ * SQL requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated.  Silly, but who are we
+ * to argue?  Note that {whitespace_with_newline} should not have * after
+ * it, whereas {whitespace} should generally have a * after it...
+ */
+
+horiz_whitespace		({horiz_space}|{comment})
+whitespace_with_newline	({horiz_whitespace}*{newline}{whitespace}*)
+
+quote			'
+quotestop		{quote}{whitespace}*
+quotecontinue	{quote}{whitespace_with_newline}{quote}
+quotefail		{quote}{whitespace}*"-"
+
 /* Bit string
  */
 xbstart			[bB]{quote}
@@ -216,17 +261,17 @@ xdcinside		({xdcqq}|{xdcqdq}|{xdcother})
  * The "extended comment" syntax closely resembles allowable operator syntax.
  * The tricky part here is to get lex to recognize a string starting with
  * slash-star as a comment, when interpreting it as an operator would produce
- * a longer match --- remember lex will prefer a longer match!	Also, if we
+ * a longer match --- remember lex will prefer a longer match!  Also, if we
  * have something like plus-slash-star, lex will think this is a 3-character
  * operator whereas we want to see it as a + operator and a comment start.
  * The solution is two-fold:
  * 1. append {op_chars}* to xcstart so that it matches as much text as
- *	  {operator} would. Then the tie-breaker (first matching rule of same
- *	  length) ensures xcstart wins.  We put back the extra stuff with yyless()
- *	  in case it contains a star-slash that should terminate the comment.
+ *    {operator} would. Then the tie-breaker (first matching rule of same
+ *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
+ *    in case it contains a star-slash that should terminate the comment.
  * 2. In the operator rule, check for slash-star within the operator, and
- *	  if found throw it back with yyless().  This handles the plus-slash-star
- *	  problem.
+ *    if found throw it back with yyless().  This handles the plus-slash-star
+ *    problem.
  * Dash-dash comments have similar interactions with the operator rule.
  */
 xcstart			\/\*{op_chars}*
@@ -262,7 +307,7 @@ not_equals		"!="
 
 /*
  * "self" is the set of chars that should be returned as single-character
- * tokens.	"op_chars" is the set of chars that can make up "Op" tokens,
+ * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
  * which can be one or more characters long (but if a single-char token
  * appears in the "self" set, it is not to be returned as an Op).  Note
  * that the sets overlap, but each has some chars that are not in the other.
@@ -290,50 +335,6 @@ realfail2		({integer}|{decimal})[Ee][-+]
 
 param			\${integer}
 
-/*
- * In order to make the world safe for Windows and Mac clients as well as
- * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
- * sequence will be seen as two successive newlines, but that doesn't cause
- * any problems.  SQL-style comments, which start with -- and extend to the
- * next newline, are treated as equivalent to a single whitespace character.
- *
- * NOTE a fine point: if there is no newline following --, we will absorb
- * everything to the end of the input as a comment.  This is correct.  Older
- * versions of Postgres failed to recognize -- as a comment if the input
- * did not end with a newline.
- *
- * XXX perhaps \f (formfeed) should be treated as a newline as well?
- *
- * XXX if you change the set of whitespace characters, fix ecpg_isspace()
- * to agree.
- */
-
-ccomment		"//".*\n
-
-space			[ \t\n\r\f]
-horiz_space		[ \t\f]
-newline			[\n\r]
-non_newline		[^\n\r]
-
-comment			("--"{non_newline}*)
-
-whitespace		({space}+|{comment})
-
-/*
- * SQL requires at least one newline in the whitespace separating
- * string literals that are to be concatenated.  Silly, but who are we
- * to argue?  Note that {whitespace_with_newline} should not have * after
- * it, whereas {whitespace} should generally have a * after it...
- */
-
-horiz_whitespace	({horiz_space}|{comment})
-whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
-
-quote			'
-quotestop		{quote}{whitespace}*
-quotecontinue	{quote}{whitespace_with_newline}{quote}
-quotefail		{quote}{whitespace}*"-"
-
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
 informix_special	[\$]
@@ -349,6 +350,8 @@ include_next	[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import			[iI][mM][pP][oO][rR][tT]
 undef			[uU][nN][dD][eE][fF]
 
+ccomment		"//".*\n
+
 if				[iI][fF]
 ifdef			[iI][fF][dD][eE][fF]
 ifndef			[iI][fF][nN][dD][eE][fF]
@@ -375,14 +378,14 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 /*
  * Dollar quoted strings are totally opaque, and no escaping is done on them.
  * Other quoted strings must allow some special characters such as single-quote
- *	and newline.
+ *  and newline.
  * Embedded single-quotes are implemented both in the SQL standard
- *	style of two adjacent single quotes "''" and in the Postgres/Java style
- *	of escaped-quote "\'".
+ *  style of two adjacent single quotes "''" and in the Postgres/Java style
+ *  of escaped-quote "\'".
  * Other embedded escaped characters are matched explicitly and the leading
- *	backslash is dropped from the string. - thomas 1997-09-24
+ *  backslash is dropped from the string.
  * Note that xcstart must appear before operator, as explained above!
- *	Also whitespace (comment) must appear before operator.
+ *  Also whitespace (comment) must appear before operator.
  */
 
 %%
@@ -392,7 +395,9 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 		token_start = NULL;
 %}
 
-<SQL>{whitespace}	{ /* ignore */ }
+<SQL>{whitespace}	{
+					/* ignore */
+				}
 
 <C>{xcstart}		{
 					token_start = yytext;
@@ -437,9 +442,16 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					BEGIN(state_before);
 					token_start = NULL;
 				}
-<xcc,xcsql>{xcinside}	{ ECHO; }
-<xcc,xcsql>{op_chars}	{ ECHO; }
-<xcc,xcsql>\*+		{ ECHO; }
+
+<xcc,xcsql>{xcinside}	{
+					ECHO;
+				}
+<xcc,xcsql>{op_chars}	{
+					ECHO;
+				}
+<xcc,xcsql>\*+	{
+					ECHO;
+				}
 
 <xcc,xcsql><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated /* comment"); }
 
@@ -449,8 +461,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					startlit();
 					addlitchar('b');
 				}
-<xb>{quotestop} |
-<xb>{quotefail}	{
+<xb>{quotestop}	|
+<xb>{quotefail} {
 					yyless(1);
 					BEGIN(SQL);
 					if (literalbuf[strspn(literalbuf, "01") + 1] != '\0')
@@ -460,9 +472,13 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				}
 
 <xh>{xhinside}	|
-<xb>{xbinside}	{ addlit(yytext, yyleng); }
+<xb>{xbinside}	{
+					addlit(yytext, yyleng);
+				}
 <xh>{quotecontinue}	|
-<xb>{quotecontinue}	{ /* ignore */ }
+<xb>{quotecontinue}	{
+					/* ignore */
+				}
 <xb><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
 
 <SQL>{xhstart}	{
@@ -472,176 +488,194 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					addlitchar('x');
 				}
 <xh>{quotestop}	|
-<xh>{quotefail}	{
-				yyless(1);
-				BEGIN(SQL);
-				base_yylval.str = mm_strdup(literalbuf);
-				return XCONST;
-			}
+<xh>{quotefail} {
+					yyless(1);
+					BEGIN(SQL);
+					base_yylval.str = mm_strdup(literalbuf);
+					return XCONST;
+				}
 
 <xh><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
 <SQL>{xnstart} {
-				/* National character.
-				 * Transfer it as-is to the backend.
-				 */
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xn);
-				startlit();
-			}
+					/* National character.
+					 * Transfer it as-is to the backend.
+					 */
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xn);
+					startlit();
+				}
 <C>{xqstart}	{
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xqc);
-				startlit();
-			}
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xqc);
+					startlit();
+				}
 <SQL>{xqstart}	{
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xq);
-				startlit();
-			}
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xq);
+					startlit();
+				}
 <SQL>{xestart}	{
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xe);
-				startlit();
-			}
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xe);
+					startlit();
+				}
 <SQL>{xusstart}	{
-				token_start = yytext;
-				state_before = YYSTATE;
-				BEGIN(xus);
-				startlit();
-				addlit(yytext, yyleng);
-			}
+					token_start = yytext;
+					state_before = YYSTATE;
+					BEGIN(xus);
+					startlit();
+					addlit(yytext, yyleng);
+				}
 <xq,xqc>{quotestop} |
 <xq,xqc>{quotefail} {
-				yyless(1);
-				BEGIN(state_before);
-				base_yylval.str = mm_strdup(literalbuf);
-				return SCONST;
-			}
+					yyless(1);
+					BEGIN(state_before);
+					base_yylval.str = mm_strdup(literalbuf);
+					return SCONST;
+				}
 <xe>{quotestop} |
 <xe>{quotefail} {
-				yyless(1);
-				BEGIN(state_before);
-				base_yylval.str = mm_strdup(literalbuf);
-				return ECONST;
-			}
+					yyless(1);
+					BEGIN(state_before);
+					base_yylval.str = mm_strdup(literalbuf);
+					return ECONST;
+				}
 <xn>{quotestop} |
 <xn>{quotefail} {
-				yyless(1);
-				BEGIN(state_before);
-				base_yylval.str = mm_strdup(literalbuf);
-				return NCONST;
-			}
+					yyless(1);
+					BEGIN(state_before);
+					base_yylval.str = mm_strdup(literalbuf);
+					return NCONST;
+				}
 <xus>{xusstop} {
-				addlit(yytext, yyleng);
-				BEGIN(state_before);
-				base_yylval.str = mm_strdup(literalbuf);
-				return UCONST;
-			}
+					addlit(yytext, yyleng);
+					BEGIN(state_before);
+					base_yylval.str = mm_strdup(literalbuf);
+					return UCONST;
+				}
 <xq,xe,xn,xus>{xqdouble}	{ addlitchar('\''); }
-<xqc>{xqcquote}		{
-				addlitchar('\\');
-				addlitchar('\'');
-			}
+<xqc>{xqcquote}	{
+					addlitchar('\\');
+					addlitchar('\'');
+				}
 <xq,xqc,xn,xus>{xqinside}	{ addlit(yytext, yyleng); }
-<xe>{xeinside}		{ addlit(yytext, yyleng); }
-<xe>{xeunicode}		{ addlit(yytext, yyleng); }
-<xe>{xeescape}		{ addlit(yytext, yyleng); }
-<xe>{xeoctesc}		{ addlit(yytext, yyleng); }
-<xe>{xehexesc}		{ addlit(yytext, yyleng); }
+<xe>{xeinside}	{
+					addlit(yytext, yyleng);
+				}
+<xe>{xeunicode}	{
+					addlit(yytext, yyleng);
+				}
+<xe>{xeescape}	{
+					addlit(yytext, yyleng);
+				}
+<xe>{xeoctesc}	{
+					addlit(yytext, yyleng);
+				}
+<xe>{xehexesc}	{
+					addlit(yytext, yyleng);
+				}
 <xq,xqc,xe,xn,xus>{quotecontinue}	{ /* ignore */ }
-<xe>.		{
-			   /* This is only needed for \ just before EOF */
-			   addlitchar(yytext[0]);
-			}
+<xe>.			{
+					/* This is only needed for \ just before EOF */
+					addlitchar(yytext[0]);
+				}
 <xq,xqc,xe,xn,xus><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted string"); }
 <SQL>{dolqfailed}	{
-				/* throw back all but the initial "$" */
-				yyless(1);
-				/* and treat it as {other} */
-				return yytext[0];
-			}
+					/* throw back all but the initial "$" */
+					yyless(1);
+					/* and treat it as {other} */
+					return yytext[0];
+				}
 <SQL>{dolqdelim} {
-				token_start = yytext;
-				if (dolqstart)
-					free(dolqstart);
-				dolqstart = mm_strdup(yytext);
-				BEGIN(xdolq);
-				startlit();
-				addlit(yytext, yyleng);
-			}
-<xdolq>{dolqdelim} {
-				if (strcmp(yytext, dolqstart) == 0)
-				{
+					token_start = yytext;
+					if (dolqstart)
+						free(dolqstart);
+					dolqstart = mm_strdup(yytext);
+					BEGIN(xdolq);
+					startlit();
 					addlit(yytext, yyleng);
-					free(dolqstart);
-					dolqstart = NULL;
-					BEGIN(SQL);
-					base_yylval.str = mm_strdup(literalbuf);
-					return DOLCONST;
-				}
-				else
-				{
-					/*
-					 * When we fail to match $...$ to dolqstart, transfer
-					 * the $... part to the output, but put back the final
-					 * $ for rescanning.  Consider $delim$...$junk$delim$
-					 */
-					addlit(yytext, yyleng-1);
-					yyless(yyleng-1);
 				}
-			}
-<xdolq>{dolqinside}	{ addlit(yytext, yyleng); }
-<xdolq>{dolqfailed}	{ addlit(yytext, yyleng); }
-<xdolq>{other}		{
-				/* single quote or dollar sign */
-				addlitchar(yytext[0]);
-			}
-<xdolq><<EOF>>		{ base_yyerror("unterminated dollar-quoted string"); }
-<SQL>{xdstart}		{
-						state_before = YYSTATE;
-						BEGIN(xd);
-						startlit();
-					}
-<SQL>{xuistart}		{
-						state_before = YYSTATE;
-						BEGIN(xui);
-						startlit();
+<xdolq>{dolqdelim} {
+					if (strcmp(yytext, dolqstart) == 0)
+					{
 						addlit(yytext, yyleng);
-					}
-<xd>{xdstop}		{
-						BEGIN(state_before);
-						if (literallen == 0)
-							mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
-						/* The backend will truncate the identifier here. We do not as it does not change the result. */
-						base_yylval.str = mm_strdup(literalbuf);
-						return CSTRING;
-					}
-<xdc>{xdstop}		{
-						BEGIN(state_before);
+						free(dolqstart);
+						dolqstart = NULL;
+						BEGIN(SQL);
 						base_yylval.str = mm_strdup(literalbuf);
-						return CSTRING;
+						return DOLCONST;
 					}
-<xui>{xuistop}		{
-						BEGIN(state_before);
-						if (literallen == 2) /* "U&" */
-							mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
-						/* The backend will truncate the identifier here. We do not as it does not change the result. */
-						addlit(yytext, yyleng);
-						base_yylval.str = mm_strdup(literalbuf);
-						return UIDENT;
+					else
+					{
+						/*
+						 * When we fail to match $...$ to dolqstart, transfer
+						 * the $... part to the output, but put back the final
+						 * $ for rescanning.  Consider $delim$...$junk$delim$
+						 */
+						addlit(yytext, yyleng-1);
+						yyless(yyleng-1);
 					}
-<xd,xui>{xddouble}		{ addlitchar('"'); }
-<xd,xui>{xdinside}		{ addlit(yytext, yyleng); }
-<xd,xdc,xui><<EOF>>		{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
+				}
+<xdolq>{dolqinside} {
+					addlit(yytext, yyleng);
+				}
+<xdolq>{dolqfailed} {
+					addlit(yytext, yyleng);
+				}
+<xdolq>.		{
+					/* single quote or dollar sign */
+					addlitchar(yytext[0]);
+				}
+<xdolq><<EOF>>	{ base_yyerror("unterminated dollar-quoted string"); }
+<SQL>{xdstart}	{
+					state_before = YYSTATE;
+					BEGIN(xd);
+					startlit();
+				}
+<SQL>{xuistart}	{
+					state_before = YYSTATE;
+					BEGIN(xui);
+					startlit();
+					addlit(yytext, yyleng);
+				}
+<xd>{xdstop}	{
+					BEGIN(state_before);
+					if (literallen == 0)
+						mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
+					/* The backend will truncate the identifier here; we don't, as that would not change the result. */
+					base_yylval.str = mm_strdup(literalbuf);
+					return CSTRING;
+				}
+<xdc>{xdstop}	{
+					BEGIN(state_before);
+					base_yylval.str = mm_strdup(literalbuf);
+					return CSTRING;
+				}
+<xui>{xuistop}	{
+					BEGIN(state_before);
+					if (literallen == 2) /* "U&" */
+						mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
+					/* The backend will truncate the identifier here; we don't, as that would not change the result. */
+					addlit(yytext, yyleng);
+					base_yylval.str = mm_strdup(literalbuf);
+					return UIDENT;
+				}
+<xd,xui>{xddouble}	{
+					addlitchar('"');
+				}
+<xd,xui>{xdinside}	{
+					addlit(yytext, yyleng);
+				}
+<xd,xdc,xui><<EOF>>	{ mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
 <C,SQL>{xdstart}	{
-						state_before = YYSTATE;
-						BEGIN(xdc);
-						startlit();
-					}
+					state_before = YYSTATE;
+					BEGIN(xdc);
+					startlit();
+				}
 <xdc>{xdcinside}	{ addlit(yytext, yyleng); }
 <SQL>{typecast}		{ return TYPECAST; }
 <SQL>{dot_dot}		{ return DOT_DOT; }
@@ -660,184 +694,189 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 				else
 					return yytext[0];
 				}
-<SQL>{self}			{ /*
-					   * We may find a ';' inside a structure
-					   * definition in a TYPE or VAR statement.
-					   * This is not an EOL marker.
-					   */
-					  if (yytext[0] == ';' && struct_level == 0)
-						 BEGIN(C);
-					  return yytext[0];
-					}
-<SQL>{operator}		{
-						/*
-						 * Check for embedded slash-star or dash-dash; those
-						 * are comment starts, so operator must stop there.
-						 * Note that slash-star or dash-dash at the first
-						 * character will match a prior rule, not this one.
-						 */
-						int		nchars = yyleng;
-						char   *slashstar = strstr(yytext, "/*");
-						char   *dashdash = strstr(yytext, "--");
+<SQL>{self}		{
+					/*
+					 * We may find a ';' inside a structure
+					 * definition in a TYPE or VAR statement.
+					 * This is not an EOL marker.
+					 */
+					if (yytext[0] == ';' && struct_level == 0)
+						BEGIN(C);
+					return yytext[0];
+				}
+<SQL>{operator}	{
+					/*
+					 * Check for embedded slash-star or dash-dash; those
+					 * are comment starts, so operator must stop there.
+					 * Note that slash-star or dash-dash at the first
+					 * character will match a prior rule, not this one.
+					 */
+					int			nchars = yyleng;
+					char	   *slashstar = strstr(yytext, "/*");
+					char	   *dashdash = strstr(yytext, "--");
 
-						if (slashstar && dashdash)
-						{
-							/* if both appear, take the first one */
-							if (slashstar > dashdash)
-								slashstar = dashdash;
-						}
-						else if (!slashstar)
+					if (slashstar && dashdash)
+					{
+						/* if both appear, take the first one */
+						if (slashstar > dashdash)
 							slashstar = dashdash;
-						if (slashstar)
-							nchars = slashstar - yytext;
+					}
+					else if (!slashstar)
+						slashstar = dashdash;
+					if (slashstar)
+						nchars = slashstar - yytext;
 
-						/*
-						 * For SQL compatibility, '+' and '-' cannot be the
-						 * last char of a multi-char operator unless the operator
-						 * contains chars that are not in SQL operators.
-						 * The idea is to lex '=-' as two operators, but not
-						 * to forbid operator names like '?-' that could not be
-						 * sequences of SQL operators.
-						 */
-						if (nchars > 1 &&
-							(yytext[nchars - 1] == '+' ||
-							 yytext[nchars - 1] == '-'))
-						{
-							int		ic;
+					/*
+					 * For SQL compatibility, '+' and '-' cannot be the
+					 * last char of a multi-char operator unless the operator
+					 * contains chars that are not in SQL operators.
+					 * The idea is to lex '=-' as two operators, but not
+					 * to forbid operator names like '?-' that could not be
+					 * sequences of SQL operators.
+					 */
+					if (nchars > 1 &&
+						(yytext[nchars - 1] == '+' ||
+						 yytext[nchars - 1] == '-'))
+					{
+						int			ic;
 
-							for (ic = nchars - 2; ic >= 0; ic--)
-							{
-								char c = yytext[ic];
-								if (c == '~' || c == '!' || c == '@' ||
-									c == '#' || c == '^' || c == '&' ||
-									c == '|' || c == '`' || c == '?' ||
-									c == '%')
-									break;
-							}
-							if (ic < 0)
-							{
-								/*
-								 * didn't find a qualifying character, so remove
-								 * all trailing [+-]
-								 */
-								do {
-									nchars--;
-								} while (nchars > 1 &&
-									 (yytext[nchars - 1] == '+' ||
-									  yytext[nchars - 1] == '-'));
-							}
+						for (ic = nchars - 2; ic >= 0; ic--)
+						{
+							char c = yytext[ic];
+							if (c == '~' || c == '!' || c == '@' ||
+								c == '#' || c == '^' || c == '&' ||
+								c == '|' || c == '`' || c == '?' ||
+								c == '%')
+								break;
 						}
-
-						if (nchars < yyleng)
+						if (ic < 0)
 						{
-							/* Strip the unwanted chars from the token */
-							yyless(nchars);
-							/*
-							 * If what we have left is only one char, and it's
-							 * one of the characters matching "self", then
-							 * return it as a character token the same way
-							 * that the "self" rule would have.
-							 */
-							if (nchars == 1 &&
-								strchr(",()[].;:+-*/%^<>=", yytext[0]))
-								return yytext[0];
 							/*
-							 * Likewise, if what we have left is two chars, and
-							 * those match the tokens ">=", "<=", "=>", "<>" or
-							 * "!=", then we must return the appropriate token
-							 * rather than the generic Op.
+							 * didn't find a qualifying character, so remove
+							 * all trailing [+-]
 							 */
-							if (nchars == 2)
-							{
-								if (yytext[0] == '=' && yytext[1] == '>')
-									return EQUALS_GREATER;
-								if (yytext[0] == '>' && yytext[1] == '=')
-									return GREATER_EQUALS;
-								if (yytext[0] == '<' && yytext[1] == '=')
-									return LESS_EQUALS;
-								if (yytext[0] == '<' && yytext[1] == '>')
-									return NOT_EQUALS;
-								if (yytext[0] == '!' && yytext[1] == '=')
-									return NOT_EQUALS;
-							}
+							do {
+								nchars--;
+							} while (nchars > 1 &&
+								 (yytext[nchars - 1] == '+' ||
+								  yytext[nchars - 1] == '-'));
 						}
-
-						base_yylval.str = mm_strdup(yytext);
-						return Op;
 					}
-<SQL>{param}		{
-						base_yylval.ival = atol(yytext+1);
-						return PARAM;
-					}
-<C,SQL>{integer}	{
-						int val;
-						char* endptr;
 
-						errno = 0;
-						val = strtoint(yytext, &endptr, 10);
-						if (*endptr != '\0' || errno == ERANGE)
+					if (nchars < yyleng)
+					{
+						/* Strip the unwanted chars from the token */
+						yyless(nchars);
+						/*
+						 * If what we have left is only one char, and it's
+						 * one of the characters matching "self", then
+						 * return it as a character token the same way
+						 * that the "self" rule would have.
+						 */
+						if (nchars == 1 &&
+							strchr(",()[].;:+-*/%^<>=", yytext[0]))
+							return yytext[0];
+						/*
+						 * Likewise, if what we have left is two chars, and
+						 * those match the tokens ">=", "<=", "=>", "<>" or
+						 * "!=", then we must return the appropriate token
+						 * rather than the generic Op.
+						 */
+						if (nchars == 2)
 						{
-							errno = 0;
-							base_yylval.str = mm_strdup(yytext);
-							return FCONST;
+							if (yytext[0] == '=' && yytext[1] == '>')
+								return EQUALS_GREATER;
+							if (yytext[0] == '>' && yytext[1] == '=')
+								return GREATER_EQUALS;
+							if (yytext[0] == '<' && yytext[1] == '=')
+								return LESS_EQUALS;
+							if (yytext[0] == '<' && yytext[1] == '>')
+								return NOT_EQUALS;
+							if (yytext[0] == '!' && yytext[1] == '=')
+								return NOT_EQUALS;
 						}
-						base_yylval.ival = val;
-						return ICONST;
 					}
-<SQL>{ip}			{
-						base_yylval.str = mm_strdup(yytext);
-						return IP;
+
+					base_yylval.str = mm_strdup(yytext);
+					return Op;
 				}
-<C,SQL>{decimal}	{
+<SQL>{param}	{
+					base_yylval.ival = atol(yytext+1);
+					return PARAM;
+				}
+<C,SQL>{integer}	{
+					int val;
+					char* endptr;
+
+					errno = 0;
+					val = strtoint(yytext, &endptr, 10);
+					if (*endptr != '\0' || errno == ERANGE)
+					{
+						errno = 0;
 						base_yylval.str = mm_strdup(yytext);
 						return FCONST;
-			}
-<C,SQL>{real}		{
+					}
+					base_yylval.ival = val;
+					return ICONST;
+				}
+<SQL>{ip}		{
+					base_yylval.str = mm_strdup(yytext);
+					return IP;
+				}
+<C,SQL>{decimal}	{
+					base_yylval.str = mm_strdup(yytext);
+					return FCONST;
+				}
+<C,SQL>{real}	{
-						base_yylval.str = mm_strdup(yytext);
-						return FCONST;
-			}
+					base_yylval.str = mm_strdup(yytext);
+					return FCONST;
+				}
 <SQL>{realfail1}	{
-						yyless(yyleng-1);
-						base_yylval.str = mm_strdup(yytext);
-						return FCONST;
-					}
+					yyless(yyleng-1);
+					base_yylval.str = mm_strdup(yytext);
+					return FCONST;
+				}
 <SQL>{realfail2}	{
-						yyless(yyleng-2);
-						base_yylval.str = mm_strdup(yytext);
-						return FCONST;
-					}
+					yyless(yyleng-2);
+					base_yylval.str = mm_strdup(yytext);
+					return FCONST;
+				}
 <SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{
-						base_yylval.str = mm_strdup(yytext+1);
-						return CVARIABLE;
-					}
+					base_yylval.str = mm_strdup(yytext+1);
+					return CVARIABLE;
+				}
 <SQL>{identifier}	{
-						const ScanKeyword  *keyword;
+					const ScanKeyword  *keyword;
 
-						if (!isdefine())
-						{
-							/* Is it an SQL/ECPG keyword? */
-							keyword = ScanECPGKeywordLookup(yytext);
-							if (keyword != NULL)
-								return keyword->value;
+					if (!isdefine())
+					{
+						/* Is it an SQL/ECPG keyword? */
+						keyword = ScanECPGKeywordLookup(yytext);
+						if (keyword != NULL)
+							return keyword->value;
 
-							/* Is it a C keyword? */
-							keyword = ScanCKeywordLookup(yytext);
-							if (keyword != NULL)
-								return keyword->value;
+						/* Is it a C keyword? */
+						keyword = ScanCKeywordLookup(yytext);
+						if (keyword != NULL)
+							return keyword->value;
 
-							/*
-							 * None of the above.  Return it as an identifier.
-							 *
-							 * The backend will attempt to truncate and case-fold
-							 * the identifier, but I see no good reason for ecpg
-							 * to do so; that's just another way that ecpg could get
-							 * out of step with the backend.
-							 */
-							base_yylval.str = mm_strdup(yytext);
-							return IDENT;
-						}
+						/*
+						 * None of the above.  Return it as an identifier.
+						 *
+						 * The backend will attempt to truncate and case-fold
+						 * the identifier, but I see no good reason for ecpg
+						 * to do so; that's just another way that ecpg could get
+						 * out of step with the backend.
+						 */
+						base_yylval.str = mm_strdup(yytext);
+						return IDENT;
 					}
-<SQL>{other}		{ return yytext[0]; }
+				}
+
+<SQL>{other}	{
+					return yytext[0];
+				}
+
 <C>{exec_sql}		{ BEGIN(SQL); return SQL_START; }
 <C>{informix_special}	{
 						/* are we simulating Informix? */
-- 
2.17.1