Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug - Mailing list pgsql-bugs
| From | Tom Lane |
|---|---|
| Subject | Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug |
| Date | |
| Msg-id | 20583.1557680147@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug (Andrew Gierth <andrew@tao11.riddles.org.uk>) |
| Responses |
Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug
|
| List | pgsql-bugs |
Andrew Gierth <andrew@tao11.riddles.org.uk> writes:
> "Tom" == Tom Lane <tgl@sss.pgh.pa.us> writes:
> Tom> Huh, interesting. So we should be translating the initial
> Tom> substring to a non-greedy pattern. I believe Spencer's engine can
> Tom> handle that by sticking (?:...){1,1}? around it.
> Your suggested fix doesn't seem to work. If the leading/trailing
> substrings do not have | or parens in then it seems to work to wrap them
> in (?:(?:)??...), thanks to the rule that the first quantified atom in a
> subexpression sets the whole subexpression's greediness, but handling |
> or parens correctly seems harder.
[ pokes at that... ] Huh. That's a bug, which AFAICS is aboriginal in
Henry's code: it optimizes away a {1,1} quantifier without regard to
whether the quantifier is attempting to impose a different greediness
preference than its argument would have naturally. The attached
seems to fix it.
regards, tom lane
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index eb1f3d5..8cd7d56 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1155,7 +1155,10 @@ parseqatom(struct vars *v,
/* rest of branch can be strung starting from atom->end */
s2 = atom->end;
}
- else if (m == 1 && n == 1)
+ else if (m == 1 && n == 1 &&
+ (qprefer == 0 ||
+ (atom->flags & (LONGER | SHORTER | MIXED)) == 0 ||
+ qprefer == (atom->flags & (LONGER | SHORTER | MIXED))))
{
/* no/vacuous quantifier: done */
EMPTYARC(s, atom->begin); /* empty prefix */
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index c0bfa8a..f372003 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -492,6 +492,55 @@ select regexp_matches('foo/bar/baz',
{foo,bar,baz}
(1 row)
+-- Test that greediness can be overridden by outer quantifier
+select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
+ regexp_matches
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
+ regexp_matches
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
+ regexp_matches
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
+ regexp_matches
+----------------
+ {"",llmmm,fff}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
+ regexp_matches
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
+ regexp_matches
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
+ regexp_matches
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
+ regexp_matches
+----------------
+ {"",llmmm,fff}
+(1 row)
+
-- Test for infinite loop in cfindloop with zero-length possible match
-- but no actual match (can only happen in the presence of backrefs)
select 'a' ~ '$()|^\1';
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index 1361b62..a174224 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -118,6 +118,16 @@ select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
select regexp_matches('foo/bar/baz',
'^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
+-- Test that greediness can be overridden by outer quantifier
+select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
+select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
+
-- Test for infinite loop in cfindloop with zero-length possible match
-- but no actual match (can only happen in the presence of backrefs)
select 'a' ~ '$()|^\1';
pgsql-bugs by date: