Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug - Mailing list pgsql-bugs
From | Tom Lane |
---|---|
Subject | Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug |
Date | |
Msg-id | 20583.1557680147@sss.pgh.pa.us Whole thread Raw |
In response to | Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug (Andrew Gierth <andrew@tao11.riddles.org.uk>) |
Responses | Re: PostgreSQL 9.3.5 substring(text from pattern for escape) bug |
List | pgsql-bugs |
Andrew Gierth <andrew@tao11.riddles.org.uk> writes:
> "Tom" == Tom Lane <tgl@sss.pgh.pa.us> writes:
> Tom> Huh, interesting. So we should be translating the initial
> Tom> substring to a non-greedy pattern. I believe Spencer's engine can
> Tom> handle that by sticking (?:...){1,1}? around it.

> Your suggested fix doesn't seem to work. If the leading/trailing
> substrings do not have | or parens in then it seems to work to wrap them
> in (?:(?:)??...), thanks to the rule that the first quantified atom in a
> subexpression sets the whole subexpression's greediness, but handling |
> or parens correctly seems harder.

[ pokes at that... ] Huh. That's a bug, which AFAICS is aboriginal
in Henry's code: it optimizes away a {1,1} quantifier without regard
to whether the quantifier is attempting to impose a different greediness
preference than its argument would have naturally.

The attached seems to fix it.

			regards, tom lane

diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index eb1f3d5..8cd7d56 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1155,7 +1155,10 @@ parseqatom(struct vars *v,
 		/* rest of branch can be strung starting from atom->end */
 		s2 = atom->end;
 	}
-	else if (m == 1 && n == 1)
+	else if (m == 1 && n == 1 &&
+			 (qprefer == 0 ||
+			  (atom->flags & (LONGER | SHORTER | MIXED)) == 0 ||
+			  qprefer == (atom->flags & (LONGER | SHORTER | MIXED))))
 	{
 		/* no/vacuous quantifier: done */
 		EMPTYARC(s, atom->begin);	/* empty prefix */
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index c0bfa8a..f372003 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -492,6 +492,55 @@ select regexp_matches('foo/bar/baz',
  {foo,bar,baz}
 (1 row)
 
+-- Test that greediness can be overridden by outer quantifier
+select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
+ regexp_matches 
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
+ regexp_matches 
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
+  regexp_matches  
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
+ regexp_matches 
+----------------
+ {"",llmmm,fff}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
+  regexp_matches  
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
+ regexp_matches 
+----------------
+ {ll,mmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
+  regexp_matches  
+------------------
+ {"",llmmmfff,""}
+(1 row)
+
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
+ regexp_matches 
+----------------
+ {"",llmmm,fff}
+(1 row)
+
 -- Test for infinite loop in cfindloop with zero-length possible match
 -- but no actual match (can only happen in the presence of backrefs)
 select 'a' ~ '$()|^\1';
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index 1361b62..a174224 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -118,6 +118,16 @@ select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
 select regexp_matches('foo/bar/baz',
                       '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
 
+-- Test that greediness can be overridden by outer quantifier
+select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
+select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
+select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
+
 -- Test for infinite loop in cfindloop with zero-length possible match
 -- but no actual match (can only happen in the presence of backrefs)
 select 'a' ~ '$()|^\1';
pgsql-bugs by date: