diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index edc9be92a6e..6c5c722995a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6262,9 +6262,10 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); to_timestamp and to_date - skip multiple blank spaces in the input string unless the - FX option is used. For example, - to_timestamp('2000    JUN', 'YYYY MON') works, but + skip multiple blank spaces in the beginng of input string, before and + after patterns unless the FX option is used. For example, + to_timestamp(' 2000    JUN', 'YYYY MON') and + to_timestamp('2000 - JUN', 'YYYY-MON') work, but to_timestamp('2000    JUN', 'FXYYYY MON') returns an error because to_timestamp expects one space only. FX must be specified as the first item in @@ -6272,6 +6273,39 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); + + + Space or separator (non letter and non digit character) in the formatting string of + to_timestamp and to_date + matches to nothing or one space or separator in the input string + unless the FX option is used. + to_timestamp('2000JUN', 'YYYY///MON') and + to_timestamp('2000/JUN', 'YYYY MON') work, but + to_timestamp('2000//JUN', 'YYYY/MON') + returns an error because count of separators in the input string + doesn't match count of it in the formatting string. + When FX is specified then space matches only space + and separator matches only the same separator. + + + + + + TZH template pattern may match signed number. When + FX option isn't used that may lead to an ambiguity in + interpreting of minus sign, which may be also interpreted as separator. + This ambiguity is resolving as following. When number of spaces and + separators before minus sign in input string is greater or equal to the + number of spaces and separators before TZH in formatting + string, then this minus is interpreted as part of time-zone hours. + Otherwise minus sign is considered as separator between values. + to_timestamp('2000 -10', 'YYYY TZH') matches + -10 to TZH, but + to_timestamp('2000 -10', 'YYYY  TZH') + matches just 10 to TZH + + + Ordinary text is allowed in to_char @@ -6287,6 +6321,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); string; for example "XX" skips two input characters (whether or not they are XX). + + + Prior to PostgreSQL 12 it was possible to + skip input ordinary text using non letter and non digit characters. + For example, + to_timestamp('2000y6m1d', 'yyyy-MM-DD') worked + before. But currently to skip ordinary text in the input string it + is necessary to use only letter characters. For example, + to_timestamp('2000y6m1d', 'yyyytMMtDDt') or + to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"') will + skip y, m and + d now. + + diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 30696e3575d..f5b692a0d9a 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -165,6 +165,8 @@ typedef struct #define NODE_TYPE_END 1 #define NODE_TYPE_ACTION 2 #define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 #define SUFFTYPE_PREFIX 1 #define SUFFTYPE_POSTFIX 2 @@ -955,6 +957,7 @@ typedef struct NUMProc static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, const int *index); static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); @@ -1044,6 +1047,16 @@ suff_search(const char *str, const KeySuffix *suf, int type) return NULL; } +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + /* ---------- * Prepare NUMDesc (number description struct) via FormatNode struct * ---------- @@ -1319,7 +1332,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, if (*str == '\\' && *(str + 1) == '"') str++; chlen = pg_mblen(str); - n->type = NODE_TYPE_CHAR; + + if (ver == DCH_TYPE && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); n->character[chlen] = '\0'; n->key = NULL; @@ -2987,25 +3007,69 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) int len, value; bool fx_mode = false; + /* number of extra skipped charaters (more than given in format string) */ + int extra_skip = 0; for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { - if (n->type != NODE_TYPE_ACTION) + /* + * Ignore spaces in the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) { /* - * Separator, so consume one character from input string. Notice - * we don't insist that the consumed character match the format's - * character. + * In non FX (fixed format) mode we don't insist that the consumed + * character matches the format's character. */ - s += pg_mblen(s); + if (!fx_mode) + { + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode we insist that whitespaces and separator + * characters from the format string match the same in input + * string. + */ + if ((n->type == NODE_TYPE_SPACE && !isspace((unsigned char) *s)) || + (n->type == NODE_TYPE_SEPARATOR && *n->character != *s)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%.*s\", expected space character \"%s\"", + pg_mblen(s), s, n->character), + errhint("In FX mode, punctuation in the input string " + "must exactly match the format string."))); + s++; + } continue; } - - /* Ignore spaces before fields when not in FX (fixed width) mode */ - if (!fx_mode && n->key->id != DCH_FX) + else if (n->type != NODE_TYPE_ACTION) { - while (*s != '\0' && isspace((unsigned char) *s)) - s++; + /* + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + * Text field ignores FX mode. + */ + s += pg_mblen(s); + continue; } from_char_set_mode(out, n->key->date_mode); @@ -3086,10 +3150,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) n->key->name))); break; case DCH_TZH: - out->tzsign = *s == '-' ? -1 : +1; - + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more charaters than specified in the format string, + * then we consider prepending last skipped minus to TZH. + */ if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? -1 : +1; s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } from_char_parse_int_len(&out->tzh, &s, 2, n); break; @@ -3261,6 +3339,17 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } } } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 63e39198e68..63fb1c44fe9 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); Sat Feb 16 08:14:30 0097 PST (1 row) +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + to_timestamp +------------------------------ + Fri Mar 18 23:38:15 2011 PDT +(1 row) + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); to_timestamp ------------------------------ Sat Jan 12 00:00:00 1985 PST (1 row) +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); to_timestamp ------------------------------ Sun May 16 00:00:00 1976 PDT @@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); (1 row) SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); to_timestamp ------------------------------ Thu Jan 01 15:54:45 1998 PST @@ -2810,6 +2828,21 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); ERROR: invalid value "/Fe" for "Mon" DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); +ERROR: unexpected character "/", expected space character ":" +HINT: In FX mode, punctuation in the input string must exactly match the format string. +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + SELECT to_timestamp('19971116', 'YYYYMMDD'); to_timestamp ------------------------------ @@ -2966,7 +2999,7 @@ SELECT to_timestamp('2011-12-18 11:38 20', 'YYYY-MM-DD HH12:MI TZM'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); @@ -2996,7 +3029,64 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST +(1 row) + +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +ERROR: invalid value "+ J" for "MON" +DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Sat Jan 01 02:00:00 2000 PST +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Fri Dec 31 06:00:00 1999 PST (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); @@ -3014,13 +3104,13 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 12-08-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 02-18-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index ebb196a1cfc..807037be76e 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 'YYYY/Mon/DD --> HH:MI:SS'); SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY'); @@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + SELECT to_timestamp('19971116', 'YYYYMMDD'); SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); @@ -464,6 +476,17 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD');