diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index bb794e044f4..6f28fe1e753 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6262,16 +6262,54 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); to_timestamp and to_date - skip multiple blank spaces in the input string unless the - FX option is used. For example, - to_timestamp('2000    JUN', 'YYYY MON') works, but + skip multiple blank spaces at the beginning of the input string and + around date and time values unless the FX option is used. For example, + to_timestamp(' 2000    JUN', 'YYYY MON') and + to_timestamp('2000 - JUN', 'YYYY-MON') work, but to_timestamp('2000    JUN', 'FXYYYY MON') returns an error - because to_timestamp expects one space only. + because to_timestamp expects a single space only. FX must be specified as the first item in the template. + + + A separator (a space or a non-letter/non-digit character) in the template string of + to_timestamp and to_date + matches any single separator in the input string or is skipped, + unless the FX option is used. + For example, to_timestamp('2000JUN', 'YYYY///MON') and + to_timestamp('2000/JUN', 'YYYY MON') work, but + to_timestamp('2000//JUN', 'YYYY/MON') + returns an error because the number of separators in the input string + exceeds the number of separators in the template. + + + If FX is specified, separators in the + input and template strings must match exactly. For example, + to_timestamp('2000/JUN', 'FXYYYY MON') + returns an error because a space is expected in the input string. + + + + + + TZH template pattern can match a signed number. + Without the FX option, it may lead to ambiguity in + interpretation of the minus sign, which can also be interpreted as a separator. + This ambiguity is resolved as follows. If the number of separators before + TZH in the template string is less than the number of + separators before the minus sign in the input string, the minus sign + is interpreted as part of TZH. + Otherwise, the minus sign is considered to be a separator between values. + For example, to_timestamp('2000 -10', 'YYYY TZH') matches + -10 to TZH, but + to_timestamp('2000 -10', 'YYYY  TZH') + matches 10 to TZH. + + + Ordinary text is allowed in to_char @@ -6287,6 +6325,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); string; for example "XX" skips two input characters (whether or not they are XX). + + + Prior to PostgreSQL 12, it was possible to + skip arbitrary text in the input string using non-letter or non-digit + characters. For example, + to_timestamp('2000y6m1d', 'yyyy-MM-DD') used to + work. Now you can only use letter characters for this purpose. For example, + to_timestamp('2000y6m1d', 'yyyytMMtDDt') and + to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"') + skip y, m, and + d. + + diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 30696e3575d..bf4e6067e85 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -165,6 +165,8 @@ typedef struct #define NODE_TYPE_END 1 #define NODE_TYPE_ACTION 2 #define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 #define SUFFTYPE_PREFIX 1 #define SUFFTYPE_POSTFIX 2 @@ -955,6 +957,7 @@ typedef struct NUMProc static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, const int *index); static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); @@ -1044,6 +1047,16 @@ suff_search(const char *str, const KeySuffix *suf, int type) return NULL; } +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + /* ---------- * Prepare NUMDesc (number description struct) via FormatNode struct * ---------- @@ -1319,7 +1332,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, if (*str == '\\' && *(str + 1) == '"') str++; chlen = pg_mblen(str); - n->type = NODE_TYPE_CHAR; + + if (ver == DCH_TYPE && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); n->character[chlen] = '\0'; n->key = NULL; @@ -2987,25 +3007,69 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) int len, value; bool fx_mode = false; + /* number of extra skipped characters (more than given in format string) */ + int extra_skip = 0; for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { - if (n->type != NODE_TYPE_ACTION) + /* + * Ignore spaces at the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) { /* - * Separator, so consume one character from input string. Notice - * we don't insist that the consumed character match the format's - * character. + * In non FX (fixed format) mode we don't insist that the consumed + * character matches the format's character. */ - s += pg_mblen(s); + if (!fx_mode) + { + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode we insist that whitespaces and separator + * characters from the format string match the same in input + * string. + */ + if ((n->type == NODE_TYPE_SPACE && !isspace((unsigned char) *s)) || + (n->type == NODE_TYPE_SEPARATOR && *n->character != *s)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%.*s\", expected character \"%s\"", + pg_mblen(s), s, n->character), + errhint("In FX mode, punctuation in the input string " + "must exactly match the format string."))); + s++; + } continue; } - - /* Ignore spaces before fields when not in FX (fixed width) mode */ - if (!fx_mode && n->key->id != DCH_FX) + else if (n->type != NODE_TYPE_ACTION) { - while (*s != '\0' && isspace((unsigned char) *s)) - s++; + /* + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + * Text field ignores FX mode. + */ + s += pg_mblen(s); + continue; } from_char_set_mode(out, n->key->date_mode); @@ -3086,10 +3150,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) n->key->name))); break; case DCH_TZH: - out->tzsign = *s == '-' ? -1 : +1; - + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more characters than specified in the format string, + * then we consider prepending last skipped minus to TZH. + */ if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? -1 : +1; s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } from_char_parse_int_len(&out->tzh, &s, 2, n); break; @@ -3261,6 +3339,17 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } } } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index e585b07c18c..976f4783e1e 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2790,7 +2790,7 @@ SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); to_timestamp ------------------------------ - Wed Jan 02 00:00:00 1985 PST + Sat Jan 12 00:00:00 1985 PST (1 row) SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', @@ -2835,11 +2835,8 @@ SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); (1 row) SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); - to_timestamp ------------------------------- - Sun Feb 16 00:00:00 1997 PST -(1 row) - +ERROR: unexpected character "/", expected character ":" +HINT: In FX mode, punctuation in the input string must exactly match the format string. SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); to_timestamp ------------------------------ @@ -3002,7 +2999,7 @@ SELECT to_timestamp('2011-12-18 11:38 20', 'YYYY-MM-DD HH12:MI TZM'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); @@ -3032,7 +3029,7 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); @@ -3042,8 +3039,11 @@ SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); (1 row) SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); -ERROR: invalid value "+JU" for "MON" -DETAIL: The given value did not match any of the allowed values for this field. + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); to_timestamp ------------------------------ @@ -3051,20 +3051,32 @@ SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); (1 row) SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); -ERROR: invalid value "+JU" for "MON" -DETAIL: The given value did not match any of the allowed values for this field. + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + SELECT to_timestamp('2000 + JUN', 'YYYY MON'); -ERROR: invalid value "+ J" for "MON" -DETAIL: The given value did not match any of the allowed values for this field. + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); -ERROR: invalid value "+ J" for "MON" -DETAIL: The given value did not match any of the allowed values for this field. + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); ERROR: invalid value "+ J" for "MON" DETAIL: The given value did not match any of the allowed values for this field. SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); -ERROR: invalid value "+ J" for "MON" -DETAIL: The given value did not match any of the allowed values for this field. + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + SELECT to_timestamp('2000 -10', 'YYYY TZH'); to_timestamp ------------------------------ @@ -3092,13 +3104,13 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 12-08-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 02-18-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD');