diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 425544a..a3babed 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -12295,6 +12295,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; lag(value any [, offset integer [, default any ]]) + [ { RESPECT | IGNORE } NULLS ] @@ -12309,7 +12310,9 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; default are evaluated with respect to the current row. If omitted, offset defaults to 1 and - default to null + default to null. If + IGNORE NULLS is specified then the function will be evaluated + as if the rows containing nulls didn't exist. @@ -12322,6 +12325,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; lead(value any [, offset integer [, default any ]]) + [ { RESPECT | IGNORE } NULLS ] @@ -12336,7 +12340,9 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; default are evaluated with respect to the current row. If omitted, offset defaults to 1 and - default to null + default to null. If + IGNORE NULLS is specified then the function will be evaluated + as if the rows containing nulls didn't exist. @@ -12430,11 +12436,10 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; The SQL standard defines a RESPECT NULLS or - IGNORE NULLS option for lead, lag, - first_value, last_value, and - nth_value. This is not implemented in - PostgreSQL: the behavior is always the - same as the standard's default, namely RESPECT NULLS. + IGNORE NULLS option for first_value, + last_value, and nth_value. This is not + implemented in PostgreSQL: the behavior is + always the same as the standard's default, namely RESPECT NULLS. Likewise, the standard's FROM FIRST or FROM LAST option for nth_value is not implemented: only the default FROM FIRST behavior is supported. 
(You can achieve diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index bbc5336..3b69ac1 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2016,6 +2016,17 @@ WinGetCurrentPosition(WindowObject winobj) } /* + * WinGetFrameOptions + * Return the frameOptions flags held by the window object's winstate. + */ +int +WinGetFrameOptions(WindowObject winobj) +{ + Assert(WindowObjectIsValid(winobj)); + return winobj->winstate->frameOptions; +} + +/* * WinGetPartitionRowCount * Return total number of rows contained in the current partition. * diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index b18b7a5..4713574 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -866,3 +866,34 @@ bms_hash_value(const Bitmapset *a) return DatumGetUInt32(hash_any((const unsigned char *) a->words, (lastword + 1) * sizeof(bitmapword))); } + +/* + * bms_initialize - initialize a Bitmapset using a custom memory allocator + * Returns the allocator's memory as a Bitmapset; only its nwords field is assigned here. + * allocator + * A function pointer that is called exactly once, with allocator_arg and + * the byte size to allocate; the memory is not cleared here, so it must come back zeroed. + * allocator_arg + * An argument that will be passed unmodified to the allocator + * function. Use this to pass any state the allocator requires. + * nbits + * The maximum capacity of the Bitmapset. An int64 as a Bitmapset with + * INT_MAX words can store more than INT_MAX bits. NOTE(review): the word count is kept in an int and is not range-checked. 
+ */ +Bitmapset * +bms_initialize( + void *(*allocator) (void *arg, Size sz), + void *allocator_arg, + int64 nbits) +{ + int nwords; + Bitmapset * b; + + nwords = (nbits / BITS_PER_BITMAPWORD) + 1; + b = (Bitmapset *) allocator(allocator_arg, BITMAPSET_SIZE(nwords)); + + /* record the capacity in words; the words themselves are left as allocated */ + b->nwords = nwords; + + return b; +} diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 22e82ba..9073f90 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -288,6 +288,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type TriggerEvents TriggerOneEvent %type TriggerFuncArg %type TriggerWhen +%type opt_ignore_nulls %type event_trigger_when_list event_trigger_value_list %type event_trigger_when_item @@ -546,7 +547,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P - IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P + IDENTITY_P IF_P IGNORE ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -576,7 +577,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA - RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK + RESET RESPECT RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES @@ -11553,19 +11554,28 @@ filter_clause: | /*EMPTY*/ { $$ = NULL; } ; -over_clause: OVER window_specification - { $$ = $2; } - | OVER ColId +opt_ignore_nulls: + IGNORE NULLS_P { $$ = FRAMEOPTION_IGNORE_NULLS; } + | RESPECT NULLS_P { $$ = 0; } + | /* EMPTY */ { $$ = 
0; } + ; + +over_clause: 
opt_ignore_nulls OVER window_specification + { + $3->frameOptions |= $1; + $$ = $3; + } + | opt_ignore_nulls OVER ColId { WindowDef *n = makeNode(WindowDef); - n->name = $2; + n->name = $3; n->refname = NULL; n->partitionClause = NIL; n->orderClause = NIL; - n->frameOptions = FRAMEOPTION_DEFAULTS; + n->frameOptions = FRAMEOPTION_DEFAULTS | $1; n->startOffset = NULL; n->endOffset = NULL; - n->location = @2; + n->location = @3; $$ = n; } | /*EMPTY*/ @@ -12542,6 +12552,7 @@ unreserved_keyword: | HOUR_P | IDENTITY_P | IF_P + | IGNORE | IMMEDIATE | IMMUTABLE | IMPLICIT_P @@ -12631,6 +12642,7 @@ unreserved_keyword: | REPLACE | REPLICA | RESET + | RESPECT | RESTART | RESTRICT | RETURNS diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 4e4e1cd..51fc6ca 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -579,28 +579,76 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, { Index winref = 0; ListCell *lc; + WindowDef *refwin; Assert(windef->refname == NULL && windef->partitionClause == NIL && - windef->orderClause == NIL && - windef->frameOptions == FRAMEOPTION_DEFAULTS); + windef->orderClause == NIL); foreach(lc, pstate->p_windowdefs) { - WindowDef *refwin = (WindowDef *) lfirst(lc); - + refwin = (WindowDef *) lfirst(lc); winref++; if (refwin->name && strcmp(refwin->name, windef->name) == 0) - { - wfunc->winref = winref; break; } - } + if (lc == NULL) /* didn't find it? */ ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("window \"%s\" does not exist", windef->name), parser_errposition(pstate, windef->location))); + else if (windef->frameOptions == FRAMEOPTION_DEFAULTS) + wfunc->winref = winref; + else + { + /* + * This is the window we want - but we have to tweak the + * definition slightly (e.g. to support the IGNORE NULLS + * frame option) as we're not using the default (i.e. parent) + * frame options. 
+ * + * We'll create a 'child' (using refname to inherit everything + * from the parent) that just overrides the frame options + * (assuming it doesn't already exist): + */ + WindowDef *clone = makeNode(WindowDef); + + clone->refname = pstrdup(refwin->name); + clone->frameOptions = windef->frameOptions; /* Note windef! */ + clone->startOffset = copyObject(refwin->startOffset); + clone->endOffset = copyObject(refwin->endOffset); + clone->location = refwin->location; + + /* + * Add this new definition to the list. Note that there's + * a chance a window with this definition already exists! + */ + winref = 0; + foreach(lc, pstate->p_windowdefs) + { + refwin = (WindowDef *) lfirst(lc); + + winref++; + if (refwin->refname && + strcmp(refwin->refname, clone->refname) == 0 && + equal(refwin->partitionClause, clone->partitionClause) && + equal(refwin->orderClause, clone->orderClause) && + refwin->frameOptions == clone->frameOptions && + equal(refwin->startOffset, clone->startOffset) && + equal(refwin->endOffset, clone->endOffset)) + { + /* found a duplicate window specification */ + wfunc->winref = winref; + break; + } + } + if (lc == NULL) /* didn't find it? */ + { + pstate->p_windowdefs = lappend(pstate->p_windowdefs, clone); + wfunc->winref = list_length(pstate->p_windowdefs); + } + } } else { diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 1f02c9a..8a7f867 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -518,6 +518,23 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errmsg("FILTER is not implemented in non-aggregate window functions"), parser_errposition(pstate, location))); + if (over->frameOptions & FRAMEOPTION_IGNORE_NULLS) + { + /* + * As this is only implemented for the lead & lag window functions + * we'll filter out all aggregate functions too. 
+ */ + if (fdresult != FUNCDETAIL_WINDOWFUNC + || (strcmp("lead", strVal(llast(funcname))) != 0 && + strcmp("lag", strVal(llast(funcname))) != 0)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("RESPECT NULLS is only implemented for the lead and lag window functions"), + parser_errposition(pstate, location))); + } + } + /* * ordered aggs not allowed in windows yet */ diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 2b005d6..6c222a3 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -4778,11 +4778,15 @@ get_rule_windowspec(WindowClause *wc, List *targetList, bool needspace = false; const char *sep; ListCell *l; + size_t refname_len = 0; + int initial_buf_len = buf->len; appendStringInfoChar(buf, '('); if (wc->refname) { - appendStringInfoString(buf, quote_identifier(wc->refname)); + const char *quoted_refname = quote_identifier(wc->refname); + refname_len = strlen(quoted_refname); + appendStringInfoString(buf, quoted_refname); needspace = true; } /* partition clauses are always inherited, so only print if no refname */ @@ -4864,6 +4868,19 @@ get_rule_windowspec(WindowClause *wc, List *targetList, /* we will now have a trailing space; remove it */ buf->len--; } + + /* + * We'll tidy up the output slightly; if we've got a refname, but haven't + * overridden the partition-by, order-by or any of the frame flags relevant + * inside the window def's ()s, then we'll be left with "()". 
+ * We'll trim off the brackets in this case (source and destination overlap, hence memmove; one extra byte is moved so that the terminating NUL, assuming the usual StringInfo invariant, comes along): + */ + if (wc->refname && buf->len == initial_buf_len + refname_len + 1) + { + memmove(buf->data + initial_buf_len, buf->data + initial_buf_len + 1, refname_len + 1); + buf->len -= 1; /* account for the dropped leading "(" */ + } + else appendStringInfoChar(buf, ')'); } @@ -7493,7 +7510,7 @@ get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) get_rule_expr((Node *) wfunc->aggfilter, context, false); } - appendStringInfoString(buf, ") OVER "); + appendStringInfoString(buf, ") "); foreach(l, context->windowClause) { @@ -7501,6 +7518,10 @@ get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context) if (wc->winref == wfunc->winref) { + if (wc->frameOptions & FRAMEOPTION_IGNORE_NULLS) + appendStringInfoString(buf, "IGNORE NULLS "); + appendStringInfoString(buf, "OVER "); + if (wc->name) appendStringInfoString(buf, quote_identifier(wc->name)); else diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index b7c42d3..187ed94 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "nodes/bitmapset.h" #include "utils/builtins.h" #include "windowapi.h" @@ -24,6 +25,18 @@ typedef struct rank_context int64 rank; /* current rank */ } rank_context; + +typedef struct leadlag_const_context +{ + int64 next; /* the index of the lead / lagged value */ +} leadlag_const_context; + +/* + * lead-lag process helpers: bit indexes of row i's two adjacent flags + */ + #define ISNULL_INDEX(i) (2 * (i)) + #define HAVESCANNED_INDEX(i) ((2 * (i)) + 1) + /* * ntile process information */ @@ -280,7 +293,8 @@ window_ntile(PG_FUNCTION_ARGS) * common operation of lead() and lag() * For lead() forward is true, whereas for lag() it is false. 
* withoffset indicates we have an offset second argument. - * withdefault indicates we have a default third argument. + * withdefault indicates we have a default third argument. 
We'll only + * return this default if the offset we want is outside of the partition. */ static Datum leadlag_common(FunctionCallInfo fcinfo, @@ -290,8 +304,18 @@ leadlag_common(FunctionCallInfo fcinfo, int32 offset; bool const_offset; Datum result; - bool isnull; - bool isout; + bool isnull = false; + bool isout = false; + bool ignore_nulls; + Bitmapset* null_values; + + /* + * We want to set the markpos (the earliest tuple we can access) as + * aggressively as possible to save memory, but if the offset isn't + * constant we really need random access on the partition (so can't + * mark at all). + */ + ignore_nulls = (WinGetFrameOptions(winobj) & FRAMEOPTION_IGNORE_NULLS) != 0; if (withoffset) { @@ -305,21 +329,239 @@ leadlag_common(FunctionCallInfo fcinfo, offset = 1; const_offset = true; } + if(!forward) + { + offset = -offset; + } + + if (ignore_nulls && !const_offset) + { + int64 bits_needed, scanning, current = WinGetCurrentPosition(winobj); + bool scanForward; + + /* + * This case is a little complicated; we're defining "IGNORE NULLS" as + * "run the query, and pretend the rows with nulls in them don't exist". + * This means that we'll scan from the current row an 'offset' number of + * non-null rows, and then return that one. + * + * As the offset isn't constant we need efficient random access to the + * partition, as we'll check upto O(partition size) tuples for each row + * we're calculating the window function value for. + */ + + /* + * Accessing tuples is expensive, so we'll keep track of the ones we've + * accessed (more specifically, if they're null or not). We'll need one + * bit for whether the value is null and one bit for whether we've checked + * that tuple or not. We'll keep these two bits together (as opposed to + * having two separate bitmaps) to improve cache locality. + */ + bits_needed = 2 * WinGetPartitionRowCount(winobj); + + /* + * This code is a bit messy - we want to initialize the Bitmapset in the + * partition's local memory. 
+ */ + null_values = bms_initialize( + (void *(*) (void *arg, Size sz)) WinGetPartitionLocalMemory, + winobj, + bits_needed); + + /* + * We use offset >= 0 instead of just forward as the offset might be in the + * opposite direction to the way we're scanning. We'll then force offset to + * be positive to make counting down the rows easier. + */ + scanForward = offset == 0 ? forward : (offset > 0); + offset = abs(offset); + + for (scanning = current;; scanForward ? ++scanning : --scanning) + { + if (scanning < 0 || scanning >= WinGetPartitionRowCount(winobj)) + { + isout = true; + + /* + * As we're out of the window we want to return NULL or the default + * value, but not whatever's left in result. We'll use the isnull + * flag to say "ignore it"! + */ + isnull = true; + result = (Datum) 0; + + break; + } + + /* look in the bitmap cache - do we know if this index is null? */ + if (bms_is_member(HAVESCANNED_INDEX(scanning), null_values)) + { + isnull = bms_is_member(ISNULL_INDEX(scanning), null_values); + } + else + { + Bitmapset *b; + + /* first time we've accessed this index; let's see if it's null: */ + result = WinGetFuncArgInPartition(winobj, 0, + scanning, + WINDOW_SEEK_HEAD, + false, + &isnull, &isout); + if (isout) + break; + + /* + * Update our bitmap with this result. Note the bitmap should have + * been sized correctly so bms_add_member should never need to + * re-allocate a larger chunk of memory. + */ + b = bms_add_member(null_values, HAVESCANNED_INDEX(scanning)); + Assert(b == null_values); + if (isnull) + { + b = bms_add_member(null_values, ISNULL_INDEX(scanning)); + Assert(b == null_values); + } + } + + /* + * Now the isnull flag is set correctly. 
If !isnull there's a chance + * that we may stop iterating here: + */ + if (!isnull) + { + if (offset == 0) + { + result = WinGetFuncArgInPartition(winobj, 0, + scanning, + WINDOW_SEEK_HEAD, + false, + &isnull, &isout); + break; + } + else + --offset; /* it's not null, so we're one step closer to the value we want */ + } + else if (scanning == current) + { + /* + * A slight edge case. Consider: + * + * ---------- + * A | lag(A, 1) + * 1 | NULL + * 2 | 1 + * NULL | ? + * ---------- + * + * Does a lag of one when the current value is null mean go back to the first + * non-null value (i.e. 2), or find the previous non-null value of the first + * non-null value (i.e. 1)? We're implementing the former semantics, so we'll + * need to correct slightly: + */ + --offset; + } + } + } + else if (ignore_nulls /* && const_offset */) + { + /* + * We can process a constant offset much more efficiently; initially + * we'll scan through the first non-null rows, and store that + * index. On subsequent rows we'll decide whether to push that index + * forwards to the next non-null value, or just return it again. + */ + leadlag_const_context *context = WinGetPartitionLocalMemory( + winobj, + sizeof(leadlag_const_context)); + int count_forward = 0; + + /* + * Set the forward flag based on the direction of traversal - remember + * we can have a LEAD or LAG of -1, and that should be equivalent to + * a LAG or LEAD of 1 respectively. + */ + forward = offset == 0 ? forward : (offset > 0); + + if (WinGetCurrentPosition(winobj) == 0) + if (forward) + count_forward = offset; + else + context->next = offset; /* LAG, so offset is negative */ + else + { + /* + * LEADs and LAGs are actually pretty similar - the decision of + * whether or not to push our offset value forwards depends on + * the current row (for LEADs) or the previous row (for LAGs) is + * NULL - hence the (forward ? 0 : -1) below. + */ + result = WinGetFuncArgInPartition(winobj, 0, + forward ? 
0 : -1, + WINDOW_SEEK_CURRENT, + forward, + &isnull, &isout); + if (!isnull) + count_forward = 1; + } + + /* + * Count forward through the rows, skipping nulls and terminating if + * we run off the end of the window. + */ + for (; count_forward > 0 && !isout; --count_forward) + { + do + { + /* + * Conveniently, calling WinGetFuncArgInPartition with an + * absolute index less than zero (correctly) sets isout + * and isnull to true + */ + result = WinGetFuncArgInPartition(winobj, 0, + ++(context->next), + WINDOW_SEEK_HEAD, + !forward, + &isnull, &isout); + } + while (isnull && !isout); + } result = WinGetFuncArgInPartition(winobj, 0, - (forward ? offset : -offset), + context->next, + WINDOW_SEEK_HEAD, + !forward, + &isnull, &isout); + } + else + { + /* + * We don't care about nulls; just get the row at the required offset. + */ + result = WinGetFuncArgInPartition(winobj, 0, + offset, WINDOW_SEEK_CURRENT, const_offset, &isnull, &isout); + } if (isout) { /* - * target row is out of the partition; supply default value if - * provided. otherwise it'll stay NULL + * Target row is out of the partition; supply default value if + * provided. */ if (withdefault) result = WinGetFuncArgCurrent(winobj, 2, &isnull); + else + { + /* + * Don't return whatever's lying around in result, force the output + * to null if there's no default. 
+ */ + Assert(isnull); + } } if (isnull) diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 2a4b41d..4700c00 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -93,4 +93,10 @@ extern int bms_first_member(Bitmapset *a); /* support for hashtables using Bitmapsets as keys: */ extern uint32 bms_hash_value(const Bitmapset *a); +/* initialize a Bitmapset using a custom memory allocator */ +extern Bitmapset *bms_initialize( + void *(*allocator) (void *arg, Size sz), /* function pointer to the allocator */ + void *arg, /* passed through to the first argument to the allocator */ + int64 nbits); /* the maximum capacity of the Bitmapset */ + #endif /* BITMAPSET_H */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 51fef68..d27ca5f 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -406,19 +406,35 @@ typedef struct SortBy * For entries in a WINDOW list, "name" is the window name being defined. * For OVER clauses, we use "name" for the "OVER window" syntax, or "refname" * for the "OVER (window)" syntax, which is subtly different --- the latter - * implies overriding the window frame clause. + * implies overriding the window frame clause. In the refname case, see the + * per-field comments to determine what the semantics are: + * VIRTUAL: + * If NULL, then the parent's (refname) value is used. + * MANDATORY: + * Never inherited from the parent, so must be specified - + * but can be NULL. + * SUPER: + * Always inherited from parent, any local version ignored. 
+ */ typedef struct WindowDef { NodeTag type; - char *name; /* window's own name */ - char *refname; /* referenced window name, if any */ - List *partitionClause; /* PARTITION BY expression list */ - List *orderClause; /* ORDER BY (list of SortBy) */ - int frameOptions; /* frame_clause options, see below */ - Node *startOffset; /* expression for starting bound, if any */ - Node *endOffset; /* expression for ending bound, if any */ - int location; /* parse location, or -1 if none/unknown */ + /* window's own name [MANDATORY value of NULL] */ + char *name; + /* referenced window name, if any [MANDATORY] */ + char *refname; + /* PARTITION BY expression list [SUPER] */ + List *partitionClause; + /* ORDER BY (list of SortBy) [VIRTUAL] */ + List *orderClause; + /* frame_clause options, see below [MANDATORY] */ + int frameOptions; + /* expression for starting bound, if any [MANDATORY] */ + Node *startOffset; + /* expression for ending bound, if any [MANDATORY] */ + Node *endOffset; + /* parse location, or -1 if none/unknown [MANDATORY] */ + int location; } WindowDef; /* @@ -443,6 +459,7 @@ typedef struct WindowDef #define FRAMEOPTION_END_VALUE_PRECEDING 0x00800 /* end is V. P. */ #define FRAMEOPTION_START_VALUE_FOLLOWING 0x01000 /* start is V. F. */ #define FRAMEOPTION_END_VALUE_FOLLOWING 0x02000 /* end is V. F. 
*/ +#define FRAMEOPTION_IGNORE_NULLS 0x04000 #define FRAMEOPTION_START_VALUE \ (FRAMEOPTION_START_VALUE_PRECEDING | FRAMEOPTION_START_VALUE_FOLLOWING) diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 8bd34d6..9196b41 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -180,6 +180,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD) PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD) PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD) PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD) +PG_KEYWORD("ignore", IGNORE, UNRESERVED_KEYWORD) PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD) PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD) @@ -314,6 +315,7 @@ PG_KEYWORD("repeatable", REPEATABLE, UNRESERVED_KEYWORD) PG_KEYWORD("replace", REPLACE, UNRESERVED_KEYWORD) PG_KEYWORD("replica", REPLICA, UNRESERVED_KEYWORD) PG_KEYWORD("reset", RESET, UNRESERVED_KEYWORD) +PG_KEYWORD("respect", RESPECT, UNRESERVED_KEYWORD) PG_KEYWORD("restart", RESTART, UNRESERVED_KEYWORD) PG_KEYWORD("restrict", RESTRICT, UNRESERVED_KEYWORD) PG_KEYWORD("returning", RETURNING, RESERVED_KEYWORD) diff --git a/src/include/windowapi.h b/src/include/windowapi.h index 5bbf1fa..81f5ba0 100644 --- a/src/include/windowapi.h +++ b/src/include/windowapi.h @@ -46,6 +46,8 @@ extern void *WinGetPartitionLocalMemory(WindowObject winobj, Size sz); extern int64 WinGetCurrentPosition(WindowObject winobj); extern int64 WinGetPartitionRowCount(WindowObject winobj); +extern int WinGetFrameOptions(WindowObject winobj); + extern void WinSetMarkPosition(WindowObject winobj, int64 markpos); extern bool WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2); diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 7b31d13..5926a72 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -5,19 +5,21 @@ CREATE TEMPORARY TABLE empsalary ( depname 
varchar, empno bigint, salary int, - enroll_date date + enroll_date date, + term_date date, + respect text ); INSERT INTO empsalary VALUES -('develop', 10, 5200, '2007-08-01'), -('sales', 1, 5000, '2006-10-01'), -('personnel', 5, 3500, '2007-12-10'), -('sales', 4, 4800, '2007-08-08'), -('personnel', 2, 3900, '2006-12-23'), -('develop', 7, 4200, '2008-01-01'), -('develop', 9, 4500, '2008-01-01'), -('sales', 3, 4800, '2007-08-01'), -('develop', 8, 6000, '2006-10-01'), -('develop', 11, 5200, '2007-08-15'); +('develop', 10, 5200, '2007-08-01', null, null), +('sales', 1, 5000, '2006-10-01', null, 'frog'), +('personnel', 5, 3500, '2007-12-10', null, null), +('sales', 4, 4800, '2007-08-08', '2010-09-22', 'chicken'), +('personnel', 2, 3900, '2006-12-23', null, null), +('develop', 7, 4200, '2008-01-01', null, null), +('develop', 9, 4500, '2008-01-01', null, 'gorilla'), +('sales', 3, 4800, '2007-08-01', '2009-03-05', null), +('develop', 8, 6000, '2006-10-01', '2009-11-17', 'tiger'), +('develop', 11, 5200, '2007-08-15', null, null); SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; depname | empno | salary | sum -----------+-------+--------+------- @@ -931,30 +933,39 @@ FROM tenk1 WHERE unique1 < 10; 17 | 9 (10 rows) +-- test view definitions are preserved CREATE TEMP VIEW v_window AS - SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following) as sum_rows - FROM generate_series(1, 10) i; + SELECT + i, + sum(i) over (order by i rows between 1 preceding and 1 following) as sum_rows, + lag(i, 1) IGNORE NULLS OVER (ORDER BY i DESC) AS lagged_by_1, + lag(i, 2) IGNORE NULLS OVER w AS lagged_by_2 + FROM generate_series(1, 10) i + WINDOW w as (ORDER BY i ASC); SELECT * FROM v_window; - i | sum_rows -----+---------- - 1 | 3 - 2 | 6 - 3 | 9 - 4 | 12 - 5 | 15 - 6 | 18 - 7 | 21 - 8 | 24 - 9 | 27 - 10 | 19 + i | sum_rows | lagged_by_1 | lagged_by_2 +----+----------+-------------+------------- + 10 | 19 | | 8 + 
9 | 27 | 10 | 7 + 8 | 24 | 9 | 6 + 7 | 21 | 8 | 5 + 6 | 18 | 7 | 4 + 5 | 15 | 6 | 3 + 4 | 12 | 5 | 2 + 3 | 9 | 4 | 1 + 2 | 6 | 3 | + 1 | 3 | 2 | (10 rows) SELECT pg_get_viewdef('v_window'); pg_get_viewdef ---------------------------------------------------------------------------------------- +----------------------------------------------------------------------------------------- SELECT i.i, + - sum(i.i) OVER (ORDER BY i.i ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS sum_rows+ - FROM generate_series(1, 10) i(i); + sum(i.i) OVER (ORDER BY i.i ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS sum_rows, + + lag(i.i, 1) IGNORE NULLS OVER (ORDER BY i.i DESC) AS lagged_by_1, + + lag(i.i, 2) IGNORE NULLS OVER w AS lagged_by_2 + + FROM generate_series(1, 10) i(i) + + WINDOW w AS (ORDER BY i.i); (1 row) -- with UNION @@ -1033,5 +1044,165 @@ FROM empsalary GROUP BY depname; 25100 | 1 | 22600 | develop (3 rows) +-- test null behaviour: (1) lags +SELECT term_date, lag(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | + 11-17-2009 | + | 11-17-2009 + | + | +(10 rows) + +SELECT term_date, lag(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | + 11-17-2009 | + | 11-17-2009 + | + | +(10 rows) + +-- a numeric (date) column +SELECT term_date, lag(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | 09-22-2010 + 11-17-2009 | 09-22-2010 + | 11-17-2009 + | 11-17-2009 + | 11-17-2009 +(10 rows) + +-- a text column +SELECT respect, lag(respect) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + respect | lag +---------+--------- + frog | + | frog + | frog + chicken | frog + | 
chicken + | chicken + tiger | chicken + gorilla | tiger + | gorilla + | gorilla +(10 rows) + +-- (2) leads +SELECT term_date, lead(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | + | + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + +SELECT term_date, lead(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | + | + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + +SELECT term_date, lead(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | 03-05-2009 + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | 11-17-2009 + | 11-17-2009 + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + +-- these should be errors as the functionality isn't implemented yet: +SELECT term_date, first_value(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; +ERROR: RESPECT NULLS is only implemented for the lead and lag window functions +LINE 1: SELECT term_date, first_value(term_date) IGNORE NULLS OVER (... + ^ +SELECT term_date, max(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; +ERROR: RESPECT NULLS is only implemented for the lead and lag window functions +LINE 1: SELECT term_date, max(term_date) IGNORE NULLS OVER (ORDER BY... 
+ ^ -- cleanup DROP TABLE empsalary; +-- some more test cases: +-- (1) leading with an order-by +CREATE TABLE test_table ( + id serial, + val integer); +INSERT INTO test_table (val) SELECT * FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]); +SELECT val, lead(val, 2) IGNORE NULLS OVER (ORDER BY id) FROM test_table ORDER BY id; + val | lead +-----+------ + 1 | 3 + 2 | 4 + 3 | 5 + 4 | 6 + | 6 + | 6 + | 6 + 5 | 7 + 6 | + 7 | +(10 rows) + +DROP TABLE test_table; +-- (2) two functions in the same window +SELECT val, + lead(val, 2) IGNORE NULLS OVER w AS ignore, + lead(val, 2) RESPECT NULLS OVER w AS respect +FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]) AS val +WINDOW w as (); + val | ignore | respect +-----+--------+--------- + 1 | 3 | 3 + 2 | 4 | 4 + 3 | 5 | + 4 | 6 | + | 6 | + | 6 | 5 + | 6 | 6 + 5 | 7 | 7 + 6 | | + 7 | | +(10 rows) + diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 6ee3696..cda112f 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -6,20 +6,22 @@ CREATE TEMPORARY TABLE empsalary ( depname varchar, empno bigint, salary int, - enroll_date date + enroll_date date, + term_date date, + respect text ); INSERT INTO empsalary VALUES -('develop', 10, 5200, '2007-08-01'), -('sales', 1, 5000, '2006-10-01'), -('personnel', 5, 3500, '2007-12-10'), -('sales', 4, 4800, '2007-08-08'), -('personnel', 2, 3900, '2006-12-23'), -('develop', 7, 4200, '2008-01-01'), -('develop', 9, 4500, '2008-01-01'), -('sales', 3, 4800, '2007-08-01'), -('develop', 8, 6000, '2006-10-01'), -('develop', 11, 5200, '2007-08-15'); +('develop', 10, 5200, '2007-08-01', null, null), +('sales', 1, 5000, '2006-10-01', null, 'frog'), +('personnel', 5, 3500, '2007-12-10', null, null), +('sales', 4, 4800, '2007-08-08', '2010-09-22', 'chicken'), +('personnel', 2, 3900, '2006-12-23', null, null), +('develop', 7, 4200, '2008-01-01', null, null), +('develop', 9, 4500, '2008-01-01', null, 'gorilla'), +('sales', 3, 4800, 
'2007-08-01', '2009-03-05', null), +('develop', 8, 6000, '2006-10-01', '2009-11-17', 'tiger'), +('develop', 11, 5200, '2007-08-15', null, null); SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; @@ -222,9 +224,16 @@ SELECT sum(unique1) over unique1 FROM tenk1 WHERE unique1 < 10; +-- test view definitions are preserved CREATE TEMP VIEW v_window AS - SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following) as sum_rows - FROM generate_series(1, 10) i; + SELECT + i, + sum(i) over (order by i rows between 1 preceding and 1 following) as sum_rows, + lag(i, 1) IGNORE NULLS OVER (ORDER BY i DESC) AS lagged_by_1, + lag(i, 2) IGNORE NULLS OVER w AS lagged_by_2 + FROM generate_series(1, 10) i + WINDOW w as (ORDER BY i ASC); + SELECT * FROM v_window; @@ -272,5 +281,48 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum( depname FROM empsalary GROUP BY depname; +-- test null behaviour: (1) lags + +SELECT term_date, lag(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lag(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- a numeric (date) column +SELECT term_date, lag(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- a text column +SELECT respect, lag(respect) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- (2) leads + +SELECT term_date, lead(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lead(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lead(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- these should be errors as the functionality isn't implemented yet: +SELECT term_date, first_value(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; +SELECT term_date, max(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM 
empsalary ORDER BY empno; + -- cleanup DROP TABLE empsalary; + +-- some more test cases: + +-- (1) leading with an order-by +CREATE TABLE test_table ( + id serial, + val integer); +INSERT INTO test_table (val) SELECT * FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]); +SELECT val, lead(val, 2) IGNORE NULLS OVER (ORDER BY id) FROM test_table ORDER BY id; +DROP TABLE test_table; + +-- (2) two functions in the same window +SELECT val, + lead(val, 2) IGNORE NULLS OVER w AS ignore, + lead(val, 2) RESPECT NULLS OVER w AS respect +FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]) AS val +WINDOW w as (); + +