From 066ad9104ec0e967e20e820977286f001e4055a4 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Thu, 21 Sep 2017 19:09:02 +0300 Subject: [PATCH 2/2] Fix GiST ordering by distance for NULLs and NaNs --- src/backend/access/gist/gistget.c | 29 ++++++++++------- src/backend/access/gist/gistscan.c | 36 +++++++++++++++++++-- src/include/access/gist_private.h | 13 ++++++-- src/test/regress/expected/gist.out | 64 ++++++++++++++++++++++++++++++++++++++ src/test/regress/sql/gist.sql | 60 +++++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 18 deletions(-) diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 760ea0c..7fed700 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -132,7 +132,7 @@ gistindex_keytest(IndexScanDesc scan, GISTSTATE *giststate = so->giststate; ScanKey key = scan->keyData; int keySize = scan->numberOfKeys; - double *distance_p; + GISTDistance *distance_p; Relation r = scan->indexRelation; *recheck_p = false; @@ -150,7 +150,10 @@ gistindex_keytest(IndexScanDesc scan, if (GistPageIsLeaf(page)) /* shouldn't happen */ elog(ERROR, "invalid GiST tuple found on leaf page"); for (i = 0; i < scan->numberOfOrderBys; i++) - so->distances[i] = -get_float8_infinity(); + { + so->distances[i].value = -get_float8_infinity(); + so->distances[i].isnull = false; + } return true; } @@ -248,7 +251,8 @@ gistindex_keytest(IndexScanDesc scan, if ((key->sk_flags & SK_ISNULL) || isNull) { /* Assume distance computes as null and sorts to the end */ - *distance_p = get_float8_infinity(); + distance_p->value = get_float8_nan(); + distance_p->isnull = true; } else { @@ -285,7 +289,8 @@ gistindex_keytest(IndexScanDesc scan, ObjectIdGetDatum(key->sk_subtype), PointerGetDatum(&recheck)); *recheck_distances_p |= recheck; - *distance_p = DatumGetFloat8(dist); + distance_p->value = DatumGetFloat8(dist); + distance_p->isnull = false; } key++; @@ -319,8 +324,8 @@ gistindex_keytest(IndexScanDesc scan, * sibling will be processed next. */ static void -gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, - TIDBitmap *tbm, int64 *ntids) +gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, + GISTDistance *myDistances, TIDBitmap *tbm, int64 *ntids) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; GISTSTATE *giststate = so->giststate; @@ -367,7 +372,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, /* Insert it into the queue using same distances as for this page */ memcpy(item->distances, myDistances, - sizeof(double) * scan->numberOfOrderBys); + sizeof(item->distances[0]) * scan->numberOfOrderBys); pairingheap_add(so->queue, &item->phNode); @@ -497,7 +502,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, /* Insert it into the queue using new distance data */ memcpy(item->distances, so->distances, - sizeof(double) * scan->numberOfOrderBys); + sizeof(item->distances[0]) * scan->numberOfOrderBys); pairingheap_add(so->queue, &item->phNode); @@ -571,8 +576,8 @@ getNextNearest(IndexScanDesc scan) if (!scan->xs_orderbynulls[i]) pfree(DatumGetPointer(scan->xs_orderbyvals[i])); #endif - scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]); - scan->xs_orderbynulls[i] = false; + scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i].value); + scan->xs_orderbynulls[i] = item->distances[i].isnull; } else if (so->orderByTypes[i] == FLOAT4OID) { @@ -582,8 +587,8 @@ getNextNearest(IndexScanDesc scan) if (!scan->xs_orderbynulls[i]) pfree(DatumGetPointer(scan->xs_orderbyvals[i])); #endif - scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]); - scan->xs_orderbynulls[i] = false; + scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i].value); + scan->xs_orderbynulls[i] = item->distances[i].isnull; } else { diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 058544e..95cb554 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -14,6 +14,8 @@ */ #include "postgres.h" +#include + #include "access/gist_private.h" #include "access/gistscan.h" #include "access/relscan.h" @@ -36,8 +38,36 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node /* Order according to distance comparison */ for (i = 0; i < scan->numberOfOrderBys; i++) { - if (sa->distances[i] != sb->distances[i]) - return (sa->distances[i] < sb->distances[i]) ? 1 : -1; + double da; + double db; + + if (sa->distances[i].isnull) + { + if (sb->distances[i].isnull) + continue; /* NULL = NULL */ + + return -1; /* NULL > non-NULL */ + } + + if (sb->distances[i].isnull) + return 1; /* non-NULL < NULL */ + + da = sa->distances[i].value; + db = sb->distances[i].value; + + if (isnan(da)) + { + if (isnan(db)) + continue; /* NaN = NaN */ + + return -1; /* NaN > non-NaN */ + } + + if (isnan(db)) + return 1; /* non-NaN < NaN */ + + if (da != db) + return (da < db) ? 1 : -1; } /* Heap items go before inner pages, to ensure a depth-first search */ @@ -81,7 +111,7 @@ gistbeginscan(Relation r, int nkeys, int norderbys) so->queueCxt = giststate->scanCxt; /* see gistrescan */ /* workspaces with size dependent on numberOfOrderBys: */ - so->distances = palloc(sizeof(double) * scan->numberOfOrderBys); + so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys); so->qual_ok = true; /* in case there are zero keys */ if (scan->numberOfOrderBys > 0) { diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index bfef2df..2ae3162 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -125,6 +125,13 @@ typedef struct GISTSearchHeapItem * LP_DEAD */ } GISTSearchHeapItem; +/* Nullable distance */ +typedef struct GISTDistance +{ + double value; + bool isnull; +} GISTDistance; + /* Unvisited item, either index page or heap tuple */ typedef struct GISTSearchItem { @@ -136,13 +143,13 @@ typedef struct GISTSearchItem /* we must store parentlsn to detect whether a split occurred */ GISTSearchHeapItem heap; /* heap info, if heap tuple */ } data; - double distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys + GISTDistance distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys * entries */ } GISTSearchItem; #define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber) -#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances)) +#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(GISTDistance) * (n_distances)) /* * GISTScanOpaqueData: private state for a scan of a GiST index @@ -158,7 +165,7 @@ typedef struct GISTScanOpaqueData bool firstCall; /* true until first gistgettuple call */ /* pre-allocated workspace arrays */ - double *distances; /* output area for gistindex_keytest */ + GISTDistance *distances; /* output area for gistindex_keytest */ /* info about killed items if any (killedItems is NULL if never used) */ OffsetNumber *killedItems; /* offset numbers of killed items */ diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out index 91f9998..6f165e6 100644 --- a/src/test/regress/expected/gist.out +++ b/src/test/regress/expected/gist.out @@ -225,3 +225,67 @@ reset enable_seqscan; reset enable_bitmapscan; reset enable_indexonlyscan; drop table gist_tbl; +-- Test ORDER BY circle <-> point with Inifinities and NaNs +CREATE TABLE gist_circle_tbl (id int, c circle); +INSERT INTO gist_circle_tbl + SELECT (x - 1) * 100 + y, circle(point(x * 10, y * 10), 1 + (x + y) % 10) + FROM generate_series(1, 100) x, + generate_series(1, 100) y; +INSERT INTO gist_circle_tbl + SELECT i, '<(200, 300), 5>' + FROM generate_series(10001, 11000) AS i; +INSERT INTO gist_circle_tbl + VALUES + (11001, NULL), + (11002, NULL), + (11003, '<(0,100), infinity>'), + (11004, '<(-infinity,0),1000>'), + (11005, '<(infinity,-infinity),infinity>'); +CREATE INDEX gist_circle_tbl_idx ON gist_circle_tbl USING gist(c); +-- get ordering results from seq scan +SET enable_seqscan TO on; +SET enable_indexscan TO off; +EXPLAIN (COSTS OFF) +SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id +FROM gist_circle_tbl; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Sort + Sort Key: ((c <-> '(123,456)'::point)) + -> Seq Scan on gist_circle_tbl +(4 rows) + +CREATE TABLE gist_circle_tbl_ord_seq AS + SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id + FROM gist_circle_tbl; +-- get ordering results from index scan +SET enable_seqscan TO off; +SET enable_indexscan TO on; +EXPLAIN (COSTS OFF) +SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id +FROM gist_circle_tbl; + QUERY PLAN +--------------------------------------------------------------- + WindowAgg + -> Index Scan using gist_circle_tbl_idx on gist_circle_tbl + Order By: (c <-> '(123,456)'::point) +(3 rows) + +CREATE TABLE gist_circle_tbl_ord_idx AS + SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id + FROM gist_circle_tbl; +-- compare results +SELECT * +FROM gist_circle_tbl_ord_seq seq FULL JOIN gist_circle_tbl_ord_idx idx USING (n, id) +WHERE seq.id IS NULL OR idx.id IS NULL; + n | id | dist | dist +---+----+------+------ +(0 rows) + +-- clean up +RESET enable_seqscan; +RESET enable_indexscan; +DROP TABLE gist_circle_tbl; +DROP TABLE gist_circle_tbl_ord_seq; +DROP TABLE gist_circle_tbl_ord_idx; diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql index 49126fd..a603e72 100644 --- a/src/test/regress/sql/gist.sql +++ b/src/test/regress/sql/gist.sql @@ -120,3 +120,63 @@ reset enable_bitmapscan; reset enable_indexonlyscan; drop table gist_tbl; + + +-- Test ORDER BY circle <-> point with Inifinities and NaNs +CREATE TABLE gist_circle_tbl (id int, c circle); + +INSERT INTO gist_circle_tbl + SELECT (x - 1) * 100 + y, circle(point(x * 10, y * 10), 1 + (x + y) % 10) + FROM generate_series(1, 100) x, + generate_series(1, 100) y; + +INSERT INTO gist_circle_tbl + SELECT i, '<(200, 300), 5>' + FROM generate_series(10001, 11000) AS i; + +INSERT INTO gist_circle_tbl + VALUES + (11001, NULL), + (11002, NULL), + (11003, '<(0,100), infinity>'), + (11004, '<(-infinity,0),1000>'), + (11005, '<(infinity,-infinity),infinity>'); + +CREATE INDEX gist_circle_tbl_idx ON gist_circle_tbl USING gist(c); + +-- get ordering results from seq scan +SET enable_seqscan TO on; +SET enable_indexscan TO off; + +EXPLAIN (COSTS OFF) +SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id +FROM gist_circle_tbl; + +CREATE TABLE gist_circle_tbl_ord_seq AS + SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id + FROM gist_circle_tbl; + +-- get ordering results from index scan +SET enable_seqscan TO off; +SET enable_indexscan TO on; + +EXPLAIN (COSTS OFF) +SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id +FROM gist_circle_tbl; + +CREATE TABLE gist_circle_tbl_ord_idx AS + SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id + FROM gist_circle_tbl; + +-- compare results +SELECT * +FROM gist_circle_tbl_ord_seq seq FULL JOIN gist_circle_tbl_ord_idx idx USING (n, id) +WHERE seq.id IS NULL OR idx.id IS NULL; + +-- clean up +RESET enable_seqscan; +RESET enable_indexscan; + +DROP TABLE gist_circle_tbl; +DROP TABLE gist_circle_tbl_ord_seq; +DROP TABLE gist_circle_tbl_ord_idx; -- 2.7.4