*** a/configure.in --- b/configure.in *************** AC_MSG_WARN([*** skipping thread test on *** 1913,1918 **** --- 1913,1944 ---- fi fi + # strxfrm stuff + if test "$PORTNAME" != "win32" + then + AC_MSG_CHECKING([strxfrm non-redundancy]) + + _CFLAGS="$CFLAGS" + _LIBS="$LIBS" + CFLAGS="$CFLAGS -DIN_CONFIGURE" + LIBS="$LIBS" + AC_TRY_RUN([#include "$srcdir/src/test/locale/test-strxfrm-redundant.c"], + [AC_MSG_RESULT(yes)], + [AC_MSG_RESULT(no) + AC_DEFINE(PG_BAD_STRXFRM, 1, [Define to 1 if you have strxfrm() with redundant header bytes.]) + AC_MSG_WARN([strxfrm test program failed + This platform has a strxfrm implementation that produces header bytes. Optimization disabled. ])], + [AC_MSG_RESULT(maybe) + AC_MSG_WARN([ + *** Skipping strxfrm test program because of cross-compile build. + *** Run the program src/test/locale/test-strxfrm-redundant.c on the target machine. + ])]) + CFLAGS="$_CFLAGS" + LIBS="$_LIBS" + else + AC_MSG_WARN([*** skipping strxfrm test on Win32]) + fi + # If compiler will take -Wl,--as-needed (or various platform-specific # spellings thereof) then add that to LDFLAGS. This is much easier than # trying to filter LIBS to the minimum for each executable. *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** compute_scalar_stats(VacAttrStatsP stats *** 2292,2297 **** --- 2292,2302 ---- /* We always use the default collation for statistics */ ssup.ssup_collation = DEFAULT_COLLATION_OID; ssup.ssup_nulls_first = false; + /* + * It isn't feasible to perform poor man's conversion, so opt out of that + * additional optimization entirely + */ + ssup.type = sortKeyOther; PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); *** a/src/backend/executor/nodeAgg.c --- b/src/backend/executor/nodeAgg.c *************** initialize_aggregates(AggState *aggstate *** 377,383 **** peraggstate->sortOperators, peraggstate->sortCollations, peraggstate->sortNullsFirst, ! 
work_mem, false); } /* --- 377,383 ---- peraggstate->sortOperators, peraggstate->sortCollations, peraggstate->sortNullsFirst, ! work_mem, -1, false); } /* *** a/src/backend/executor/nodeMergeAppend.c --- b/src/backend/executor/nodeMergeAppend.c *************** ExecInitMergeAppend(MergeAppend *node, E *** 137,142 **** --- 137,148 ---- sortKey->ssup_nulls_first = node->nullsFirst[i]; sortKey->ssup_attno = node->sortColIdx[i]; + /* + * It isn't feasible to perform poor man's conversion, so opt out of + * that additional optimization entirely + */ + sortKey->type = sortKeyOther; + PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey); } *** a/src/backend/executor/nodeMergejoin.c --- b/src/backend/executor/nodeMergejoin.c *************** MJExamineQuals(List *mergeclauses, *** 234,239 **** --- 234,253 ---- op_lefttype, op_righttype, BTSORTSUPPORT_PROC); + + /* + * sortsupport routine must know if poor man's optimization is + * applicable in principle. Tell routine to never use optimization, + * since it isn't likely to be useful here. In practice MJCompare() + * probably yields equality more often than not. + * + * XXX: It might be worth setting this to sortKeyTrueLeading, to make + * the comparison func more optimistic about the chances of equality + * being indicated. To do so would be an abuse of the interface, + * though. + */ + clause->ssup.type = sortKeyOther; + if (OidIsValid(sortfunc)) { /* The sort support function should provide a comparator */ *** a/src/backend/executor/nodeSort.c --- b/src/backend/executor/nodeSort.c *************** ExecSort(SortState *node) *** 89,94 **** --- 89,95 ---- plannode->collations, plannode->nullsFirst, work_mem, + plannode->plan.plan_rows, node->randomAccess); if (node->bounded) tuplesort_set_bound(tuplesortstate, node->bound); *** a/src/backend/lib/Makefile --- b/src/backend/lib/Makefile *************** subdir = src/backend/lib *** 12,17 **** top_builddir = ../../.. include $(top_builddir)/src/Makefile.global ! 
OBJS = ilist.o binaryheap.o stringinfo.o include $(top_srcdir)/src/backend/common.mk --- 12,17 ---- top_builddir = ../../.. include $(top_builddir)/src/Makefile.global ! OBJS = ilist.o binaryheap.o hyperloglog.o stringinfo.o include $(top_srcdir)/src/backend/common.mk *** a/src/backend/lib/hyperloglog.c --- b/src/backend/lib/hyperloglog.c *************** *** 0 **** --- 1,202 ---- + /*------------------------------------------------------------------------- + * + * hyperloglog.c + * A simple HyperLogLog cardinality estimator implementation + * + * Portions Copyright (c) 2014, PostgreSQL Global Development Group + * + * Based on Hideaki Ohno's C++ implementation. This is probably not ideally + * suited to estimating the cardinality of very large sets; in particular, we + * have not attempted to further optimize the implementation as described in + * the Heule, Nunkesser and Hall paper "HyperLogLog in Practice: Algorithmic + * Engineering of a State of The Art Cardinality Estimation Algorithm". + * + * A sparse representation of HyperLogLog state is used, with fixed space + * overhead. + * + * IDENTIFICATION + * src/backend/lib/hyperloglog.c + * + *------------------------------------------------------------------------- + */ + + #include "postgres.h" + + #include <math.h> + + #include "lib/hyperloglog.h" + + #define POW_2_32 (4294967296.0) + #define NEG_POW_2_32 (-4294967296.0) + + static inline uint8 rho(uint32 x, uint8 b); + + /* + * Initialize HyperLogLog track state + * + * bwidth is bit width (so the number of registers will be 2 to the power of + * bwidth). Must be between 4 and 16 inclusive. 
+ */ + void + initHyperLogLog(hyperLogLogState *cState, uint8 bwidth) + { + double alpha; + + if (bwidth < 4 || bwidth > 16) + elog(ERROR, "bit width must be between 4 and 16 inclusive"); + + cState->registerWidth = bwidth; + cState->nRegisters = 1 << bwidth; + cState->arrSize = sizeof(uint8) * cState->nRegisters + 1; + + /* + * Initialize hashes array to zero, not negative infinity, per discussion + * of the coupon collector problem in the HyperLogLog paper + */ + cState->hashesArr = palloc0(cState->arrSize); + + /* + * "alpha" is a value that for each possible number of registers (m) is + * used to correct a systematic multiplicative bias present in m ^ 2 Z (Z + * is "the indicator function" through which we finally compute E, + * estimated cardinality). + */ + switch (cState->nRegisters) + { + case 16: + alpha = 0.673; + break; + case 32: + alpha = 0.697; + break; + case 64: + alpha = 0.709; + break; + default: + alpha = 0.7213 / (1.0 + 1.079 / cState->nRegisters); + } + + /* + * Precalculate alpha m ^ 2, later used to generate "raw" HyperLogLog + * estimate E + */ + cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters; + } + + /* + * Adds element to the estimator, from caller-supplied hash. + * + * It is critical that the hash value passed be an actual hash value, typically + * generated using hash_any(). The algorithm relies on a specific bit-pattern + * observable in conjunction with stochastic averaging. 
+ */ + void + addHyperLogLog(hyperLogLogState *cState, uint32 hash) + { + uint8 count; + uint32 index; + + /* Use the first "k" (registerWidth) bits as a zero based index */ + index = hash >> (BITS_PER_BYTE * sizeof(uint32) - cState->registerWidth); + + /* Compute the rank of the remaining 32 - "k" (registerWidth) bits */ + count = rho(hash << cState->registerWidth, + BITS_PER_BYTE * sizeof(uint32) - cState->registerWidth); + + cState->hashesArr[index] = Max(count, cState->hashesArr[index]); + } + + /* + * Estimates cardinality, based on elements added so far + */ + double + estimateHyperLogLog(hyperLogLogState *cState) + { + double result; + double sum = 0.0; + int i; + + for (i = 0; i < cState->nRegisters; i++) + { + sum += 1.0 / pow(2.0, cState->hashesArr[i]); + } + + /* result set to "raw" HyperLogLog estimate (E in the HyperLogLog paper) */ + result = cState->alphaMM / sum; + + if (result <= (5.0 / 2.0) * cState->nRegisters) + { + /* Small range correction */ + int zero_count = 0; + + for (i = 0; i < cState->nRegisters; i++) + { + if (cState->hashesArr[i] == 0) + zero_count++; + } + + if (zero_count != 0) + result = cState->nRegisters * log((double) cState->nRegisters / + zero_count); + } + else if (result > (1.0 / 30.0) * POW_2_32) + { + /* Large range correction */ + result = NEG_POW_2_32 * log(1.0 - (result / POW_2_32)); + } + + return result; + } + + /* + * Merges the registers of oState into cState in place (register-wise maximum), + * so that cState then tracks the union of both inputs. Nothing is returned. + * + * The number of registers in each must match. + */ + void + mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState) + { + int r; + + if (cState->nRegisters != oState->nRegisters) + elog(ERROR, "number of registers mismatch: %zu != %zu", + cState->nRegisters, oState->nRegisters); + + for (r = 0; r < cState->nRegisters; ++r) + { + cState->hashesArr[r] = Max(cState->hashesArr[r], oState->hashesArr[r]); + } + } + + + /* + * Worker for addHyperLogLog(). 
+ * + * Calculates the position of the first set bit in the first b bits of x argument + * starting from the first, reading from most significant to least significant + * bits. + * + * Example (when considering first 10 bits of x): + * + * rho(x = 0b1000000000) returns 1 + * rho(x = 0b0010000000) returns 3 + * rho(x = 0b0000000000) returns b + 1 + * + * "The binary address determined by the first b bits of x" + * + * Return value "j" used to index bit pattern to watch. + */ + static inline uint8 + rho(uint32 x, uint8 b) + { + uint8 j = 1; + + while (j <= b && !(x & 0x80000000)) + { + j++; + x <<= 1; + } + + return j; + } *** a/src/backend/utils/adt/orderedsetaggs.c --- b/src/backend/utils/adt/orderedsetaggs.c *************** ordered_set_startup(FunctionCallInfo fci *** 280,286 **** qstate->sortOperators, qstate->sortCollations, qstate->sortNullsFirsts, ! work_mem, false); else osastate->sortstate = tuplesort_begin_datum(qstate->sortColType, qstate->sortOperator, --- 280,286 ---- qstate->sortOperators, qstate->sortCollations, qstate->sortNullsFirsts, ! 
work_mem, -1, false); else osastate->sortstate = tuplesort_begin_datum(qstate->sortColType, qstate->sortOperator, *** a/src/backend/utils/adt/varlena.c --- b/src/backend/utils/adt/varlena.c *************** *** 17,25 **** --- 17,27 ---- #include <ctype.h> #include <limits.h> + #include "access/hash.h" #include "access/tuptoaster.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" + #include "lib/hyperloglog.h" #include "libpq/md5.h" #include "libpq/pqformat.h" #include "miscadmin.h" *************** *** 29,34 **** --- 31,37 ---- #include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" + #include "utils/sortsupport.h" /* GUC variable */ *************** typedef struct *** 50,61 **** --- 53,89 ---- int skiptable[256]; /* skip distance for given mismatched char */ } TextPositionState; + typedef struct + { + char *buf1; /* 1st string, or poorman original string buf */ + char *buf2; /* 2nd string, or leading key/poor man blob */ + int buflen1; + int buflen2; + hyperLogLogState hlstate; + double length; /* Aggregate string length copied so far */ + #ifdef HAVE_LOCALE_T + pg_locale_t locale; + #endif + } TextSortSupport; + + /* + * This should be large enough that most strings will fit, but small enough + * that we feel comfortable putting it on the stack + */ + #define TEXTBUFLEN 1024 + #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X)) #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X)) #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n)) #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n)) #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x) + static void btpoorman_worker(SortSupport ssup, Oid collid); + static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup); + static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup); + static int bttextcmp_poorman(Datum x, Datum y, SortSupport ssup); + static Datum bttext_convert(Datum original, SortSupport ssup); + static bool 
bttext_abort(int memtupcount, double rowhint, SortSupport ssup); static int32 text_length(Datum str); static text *text_catenate(text *t1, text *t2); static text *text_substring(Datum str, *************** varstr_cmp(char *arg1, int len1, char *a *** 1356,1365 **** } else { ! #define STACKBUFLEN 1024 ! ! char a1buf[STACKBUFLEN]; ! char a2buf[STACKBUFLEN]; char *a1p, *a2p; --- 1384,1391 ---- } else { ! char a1buf[TEXTBUFLEN]; ! char a2buf[TEXTBUFLEN]; char *a1p, *a2p; *************** varstr_cmp(char *arg1, int len1, char *a *** 1393,1416 **** int a2len; int r; ! if (len1 >= STACKBUFLEN / 2) { a1len = len1 * 2 + 2; a1p = palloc(a1len); } else { ! a1len = STACKBUFLEN; a1p = a1buf; } ! if (len2 >= STACKBUFLEN / 2) { a2len = len2 * 2 + 2; a2p = palloc(a2len); } else { ! a2len = STACKBUFLEN; a2p = a2buf; } --- 1419,1442 ---- int a2len; int r; ! if (len1 >= TEXTBUFLEN / 2) { a1len = len1 * 2 + 2; a1p = palloc(a1len); } else { ! a1len = TEXTBUFLEN; a1p = a1buf; } ! if (len2 >= TEXTBUFLEN / 2) { a2len = len2 * 2 + 2; a2p = palloc(a2len); } else { ! a2len = TEXTBUFLEN; a2p = a2buf; } *************** varstr_cmp(char *arg1, int len1, char *a *** 1475,1485 **** } #endif /* WIN32 */ ! if (len1 >= STACKBUFLEN) a1p = (char *) palloc(len1 + 1); else a1p = a1buf; ! if (len2 >= STACKBUFLEN) a2p = (char *) palloc(len2 + 1); else a2p = a2buf; --- 1501,1511 ---- } #endif /* WIN32 */ ! if (len1 >= TEXTBUFLEN) a1p = (char *) palloc(len1 + 1); else a1p = a1buf; ! 
if (len2 >= TEXTBUFLEN) a2p = (char *) palloc(len2 + 1); else a2p = a2buf; *************** bttextcmp(PG_FUNCTION_ARGS) *** 1683,1688 **** --- 1709,2204 ---- PG_RETURN_INT32(result); } + Datum + bttextsortsupport(PG_FUNCTION_ARGS) + { + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + btpoorman_worker(ssup, collid); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); + } + + /* + * Worker for sort support routine + */ + static void + btpoorman_worker(SortSupport ssup, Oid collid) + { + TextSortSupport *tss; + + /* + * WIN32 requires complex hacks when the database encoding is UTF-8 (except + * when using the "C" collation). For now, we don't optimize that case. + */ + #if defined(WIN32) + /* + * FIXME: There is an clear obligation to provide a comparator. This + * early return is therefore unacceptable. It seems pretty ugly to + * separately prepare a shim routine here, especially since there is no + * convenient way to do a reverse lookup to get an ordering operator from + * sortsupport state. Something must be done, though. + */ + if (GetDatabaseEncoding() == PG_UTF8) + return; + #endif + + /* + * We are conservative about applying the poor man's normalized key + * optimization in cases where it might be less effective. In order to + * apply that optimization, we require: + * + * * That the platform's strxfrm() meet a certain standard for + * representing as much information as possible in leading bytes. + * + * * That there are a full 8 bytes of storage per Datum on the platform, + * since we pack bytes into that representation. Having only 4 bytes + * could make worse case performance drastically more likely. + * + * Still, there is no reason to not perform fmgr elision on these + * platforms. 
+ */ + #if defined(PG_BAD_STRXFRM) || SIZEOF_DATUM != 8 + ssup->type = sortKeyOther; + #endif + + /* + * We may need a collation-sensitive comparison. To make things faster, + * we'll figure out the collation based on the locale id and cache the + * result. Also, since strxfrm()/strcoll() require NULL-terminated inputs, + * prepare one or two palloc'd buffers to use as temporary workspace. In + * the ad-hoc comparison case we only use palloc'd buffers when we need + * more space than we're comfortable allocating on the stack, but here we + * can keep the buffers around for the whole sort, so it makes sense to + * allocate them once and use them unconditionally (although we won't need + * them when sorting proper begins and strxfrm() conversion has already + * occurred, when sorting a poor man's key). + */ + tss = palloc(sizeof(TextSortSupport)); + #ifdef HAVE_LOCALE_T + tss->locale = 0; + #endif + + if (collid != DEFAULT_COLLATION_OID) + { + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + #ifdef HAVE_LOCALE_T + tss->locale = pg_newlocale_from_collation(collid); + #endif + } + + tss->buf1 = palloc(TEXTBUFLEN); + tss->buflen1 = TEXTBUFLEN; + tss->buf2 = palloc(TEXTBUFLEN); + tss->buflen2 = TEXTBUFLEN; + tss->length = 0; + + ssup->ssup_extra = tss; + if (ssup->type != sortKeyPoorman) + { + /* + * If LC_COLLATE = C, we can make things quite a bit faster by using + * memcmp() rather than strcoll(). To minimize the per-comparison + * overhead, we make this decision just once for the whole sort. 
+ */ + if (lc_collate_is_c(collid)) + ssup->comparator = bttextfastcmp_c; + else + ssup->comparator = bttextfastcmp_locale; + + ssup->converter = NULL; + ssup->abort_conversion = NULL; + ssup->proper = NULL; + return; + } + + initHyperLogLog(&tss->hlstate, 10); + + ssup->comparator = bttextcmp_poorman; + ssup->converter = bttext_convert; + ssup->abort_conversion = bttext_abort; + + ssup->proper = palloc0(sizeof(SortSupportData)); + ssup->proper->ssup_cxt = ssup->ssup_cxt; + ssup->proper->ssup_collation = ssup->ssup_collation; + ssup->proper->ssup_reverse = ssup->ssup_reverse; + ssup->proper->ssup_nulls_first = ssup->ssup_nulls_first; + ssup->proper->ssup_attno = ssup->ssup_attno; + + /* + * Initialize the "proper" sortsupport state with a reliable + * strcoll()-based comparison func for tie-breaking. + */ + ssup->proper->type = sortKeyTrueLeading; + btpoorman_worker(ssup->proper, collid); + } + + /* + * sortsupport comparison func (for C locale case) + */ + static int + bttextfastcmp_c(Datum x, Datum y, SortSupport ssup) + { + text *arg1 = DatumGetTextPP(x); + text *arg2 = DatumGetTextPP(y); + char *a1p, + *a2p; + int len1, + len2, + result; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + result = memcmp(a1p, a2p, Min(len1, len2)); + if ((result == 0) && (len1 != len2)) + result = (len1 < len2) ? -1 : 1; + + /* We can't afford to leak memory here. 
*/ + if (PointerGetDatum(arg1) != x) + pfree(arg1); + if (PointerGetDatum(arg2) != y) + pfree(arg2); + + return result; + } + + /* + * sortsupport comparison func (for locale case) + */ + static int + bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup) + { + text *arg1 = DatumGetTextPP(x); + text *arg2 = DatumGetTextPP(y); + TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra; + + /* working state */ + char *a1p, + *a2p; + int len1, + len2, + result; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + if (ssup->type == sortKeyTrueLeading && len1 == len2) + { + /* + * "True" leading key. This indicates that we're being called as a + * fully reliable tie-breaker for the poor man's normalized key + * comparison (there may be other attributes that must be subsequently + * compared later). + * + * In general there is a pretty good chance control reached here + * because the key is actually fully equal. It seems worthwhile to try + * and give an answer using only a cheap memcmp() comparison on the + * assumption that this will indicate equality frequently enough for it + * to be worth it on balance. This is a reasonable assumption, since + * sorting is almost certainly bottlenecked on memory bandwidth. 
+ */ + if ((result = memcmp(a1p, a2p, len1)) == 0) + goto done; + } + + if (len1 >= tss->buflen1) + { + pfree(tss->buf1); + tss->buflen1 = Max(len1 + 1, tss->buflen1 * 2); /* one doubling may be too small */ + tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1); + } + if (len2 >= tss->buflen2) + { + pfree(tss->buf2); + tss->buflen2 = Max(len2 + 1, tss->buflen2 * 2); /* likewise */ + tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2); + } + + memcpy(tss->buf1, a1p, len1); + tss->buf1[len1] = '\0'; + memcpy(tss->buf2, a2p, len2); + tss->buf2[len2] = '\0'; + + #ifdef HAVE_LOCALE_T + if (tss->locale) + result = strcoll_l(tss->buf1, tss->buf2, tss->locale); + else + #endif + result = strcoll(tss->buf1, tss->buf2); + + /* + * In some locales strcoll() can claim that nonidentical strings are equal. + * Believing that would be bad news for a number of reasons, so we follow + * Perl's lead and sort "equal" strings according to strcmp(). + */ + if (result == 0) + result = strcmp(tss->buf1, tss->buf2); + + done: + /* We can't afford to leak memory here; the memcmp() fast path ends up here too. */ + if (PointerGetDatum(arg1) != x) + pfree(arg1); + if (PointerGetDatum(arg2) != y) + pfree(arg2); + + return result; + } + + /* + * Poor man's normalized key comparison func + */ + static int + bttextcmp_poorman(Datum x, Datum y, SortSupport ssup) + { + char *a = (char *) &x; + char *b = (char *) &y; + int result; + + result = memcmp(a, b, sizeof(Datum)); + + /* + * When result = 0, the core system will call bttextfastcmp_locale() or + * bttextfastcmp_c(). Even a strcmp() on two non-truncated strxfrm() blobs + * cannot indicate *equality* reliably, for the same reason that there is a + * strcoll() strcmp() tie-breaker elsewhere (there'd still need to be a + * strcmp() tie-breaker on the *original* string). 
+ * + * XXX: In principle it ought to be possible to tell the core system that + * we really do know that the two strings are fully equal iff the C + * collation is used, presuming that the core system could also somehow + * differentiate between this case and the case where we have a truncated C + * collated string that we have no firm conclusion on (perhaps a fourth, + * magical return value could be used while restricting all other return + * values to (-1, 0, 1)). It doesn't seem worth the trouble of surfacing + * that distinction generally, though. + */ + return result; + } + + /* + * Conversion routine for sortsupport. Converts text to poor man's normalized + * keys. Our encoding strategy is simple -- pack the first 8 bytes of a + * strxfrm() blob into a Datum. + */ + static Datum + bttext_convert(Datum original, SortSupport ssup) + { + TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra; + text *full = DatumGetTextPP(original); + + /* working state */ + Datum res; + char *pres; + int len; + Size bsize; + uint32 lohalf, + hihalf, + hash; + + /* + * Convert text into a "poor man's normalized key". This is a + * pass-by-value Datum that is treated as a char array by the specialized + * comparator bttextcmp_poorman(). + */ + pres = (char *) &res; + /* memset() so non-copied bytes are always NULL */ + memset(pres, 0, sizeof(Datum)); + len = VARSIZE_ANY_EXHDR(full); + + /* By convention, we use buffer 1 to store and NULL terminate text */ + if (len >= tss->buflen1) + { + pfree(tss->buf1); + tss->buflen1 = Max(len + 1, tss->buflen1 * 2); /* one doubling may be too small */ + tss->buf1 = palloc(tss->buflen1); + } + + /* Just like strcoll(), strxfrm() expects a NULL-terminated string */ + memcpy(tss->buf1, VARDATA_ANY(full), len); + tss->buf1[len] = '\0'; + + retry: + + /* + * Note the lack of any special handling of the C locale here. strxfrm() + * is used indifferently. 
+ */ + #ifdef HAVE_LOCALE_T + if (tss->locale) + bsize = strxfrm_l(tss->buf2, tss->buf1, tss->buflen2, tss->locale); + else + #endif + bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2); + + if (bsize >= tss->buflen2) + { + /* + * The C standard states that the contents of the buffer is now + * unspecified. Grow buffer, and retry. + */ + pfree(tss->buf2); + tss->buflen2 = Max(bsize + 1, tss->buflen2 * 2); + tss->buf2 = palloc(tss->buflen2); + goto retry; + } + + memcpy(pres, tss->buf2, Min(sizeof(Datum), bsize)); + + /* + * Maintain approximate cardinality of poor man's keys using HyperLogLog. + * Form 32-bit hash from packed 64-bit Datum representation. Used as cheap + * insurance against the worst case, where we do many string + * transformations for savings in full strcoll()-based comparisons. + */ + lohalf = (uint32) res; + hihalf = (uint32) (res >> 32); + hash = hash_uint32(lohalf ^ hihalf); + + addHyperLogLog(&tss->hlstate, hash); + + /* Maintain total length of all strings, again for worst case prevention */ + tss->length += len; + + /* + * Iff last byte isn't NULL, as in the common case where the entire Datum + * is filled with blob bytes, that is interpreted as indicating that every + * Datum byte should be compared. This is safe because the strxfrm() blob + * is itself NULL-terminated, leaving no danger of misinterpreting any NULL + * bytes not intended to be interpreted as logically representing + * termination. + */ + return res; + } + + /* + * Callback for assessing projected effectiveness of poor man's normalized key + * optimization, using heuristic rules. Returns value indicating if the poor + * man's optimization is estimated to be worth it. 
+ */ + static bool + bttext_abort(int memtupcount, double rowhint, SortSupport ssup) + { + TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra; + double est_distinct, + normalized_cardinality, + avg_text_len; + + Assert(ssup->type == sortKeyPoorman); + + avg_text_len = tss->length / (double) memtupcount; + + if (rowhint > 5 && avg_text_len < 64) + { + double normalized_rows_to_process; + + normalized_rows_to_process = (rowhint - memtupcount) / rowhint; + + if (normalized_rows_to_process > 0.90) + { + /* + * Be patient -- don't consider aborting until we've processed an + * estimated 10% of all rows to be sorted. + */ + #ifdef DEBUG_POORMAN_KEYS + elog(DEBUG1, "normalization patiently waited after %d tuples of %f", + memtupcount, rowhint); + #endif + return false; + } + + /* + * Because the core code calls here at geometrically spaced intervals, + * there is little point in ensuring that we don't abort the + * normalization process too late, when the costs are mostly sunk and + * it's probably worth proceeding with a marginal case. + */ + } + + est_distinct = estimateHyperLogLog(&tss->hlstate); + normalized_cardinality = est_distinct / (double) memtupcount; + + /* + * We're concerned about weighing the costs of the poor man's optimization + * against its probable benefit. + * + * The dominant cost is strxfrm() transformation for large strings, and not + * extra bttextcmp_poorman() calls. However, provided poor man's keys have + * a high cardinality, it doesn't matter how expensive it is, because each + * early transformation is very likely beneficial. For smaller strings the + * cost of inefficient use of CPU cache will dominate, and so a much less + * stringent standard for cardinality is applied. 
+ */ + #ifdef DEBUG_POORMAN_KEYS + elog(DEBUG1, "est_distinct after %d: %f (normalized_cardinality: %f, avg_len: %f)", + memtupcount, est_distinct, normalized_cardinality, avg_text_len); + #endif + + if (avg_text_len < 7.5) + { + /* + * Very unlikely to lose with many short strings. The key cardinality + * doesn't much matter, because our tie-breaker saves a second + * transformation (i.e. strcoll() call) by performing a memcmp(). + */ + ; + } + else if (avg_text_len < 12) + { + if (normalized_cardinality < 0.00001) + goto abort; + } + else if (avg_text_len < 16) + { + if (normalized_cardinality < 0.0001) + goto abort; + } + else if (avg_text_len < 32) + { + if (normalized_cardinality < 0.001) + goto abort; + } + else if (avg_text_len < 64) + { + if (normalized_cardinality < 0.3) + goto abort; + } + else + { + if (normalized_cardinality < 0.65) + goto abort; + } + + return false; + + abort: + #ifdef DEBUG_POORMAN_KEYS + elog(DEBUG1, "aborted poorman normalization due to worst-case at %d", + memtupcount); + #endif + return true; + } Datum text_larger(PG_FUNCTION_ARGS) *** a/src/backend/utils/sort/sortsupport.c --- b/src/backend/utils/sort/sortsupport.c *************** PrepareSortSupportComparisonShim(Oid cmp *** 82,87 **** --- 82,90 ---- ssup->ssup_extra = extra; ssup->comparator = comparison_shim; + ssup->proper = NULL; + ssup->converter = NULL; + ssup->abort_conversion = NULL; } /* *************** PrepareSortSupportFromOrderingOp(Oid ord *** 104,109 **** --- 107,115 ---- elog(ERROR, "operator %u is not a valid ordering operator", orderingOp); + /* For now, make sure converter is NULL - opclass routine may set it */ + ssup->converter = NULL; + if (issupport) { /* The sort support function should provide a comparator */ *** a/src/backend/utils/sort/tuplesort.c --- b/src/backend/utils/sort/tuplesort.c *************** bool optimize_bounded_sort = true; *** 150,156 **** * When sorting single Datums, the data value is represented directly by * datum1/isnull1. 
If the datatype is pass-by-reference and isnull1 is false, * then datum1 points to a separately palloc'd data value that is also pointed ! * to by the "tuple" pointer; otherwise "tuple" is NULL. * * While building initial runs, tupindex holds the tuple's run number. During * merge passes, we re-use it to hold the input tape number that each tuple in --- 150,159 ---- * When sorting single Datums, the data value is represented directly by * datum1/isnull1. If the datatype is pass-by-reference and isnull1 is false, * then datum1 points to a separately palloc'd data value that is also pointed ! * to by the "tuple" pointer; otherwise "tuple" is NULL. Note that there are ! * some exceptions, as when the sort support infrastructure provides a "poor ! * man's normalized key" representation. When that occurs, extra precautions ! * are taken when a comparison involving a pair of datum1s returns 0. * * While building initial runs, tupindex holds the tuple's run number. During * merge passes, we re-use it to hold the input tape number that each tuple in *************** struct Tuplesortstate *** 353,358 **** --- 356,373 ---- SortSupport onlyKey; /* + * Additional state for managing "poor man's normalized key" sortsupport + * routines. Feedback as to how effective the optimization is likely to be + * is received from all opclasses that support this additional capability. + * This gives us some reassurance that in the worst case (when all + * normalized keys are the same), the process can be aborted before wasting + * too many cycles on the normalization process. + */ + int nextpoorcheck; /* Tuple # at which to check applicability */ + bool aborted; /* Normalization process aborted */ + double rowsHint; /* Hint of total rows to be sorted */ + + /* * These variables are specific to the CLUSTER case; they are set by * tuplesort_begin_cluster. Note CLUSTER also uses tupDesc and * indexScanKey. 
*************** tuplesort_begin_heap(TupleDesc tupDesc, *** 600,606 **** int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, ! int workMem, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; --- 615,622 ---- int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, ! int workMem, double projectedTups, ! bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; *************** tuplesort_begin_heap(TupleDesc tupDesc, *** 632,637 **** --- 648,655 ---- state->reversedirection = reversedirection_heap; state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->nextpoorcheck = 5; /* Next check of poor man's applicability */ + state->rowsHint = projectedTups; /* Hint to poor man's applicability test */ /* Prepare SortSupport data for each column */ state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); *************** tuplesort_begin_heap(TupleDesc tupDesc, *** 648,657 **** sortKey->ssup_nulls_first = nullsFirstFlags[i]; sortKey->ssup_attno = attNums[i]; PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } ! if (nkeys == 1) state->onlyKey = state->sortKeys; MemoryContextSwitchTo(oldcontext); --- 666,691 ---- sortKey->ssup_nulls_first = nullsFirstFlags[i]; sortKey->ssup_attno = attNums[i]; + /* + * Must convey to sortsupport routine if poor man's optimization is + * applicable in principle + */ + if (i == 0) + sortKey->type = sortKeyPoorman; + else + sortKey->type = sortKeyOther; + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } ! /* ! * The "onlyKey" optimization cannot be used when a tie-breaker for an ! * unreliable poor man's normalized key comparison is required. Typically, ! * the optimization is only of significant value to pass-by-value types ! 
* anyway, whereas poor man's normalized keys are typically used by ! * pass-by-reference types. ! */ ! if (nkeys == 1 && !state->sortKeys->converter) state->onlyKey = state->sortKeys; MemoryContextSwitchTo(oldcontext); *************** tuplesort_begin_datum(Oid datumType, Oid *** 838,843 **** --- 872,890 ---- /* Prepare SortSupport data */ state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData)); + /* + * "Other" key, because conversion to poor man's representation is + * infeasible in the Datum case. This is not a "leading key", because + * those are only set by sortsupport routines. If we set this to + * sortKeyTrueLeading, we'd be making a misrepresentation to the + * sortsupport routine (that there was a prior, unreliable comparison that + * now needs a tie-breaker). + * + * XXX: It may be worth having our callers arrange to do a poor man's + * normalization pass themselves, and represent to us that they'll do so + * here, so that the datum case can avail of the optimization too. + */ + state->onlyKey->type = sortKeyOther; state->onlyKey->ssup_cxt = CurrentMemoryContext; state->onlyKey->ssup_collation = sortCollation; state->onlyKey->ssup_nulls_first = nullsFirstFlag; *************** comparetup_heap(const SortTuple *a, cons *** 2858,2869 **** int nkey; int32 compare; ! /* Compare the leading sort key */ ! compare = ApplySortComparator(a->datum1, a->isnull1, ! b->datum1, b->isnull1, ! sortKey); ! if (compare != 0) ! return compare; /* Compare additional sort keys */ ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; --- 2905,2919 ---- int nkey; int32 compare; ! if (!state->aborted) ! { ! /* Compare the leading sort key */ ! compare = ApplySortComparator(a->datum1, a->isnull1, ! b->datum1, b->isnull1, ! sortKey); ! if (compare != 0) ! return compare; ! 
} /* Compare additional sort keys */ ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; *************** comparetup_heap(const SortTuple *a, cons *** 2871,2876 **** --- 2921,2953 ---- rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); tupDesc = state->tupDesc; + + /* + * If a leading poor man's comparison returned 0 or normalization strategy + * was abandoned, call "true leading" key's comparator + */ + if (state->sortKeys->converter) + { + AttrNumber attno = sortKey->ssup_attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + Assert(attno == sortKey->proper->ssup_attno); + Assert(sortKey->type == sortKeyPoorman); + Assert(sortKey->proper->type == sortKeyTrueLeading); + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey->proper); + if (compare != 0) + return compare; + } + sortKey++; for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) { *************** copytup_heap(Tuplesortstate *state, Sort *** 2911,2920 **** /* set up first-column key value */ htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); ! stup->datum1 = heap_getattr(&htup, ! state->sortKeys[0].ssup_attno, ! state->tupDesc, ! &stup->isnull1); } static void --- 2988,3036 ---- /* set up first-column key value */ htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); ! ! /* Once aborted, we give up on storing anything in datum1 entirely */ ! if (state->aborted) ! return; ! ! if (!state->sortKeys->converter) ! { ! /* Store ordinary Datum representation */ ! stup->datum1 = heap_getattr(&htup, ! state->sortKeys[0].ssup_attno, ! state->tupDesc, ! &stup->isnull1); ! } ! else ! { ! Datum original; ! ! /* ! 
* Store "poor man's normalized key", which cannot indicate equality in ! * a trustworthy manner, and may require a tie-breaker ! */ ! original = heap_getattr(&htup, state->sortKeys[0].ssup_attno, ! state->tupDesc, &stup->isnull1); ! ! if (stup->isnull1) ! stup->datum1 = original; ! else ! stup->datum1 = state->sortKeys->converter(original, ! state->sortKeys); ! ! /* Check effectiveness of optimization */ ! if (state->memtupcount >= state->nextpoorcheck) ! { ! state->nextpoorcheck *= 2; ! if (state->sortKeys->abort_conversion(state->memtupcount, ! state->rowsHint, ! state->sortKeys)) ! { ! /* Additional optimization did not work out -- give up */ ! state->aborted = true; ! } ! } ! } } static void *************** reversedirection_heap(Tuplesortstate *st *** 2980,2985 **** --- 3096,3112 ---- sortKey->ssup_reverse = !sortKey->ssup_reverse; sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; } + + /* If poor man's optimization is used, update "key proper" */ + if (state->sortKeys->proper) + { + sortKey = state->sortKeys->proper; + + Assert(sortKey->type == sortKeyTrueLeading); + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } + } *** a/src/include/catalog/pg_amproc.h --- b/src/include/catalog/pg_amproc.h *************** DATA(insert ( 1989 26 26 1 356 )); *** 122,127 **** --- 122,128 ---- DATA(insert ( 1989 26 26 2 3134 )); DATA(insert ( 1991 30 30 1 404 )); DATA(insert ( 1994 25 25 1 360 )); + DATA(insert ( 1994 25 25 2 3251 )); DATA(insert ( 1996 1083 1083 1 1107 )); DATA(insert ( 2000 1266 1266 1 1358 )); DATA(insert ( 2002 1562 1562 1 1672 )); *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** DATA(insert OID = 3135 ( btnamesortsuppo *** 614,619 **** --- 614,621 ---- DESCR("sort support"); DATA(insert OID = 360 ( bttextcmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "25 25" _null_ _null_ _null_ _null_ bttextcmp _null_ _null_ _null_ )); DESCR("less-equal-greater"); + 
DATA(insert OID = 3251 ( bttextsortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ bttextsortsupport _null_ _null_ _null_ )); + DESCR("sort support"); DATA(insert OID = 377 ( cash_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "790 790" _null_ _null_ _null_ _null_ cash_cmp _null_ _null_ _null_ )); DESCR("less-equal-greater"); DATA(insert OID = 380 ( btreltimecmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "703 703" _null_ _null_ _null_ _null_ btreltimecmp _null_ _null_ _null_ )); *** a/src/include/lib/hyperloglog.h --- b/src/include/lib/hyperloglog.h *************** *** 0 **** --- 1,42 ---- + /* + * hyperloglog.h + * + * A simple HyperLogLog cardinality estimator implementation + * + * Portions Copyright (c) 2014, PostgreSQL Global Development Group + * + * src/include/lib/hyperloglog.h + */ + + #ifndef HYPERLOGLOG_H + #define HYPERLOGLOG_H + + /* + * HyperLogLog is an approximate technique for computing the number of distinct + * entries in a set. Importantly, it does this by using a fixed amount of + * memory. See the 2007 paper "HyperLogLog: the analysis of a near-optimal + * cardinality estimation algorithm" for more. 
+ * + * hyperLogLogState + * + * registerWidth register width, in bits ("k") + * nRegisters number of registers + * alphaMM alpha * m ^ 2 (see initHyperLogLog()) + * hashesArr array of hashes + * arrSize size of hashesArr + */ + typedef struct hyperLogLogState + { + uint8 registerWidth; + Size nRegisters; + double alphaMM; + uint8 *hashesArr; + Size arrSize; + } hyperLogLogState; + + extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth); + extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash); + extern double estimateHyperLogLog(hyperLogLogState *cState); + extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState); + + #endif /* HYPERLOGLOG_H */ *** a/src/include/utils/builtins.h --- b/src/include/utils/builtins.h *************** extern Datum bttintervalcmp(PG_FUNCTION_ *** 316,321 **** --- 316,322 ---- extern Datum btcharcmp(PG_FUNCTION_ARGS); extern Datum btnamecmp(PG_FUNCTION_ARGS); extern Datum bttextcmp(PG_FUNCTION_ARGS); + extern Datum bttextsortsupport(PG_FUNCTION_ARGS); /* * Per-opclass sort support functions for new btrees. Like the *** a/src/include/utils/sortsupport.h --- b/src/include/utils/sortsupport.h *************** *** 49,54 **** --- 49,61 ---- #include "access/attnum.h" + typedef enum + { + sortKeyPoorman, /* Leading (poor-man-applicable) key? */ + sortKeyTrueLeading, /* "True" (non-poorman's) leading key? */ + sortKeyOther /* Second or subsequent key */ + } SortKeyType; + typedef struct SortSupportData *SortSupport; typedef struct SortSupportData *************** typedef struct SortSupportData *** 92,103 **** * than, equal to, or greater than y. Note that x and y are guaranteed * not null, and there is no way to return null either. Do not return * INT_MIN, as callers are allowed to negate the result before using it. */ int (*comparator) (Datum x, Datum y, SortSupport ssup); /* ! * Additional sort-acceleration functions might be added here later. 
*/ } SortSupportData; --- 99,188 ---- * than, equal to, or greater than y. Note that x and y are guaranteed * not null, and there is no way to return null either. Do not return * INT_MIN, as callers are allowed to negate the result before using it. + * + * This comparator may be "semi-trustworthy" for opclasses with additional + * special support for dealing with a poor man's normalized key + * representation. */ int (*comparator) (Datum x, Datum y, SortSupport ssup); /* ! * "Poor man's normalized key" infrastructure follows. All callbacks must ! * be set by sortsupport opclasses that make use of this optional ! * additional infrastructure. ! * ! * This allows opclass authors to supply a conversion routine, used to ! * create an alternative representation of the underlying type (a "poor ! * man's normalized key"). Typically, this representation is an ad-hoc, ! * pass-by-value Datum format that only the opclass has knowledge of. An ! * alternative comparator, used only with this alternative representation ! * must also be provided. This representation is a simple approximation of ! * the original Datum. It must be possible to compare datums of this ! * representation with each other using the supplied alternative ! * comparator, and have any non-zero return value be a reliable proxy for ! * what a proper comparison would indicate. Returning zero from the ! * alternative comparator does not indicate equality, as with a ! * conventional support routine 1, though -- it indicates that it wasn't ! * possible to determine how the two poor man's values compared. A proper ! * comparison is therefore required. In many cases this results in most or ! * all comparisons only using the cheap alternative comparison func, which ! * is typically implemented as code that compiles to just a few CPU ! * instructions. The technique is particularly useful for in-memory ! * quicksorts, which can much more effectively work with CPU caches when ! * sorting pass-by-value types. 
One goal is to extend all these advantages ! * to pass-by-reference types. ! * ! * Opclass authors must consider the final cardinality of normalized keys ! * when devising an encoding scheme. It's possible for one strategy to ! * work better than another with a certain usage pattern, while the inverse ! * may be the case for some other usage pattern. */ + + /* + * Sort key "type" mostly just relates to whether or not a poor man's + * optimization is applicable in principle (i.e. the sortsupport routine + * needs to know if it's dealing with a leading key). Even with a leading + * key, internal sortsupport clients like tuplesort may represent it as + * sortKeyOther because it isn't feasible to inject our conversion routine. + * However, the sortKeyTrueLeading type means that it's a "proper" + * sortsupport state, originally generated by the sortsupport routine + * itself - the core system will never set a "true leading" key type. + * There is very little distinction between a "true leading" and "other" + * key type, though - the distinction only exists to allow sortsupport + * routines to squeeze a bit more performance from the knowledge that a + * fully reliable tie-breaker comparison is required because a prior + * alternative comparison didn't work out (as opposed to being called + * without there ever being such an alternative comparison). + */ + SortKeyType type; /* Position of key */ + + /* + * Converter to poor man's format, from original representation. Core code + * uses this callback to convert to a pass-by-value untrustworthy + * Datum/poor man's normalized key. Note that original is guaranteed not + * null. + */ + Datum (*converter) (Datum original, SortSupport ssup); + + /* + * This callback allows clients to verify that the current strategy is + * working out. If there are a lot of duplicate poor man's keys in + * practice, it's useful to be able to abandon the strategy before paying + * too high a cost in conversion. 
+ */ + bool (*abort_conversion) (int memtupcount, double rowhint, + SortSupport ssup); + + /* + * Alternative "true leading" SortSupport state for leading (poor man's) + * key, used only when alternative comparator returned 0, and the core + * system must use this separate state to perform a fully trustworthy + * comparison. This relates to the same attribute as our ssup_attno, but + * core code like tuplesort is required to call it directly (i.e. it is + * initialized by a poor man's SortSupport routine, and not any internal + * code). + */ + struct SortSupportData *proper; } SortSupportData; *** a/src/include/utils/tuplesort.h --- b/src/include/utils/tuplesort.h *************** extern Tuplesortstate *tuplesort_begin_h *** 62,68 **** int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, ! int workMem, bool randomAccess); extern Tuplesortstate *tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, bool randomAccess); --- 62,68 ---- int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, ! int workMem, double projectedTups, bool randomAccess); extern Tuplesortstate *tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, bool randomAccess); *** a/src/test/locale/test-strxfrm-redundant.c --- b/src/test/locale/test-strxfrm-redundant.c *************** *** 0 **** --- 1,81 ---- + /*------------------------------------------------------------------------- + * + * test-strxfrm-redundant.c + * libc strxfrm redundancy test program + * + * Copyright (c) 2014, PostgreSQL Global Development Group + * + * src/test/locale/test-strxfrm-redundant.c + * + * This program tests to see if the system's C standard library strxfrm() + * function has notable redundancy, or "header bytes". Certain + * implementations are known to have this problem, including the Mac OSX + * system libc. 
This is problematic because any header bytes the + * implementation includes are wasted for the purposes of "poor man's + * normalized key" optimization. + * + * The standard that we apply is that if there are any header bytes + * whatsoever, then the optimization cannot be used. Also, any trailing bytes + * should not contain information essential to the original string. With + * strings of 8 bytes or less comprised only of ASCII code points, we expect + * the full benefit of 8 bytes of packed Datum storage (varlena.c checks that + * we have 8 byte datums too). + * + *------------------------------------------------------------------------- + */ + #include <locale.h> + #include <string.h> + + #define MAX_BLOB_SIZE 2048 + + int main() + { + char res1[MAX_BLOB_SIZE]; + char res2[MAX_BLOB_SIZE]; + int i; + size_t s, j; + + /* Use default locale */ + setlocale(LC_ALL, ""); + + s = strxfrm(res1, "abcdefgh", MAX_BLOB_SIZE); + j = strxfrm(res2, "ijklmnop", MAX_BLOB_SIZE); + + if (s != j) + return 1; + + /* + * Ensure that first 8 bytes don't match (i.e. that there are no "header + * bytes") + */ + for (i = 0; i < 8; i++) + { + if (res1[i] == res2[i]) + return 1; + } + + /* + * Ensure that the remaining bytes are identical, and that we therefore are + * guaranteed to have a 1:1 correspondence between blob bytes and original + * string bytes, at least for the simple case where only ASCII code points + * are transformed. + * + * On Glibc 2.19, with the "en_US.UTF8" collation, the strings under + * consideration look like this once transformed (shown with a splice at 8 + * bytes to aid visualization): + * + * abcdefgh: \x0c0d0e0f10111213 010909090909090909010909090909090909 + * ijklmnop: \x1415161718191a1b 010909090909090909010909090909090909 + * + * Before here must differ: ^ + * + * After here should be equal: ^ + */ + for (; i < s; i++) + { + if (res1[i] != res2[i]) + return 1; + } + + return 0; + }