diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index 861048f..903681e *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** std_typanalyze(VacAttrStats *stats) *** 1723,1732 **** <opr, &eqopr, NULL, NULL); - /* If column has no "=" operator, we can't do much of anything */ - if (!OidIsValid(eqopr)) - return false; - /* Save the operator info for compute_stats routines */ mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); mystats->eqopr = eqopr; --- 1723,1728 ---- *************** std_typanalyze(VacAttrStats *stats) *** 1737,1743 **** /* * Determine which standard statistics algorithm to use */ ! if (OidIsValid(ltopr)) { /* Seems to be a scalar datatype */ stats->compute_stats = compute_scalar_stats; --- 1733,1739 ---- /* * Determine which standard statistics algorithm to use */ ! if (OidIsValid(eqopr) && OidIsValid(ltopr)) { /* Seems to be a scalar datatype */ stats->compute_stats = compute_scalar_stats; *************** std_typanalyze(VacAttrStats *stats) *** 1776,1786 **** /* * compute_minimal_stats() -- compute minimal column statistics * ! * We use this when we can find only an "=" operator for the datatype. * * We determine the fraction of non-null rows, the average width, the * most common values, and the (estimated) number of distinct values. * * The most common values are determined by brute force: we keep a list * of previously seen values, ordered by number of times seen, as we scan * the samples. A newly seen value is inserted just after the last --- 1772,1784 ---- /* * compute_minimal_stats() -- compute minimal column statistics * ! * We use this when we cannot find "=" and "<" operators for the datatype. * * We determine the fraction of non-null rows, the average width, the * most common values, and the (estimated) number of distinct values. * + * Only if there is a "=" operator, we try to find the most common values. + * * The most common values are determined by brute force: we keep a list * of previously seen values, ordered by number of times seen, as we scan * the samples. A newly seen value is inserted just after the last *************** compute_minimal_stats(VacAttrStatsP stat *** 1809,1830 **** Datum value; int count; } TrackItem; ! TrackItem *track; ! int track_cnt, ! track_max; int num_mcv = stats->attr->attstattarget; StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; ! /* ! * We track up to 2*n values for an n-element MCV list; but at least 10 ! */ ! track_max = 2 * num_mcv; ! if (track_max < 10) ! track_max = 10; ! track = (TrackItem *) palloc(track_max * sizeof(TrackItem)); ! track_cnt = 0; ! fmgr_info(mystats->eqfunc, &f_cmpeq); for (i = 0; i < samplerows; i++) { --- 1807,1830 ---- Datum value; int count; } TrackItem; ! TrackItem *track = NULL; ! int track_cnt = 0, ! track_max = 0; int num_mcv = stats->attr->attstattarget; StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; ! if (OidIsValid(mystats->eqfunc)) ! { ! /* ! * We track up to 2*n values for an n-element MCV list; but at least 10 ! */ ! track_max = 2 * num_mcv; ! if (track_max < 10) ! track_max = 10; ! track = (TrackItem *) palloc(track_max * sizeof(TrackItem)); ! fmgr_info(mystats->eqfunc, &f_cmpeq); ! } for (i = 0; i < samplerows; i++) { *************** compute_minimal_stats(VacAttrStatsP stat *** 1876,1926 **** total_width += strlen(DatumGetCString(value)) + 1; } ! /* ! * See if the value matches anything we're already tracking. ! */ ! match = false; ! firstcount1 = track_cnt; ! for (j = 0; j < track_cnt; j++) { ! /* We always use the default collation for statistics */ ! if (DatumGetBool(FunctionCall2Coll(&f_cmpeq, ! DEFAULT_COLLATION_OID, ! value, track[j].value))) { ! match = true; ! break; } - if (j < firstcount1 && track[j].count == 1) - firstcount1 = j; - } ! if (match) ! { ! /* Found a match */ ! track[j].count++; ! /* This value may now need to "bubble up" in the track list */ ! while (j > 0 && track[j].count > track[j - 1].count) ! { ! swapDatum(track[j].value, track[j - 1].value); ! swapInt(track[j].count, track[j - 1].count); ! j--; ! } ! } ! else ! { ! /* No match. Insert at head of count-1 list */ ! if (track_cnt < track_max) ! track_cnt++; ! for (j = track_cnt - 1; j > firstcount1; j--) { ! track[j].value = track[j - 1].value; ! track[j].count = track[j - 1].count; } ! if (firstcount1 < track_cnt) { ! track[firstcount1].value = value; ! track[firstcount1].count = 1; } } } --- 1876,1929 ---- total_width += strlen(DatumGetCString(value)) + 1; } ! if (track) { ! /* ! * See if the value matches anything we're already tracking. ! */ ! match = false; ! firstcount1 = track_cnt; ! for (j = 0; j < track_cnt; j++) { ! /* We always use the default collation for statistics */ ! if (DatumGetBool(FunctionCall2Coll(&f_cmpeq, ! DEFAULT_COLLATION_OID, ! value, track[j].value))) ! { ! match = true; ! break; ! } ! if (j < firstcount1 && track[j].count == 1) ! firstcount1 = j; } ! if (match) { ! /* Found a match */ ! track[j].count++; ! /* This value may now need to "bubble up" in the track list */ ! while (j > 0 && track[j].count > track[j - 1].count) ! { ! swapDatum(track[j].value, track[j - 1].value); ! swapInt(track[j].count, track[j - 1].count); ! j--; ! } } ! else { ! /* No match. Insert at head of count-1 list */ ! if (track_cnt < track_max) ! track_cnt++; ! for (j = track_cnt - 1; j > firstcount1; j--) ! { ! track[j].value = track[j - 1].value; ! track[j].count = track[j - 1].count; ! } ! if (firstcount1 < track_cnt) ! { ! track[firstcount1].value = value; ! track[firstcount1].count = 1; ! } } } }