diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index ddb68ab..0f93619 *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** compute_scalar_stats(VacAttrStatsP stats *** 2464,2469 **** --- 2464,2471 ---- * emit duplicate histogram bin boundaries. (We might end up with * duplicate histogram entries anyway, if the distribution is skewed; * but we prefer to treat such values as MCVs if at all possible.) + * We also decrease ndistinct in the process such that going forward + * it refers to the number of distinct values left for the histogram. */ if (track_cnt == ndistinct && toowide_cnt == 0 && stats->stadistinct > 0 && *************** compute_scalar_stats(VacAttrStatsP stats *** 2471,2505 **** { /* Track list includes all values seen, and all will fit */ num_mcv = track_cnt; } else { ! double ndistinct = stats->stadistinct; ! double avgcount, ! mincount, ! maxmincount; - if (ndistinct < 0) - ndistinct = -ndistinct * totalrows; - /* estimate # of occurrences in sample of a typical value */ - avgcount = (double) samplerows / ndistinct; - /* set minimum threshold count to store a value */ - mincount = avgcount * 1.25; - if (mincount < 2) - mincount = 2; - /* don't let threshold exceed 1/K, however */ - maxmincount = (double) samplerows / (double) num_bins; - if (mincount > maxmincount) - mincount = maxmincount; if (num_mcv > track_cnt) num_mcv = track_cnt; ! for (i = 0; i < num_mcv; i++) { ! if (track[i].count < mincount) ! { ! num_mcv = i; break; } } } --- 2473,2533 ---- { /* Track list includes all values seen, and all will fit */ num_mcv = track_cnt; + + /* Nothing left for the histogram */ + num_hist = 0; + ndistinct = 0; } else { ! /* ! * Starting number of values left for the histogram: samplerows ! * sans nulls and too wide ones. ! */ ! int sample_cnt = values_cnt; if (num_mcv > track_cnt) num_mcv = track_cnt; ! for (i = 0; /* i < num_mcv */; i++) { ! /* ! * We have to put this before the loop condition, otherwise ! * we'll have to repeat this code before the loop and after ! * decreasing ndistinct. ! */ ! num_hist = ndistinct; ! if (num_hist > num_bins) ! num_hist = num_bins + 1; ! ! /* Another way to say "while (i < num_mcv)" */ ! if (i >= num_mcv) break; + + if (num_hist >= 2) + { + double avgcount, + mincount, + maxmincount; + + /* estimate # of occurrences in sample of a typical value */ + avgcount = (double) sample_cnt / (double) ndistinct; + + /* set minimum threshold count to store a value */ + mincount = 1.25 * avgcount; + + /* don't let threshold exceed 1/K, however */ + maxmincount = (sample_cnt - 1) / (double) (num_hist - 1); + if (mincount > maxmincount) + mincount = maxmincount; + if (track[i].count < mincount) + { + num_mcv = i; + break; + } } + /* Narrow our view of samples left for the histogram */ + sample_cnt -= track[i].count; + ndistinct--; } } *************** compute_scalar_stats(VacAttrStatsP stats *** 2542,2550 **** * values not accounted for in the MCV list. (This ensures the * histogram won't collapse to empty or a singleton.) */ - num_hist = ndistinct - num_mcv; - if (num_hist > num_bins) - num_hist = num_bins + 1; if (num_hist >= 2) { MemoryContext old_context; --- 2570,2575 ----