From 27544e1504a0f014165e1120e5230e22a424b8b4 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 10 Oct 2025 00:38:05 +0200 Subject: [PATCH v5 2/3] adjustments --- src/backend/executor/nodeHash.c | 43 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 0f815674c9f..264c5b7ed40 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -883,37 +883,51 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, * * Note that we can only change nbuckets during initial hashtable sizing. * Once we start building the hash, nbuckets is fixed. + * + * We will double a number of parameters (space_allowed, nbuckets and + * num_skew_mcvs), which brings a risk of overflow. We simply stop the loop + * if that happens. We could do something smarter (e.g. cap nbuckets and + * continue), but the complexity is not worth it. It's extremely rare and + * this is a best-effort attempt to reduce memory usage. */ while (nbatch > 1) { /* - * Ensure that nbuckets can be allocated. nbuckets may have been - * increased above hash_table_size / sizeof(HashJoinTuple) if work_mem - * was very small, so checking if MaxAllocSize is enough for the - * buckets is the safest check. + * Check that nbuckets won't overflow MaxAllocSize. + * + * With extremely low work_mem values, nbuckets may have been set + * higher than hash_table_size / sizeof(HashJoinTuple). We don't try + * to correct that here, we accept that nbuckets may be oversized. */ if (nbuckets > MaxAllocSize / sizeof(HashJoinTuple) / 2) break; /* - * We check that space_allowed wouldn't overflow because it will be - * larger than hash_table_bytes if there is a skew hashtable. + * Check that space_allowed won't overflow. + * + * We don't check hash_table_bytes, because we subtracted space for + * skew hashtable from it (so it may not be equal to space_allowed). 
*/ if ((*space_allowed) > SIZE_MAX / 2) break; /* - * This is the same as: + * Check that num_skew_mcvs won't overflow. + */ + if ((*num_skew_mcvs) > INT_MAX / 2) + break; + + /* + * Will halving the number of batches and doubling the size of the + * hashtable reduce overall memory usage? * - * hash_table_bytes + 2 * nbatch * BLCKSZ < hash_table_bytes * 2 + - * nbatch * BLCKSZ + * This is the same as (S = space_allowed): * - * avoiding intermediate overflow. + * (S + 2 * nbatch * BLCKSZ) < (S * 2 + nbatch * BLCKSZ) * - * which is to say: will halving the number of batches and doubling - * the size of the hashtable reduce overall memory usage? + * but avoiding intermediate overflow. */ - if (nbatch < hash_table_bytes / BLCKSZ) + if (nbatch < *space_allowed / BLCKSZ) break; /* @@ -921,7 +935,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, * overflowing nbuckets. */ nbuckets *= 2; - hash_table_bytes *= 2; + + *num_skew_mcvs = (*num_skew_mcvs) * 2; *space_allowed = (*space_allowed) * 2; nbatch /= 2; -- 2.43.0