diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 5510a203b0..08ad102901 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -459,6 +459,30 @@ static const NumericVar const_ninf = static const int round_powers[4] = {0, 1000, 100, 10}; #endif +#define KARATSUBA_BASE_LIMIT 384 +#define KARATSUBA_VAR1_MIN1 128 +#define KARATSUBA_VAR1_MIN2 2000 +#define KARATSUBA_VAR2_MIN1 2500 +#define KARATSUBA_VAR2_MIN2 9000 +#define KARATSUBA_SLOPE 0.764 +#define KARATSUBA_INTERCEPT 90.737 + +#define KARATSUBA_LOW_RANGE_CONDITION(var1ndigits, var2ndigits) \ + ((var1ndigits) > (KARATSUBA_SLOPE) * (var2ndigits) + KARATSUBA_INTERCEPT) + +#define KARATSUBA_MIDDLE_RANGE_CONDITION(var1ndigits, var2ndigits) \ + ((var2ndigits) > KARATSUBA_VAR2_MIN1 && \ + (var1ndigits) > KARATSUBA_VAR1_MIN2) + +#define KARATSUBA_HIGH_RANGE_CONDITION(var1ndigits, var2ndigits) \ + ((var2ndigits) > KARATSUBA_VAR2_MIN2 && \ + (var1ndigits) > KARATSUBA_VAR1_MIN1) + +#define KARATSUBA_CONDITION(var1ndigits, var2ndigits) \ + ((var2ndigits) >= KARATSUBA_BASE_LIMIT && \ + (KARATSUBA_LOW_RANGE_CONDITION(var1ndigits, var2ndigits) || \ + KARATSUBA_MIDDLE_RANGE_CONDITION(var1ndigits, var2ndigits) || \ + KARATSUBA_HIGH_RANGE_CONDITION(var1ndigits, var2ndigits))) /* ---------- * Local functions @@ -548,6 +572,14 @@ static void add_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result); static void sub_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result); +inline static void split_var_at(const NumericVar *var, int split_point, + NumericVar *low, NumericVar *high); +static void mul_var_karatsuba_full(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, + int rscale); +static void mul_var_karatsuba_half(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, + int rscale); static void mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, int rscale); @@ -8659,6 +8691,219 @@ sub_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result) } +/* + * split_var_at() - + * + * Split a NumericVar into two parts at a specified position. + */ +inline static void +split_var_at(const NumericVar *var, int split_point, + NumericVar *low, NumericVar *high) +{ + int high_ndigits = var->ndigits - split_point; + int low_ndigits = split_point; + + init_var(high); + init_var(low); + + high->ndigits = high_ndigits; + high->digits = var->digits; + high->buf = NULL; + high->weight = var->weight - low_ndigits; + high->sign = var->sign; + high->dscale = (var->ndigits - var->weight - 1) * DEC_DIGITS; + + low->ndigits = low_ndigits; + low->digits = var->digits + high_ndigits; + low->buf = NULL; + low->weight = var->weight - high_ndigits; + low->sign = var->sign; + low->dscale = var->dscale; +} + + +/* + * mul_var_karatsuba_full() - + * + * Multiplication using the Karatsuba algorithm. + * + * The algorithm normally starts by checking if any of the inputs + * are smaller than the NBASE, the base case for the recursion, + * and if so, fall back to traditional multiplication. + * + * That part is handled by the caller in our code, so when this function + * is called, we know that var1 and var2 are large enough for Karatsuba + * to be used. We also know that var1 is shorter or of equal length as var2, + * which has been arranged by the caller by swapping them if necessary. + * + * The algorithm then proceeds by splitting var1 and var2 into + * two high and low parts, at half the length of the longer input: + * + * m = max(size_NBASE(var1), size_NBASE(var2)) + * m2 = floor(m / 2) + * + * high1, low1 = split_var_at(var1, m2) + * high2, low2 = split_var_at(var2, m2) + * + * z0 = (low1 * low2) + * z1 = ((low1 + high1) * (low2 + high2)) + * z2 = (high1 * high2) + * + * return (z2 * NBASE ^ (m2 × 2)) + ((z1 - z2 - z0) * NBASE ^ m2) + z0 + */ +static void +mul_var_karatsuba_full(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, int rscale) +{ + NumericVar high1, low1; + NumericVar high2, low2; + NumericVar z0, z1, z2; + NumericVar temp1, temp2; + int m2 = var2->ndigits / 2; + + init_var(&low1); + init_var(&low2); + init_var(&high1); + init_var(&high2); + init_var(&z0); + init_var(&z1); + init_var(&z2); + init_var(&temp1); + init_var(&temp2); + + split_var_at(var1, m2, &low1, &high1); + split_var_at(var2, m2, &low2, &high2); + + mul_var(&low1, &low2, &z0, low1.dscale + low2.dscale); + + add_var(&low1, &high1, &temp1); + add_var(&low2, &high2, &temp2); + mul_var(&temp1, &temp2, &z1, temp1.dscale + temp2.dscale); + + mul_var(&high1, &high2, &z2, high1.dscale + high2.dscale); + + set_var_from_var(&z2, &temp1); + temp1.weight += m2 * 2; + + sub_var(&z1, &z2, &z1); + sub_var(&z1, &z0, &temp2); + temp2.weight += m2; + + add_var(&temp1, &temp2, &temp2); + add_var(&temp2, &z0, result); + + free_var(&low1); + free_var(&low2); + free_var(&high1); + free_var(&high2); + free_var(&z0); + free_var(&z1); + free_var(&z2); + free_var(&temp1); + free_var(&temp2); + + /* Round to target rscale (and set result->dscale) */ + round_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); + + return; +} + + +/* + * mul_var_karatsuba_half() - + * + * Karatsuba Multiplication for factors with significant length disparity. + * + * The Half-Karatsuba Multiplication Algorithm is a specialized case of + * the normal Karatsuba multiplication algorithm, designed for the scenario + * where var2 has at least twice as many base digits as var1. + * + * In this case var2 (the longer input) is split into high2 and low1, + * at m2 (half the length of var2) and var1 (the shorter input), + * is used directly without splitting. + * + * The algorithm then proceeds as follows: + * + * 1. Compute the product z0 = var1 * low2. + * 2. Compute the product temp2 = var1 * high2. + * 3. Adjust the weight of temp2 by adding m2 (* NBASE ^ m2) + * 4. Add temp2 and z0 to obtain the final result. + * + * Proof: + * + * The algorithm can be derived from the original Karatsuba algorithm by + * simplifying the formula when the shorter factor var1 is not split into + * high and low parts, as shown below. + * + * Original Karatsuba formula: + * + * result = (z2 * NBASE ^ (m2 × 2)) + ((z1 - z2 - z0) * NBASE ^ m2) + z0 + * + * Substitutions: + * + * low1 = var1 + * high1 = 0 + * + * Applying substitutions: + * + * z0 = (low1 * low2) + * = (var1 * low2) + * + * z1 = ((low1 + high1) * (low2 + high2)) + * = ((var1 + 0) * (low2 + high2)) + * = (var1 * low2) + (var1 * high2) + * + * z2 = (high1 * high2) + * = (0 * high2) + * = 0 + * + * Simplified using the above substitutions: + * + * result = (z2 * NBASE ^ (m2 × 2)) + ((z1 - z2 - z0) * NBASE ^ m2) + z0 + * = (0 * NBASE ^ (m2 × 2)) + ((z1 - 0 - z0) * NBASE ^ m2) + z0 + * = ((z1 - z0) * NBASE ^ m2) + z0 + * = ((z1 - z0) * NBASE ^ m2) + z0 + * = (var1 * high2) * NBASE ^ m2 + z0 + */ +static void +mul_var_karatsuba_half(const NumericVar *var1, const NumericVar *var2, + NumericVar *result, int rscale) +{ + NumericVar high2, low2; + NumericVar z0; + NumericVar temp2; + int m2 = var2->ndigits / 2; + + init_var(&high2); + init_var(&low2); + init_var(&z0); + init_var(&temp2); + + split_var_at(var2, m2, &low2, &high2); + + mul_var(var1, &low2, &z0, var1->dscale + low2.dscale); + mul_var(var1, &high2, &temp2, var1->dscale + high2.dscale); + temp2.weight += m2; + add_var(&temp2, &z0, result); + + free_var(&high2); + free_var(&low2); + free_var(&z0); + free_var(&temp2); + + /* Round to target rscale (and set result->dscale) */ + round_var(result, rscale); + + /* Strip leading and trailing zeroes */ + strip_var(result); + + return; +} + + /* * mul_var() - * @@ -8747,6 +8992,18 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, return; } + /* + * Use the Karatsuba algorithm for sufficiently large factors. + */ + if (KARATSUBA_CONDITION(var1ndigits, var2ndigits)) + { + if (var1ndigits * 2 > var2ndigits) + mul_var_karatsuba_full(var1, var2, result, rscale); + else + mul_var_karatsuba_half(var1, var2, result, rscale); + return; + } + /* * We do the arithmetic in an array "dig[]" of signed int's. Since * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom