Re: Avg performance for int8/numeric - Mailing list pgsql-patches
From | Mark Kirkwood |
---|---|
Subject | Re: Avg performance for int8/numeric |
Date | |
Msg-id | 45D648BF.4020607@paradise.net.nz Whole thread Raw |
In response to | Re: Avg performance for int8/numeric (Bruce Momjian <bruce@momjian.us>) |
Responses |
Re: Avg performance for int8/numeric
Re: Avg performance for int8/numeric |
List | pgsql-patches |
Bruce Momjian wrote: > I have tested this patch but it generates regression failures. > > There was some code drift, so I am attaching an updated version of the > patch, and the regression diffs. The 'four' column is an 'int4' so my > guess is that somehow the wrong aggregate is being called. > Good catch - I must have neglected to run the regression test after amending the number of array arguments for the numeric avg :-(. Hmmm - this changing the number of array args for avg means we can't mix transition functions for variance with final functions for avg - which is exactly what the regression suite does with the 'newavg' aggregate. I've 'fixed' this by amending the definition of 'newavg' to use the transition and final function that 'avg' does. However I found myself asking if this lost us the point of that test - so I looked back at the older postgres versions (e.g. 7.1.3) and saw that back *then* 'newavg' and 'avg' were defined using the same functions...so I think making the change as indicated is ok. I've attached a new patch with this change. Cheers Mark diff -Nacr pgsql.orig/src/backend/utils/adt/numeric.c pgsql/src/backend/utils/adt/numeric.c *** pgsql.orig/src/backend/utils/adt/numeric.c Sun Jan 21 11:36:20 2007 --- pgsql/src/backend/utils/adt/numeric.c Fri Feb 16 18:09:24 2007 *************** *** 2165,2170 **** --- 2165,2204 ---- return result; } + /* + * Improve avg performance by not caclulating sum(X*X). + */ + static ArrayType * + do_numeric_avg_accum(ArrayType *transarray, Numeric newval) + { + Datum *transdatums; + int ndatums; + Datum N, + sumX; + ArrayType *result; + + /* We assume the input is array of numeric */ + deconstruct_array(transarray, + NUMERICOID, -1, false, 'i', + &transdatums, NULL, &ndatums); + if (ndatums != 2) + elog(ERROR, "expected 2-element numeric array"); + N = transdatums[0]; + sumX = transdatums[1]; + + N = DirectFunctionCall1(numeric_inc, N); + sumX = DirectFunctionCall2(numeric_add, sumX, + NumericGetDatum(newval)); + + transdatums[0] = N; + transdatums[1] = sumX; + + result = construct_array(transdatums, 2, + NUMERICOID, -1, false, 'i'); + + return result; + } + Datum numeric_accum(PG_FUNCTION_ARGS) { *************** *** 2175,2180 **** --- 2209,2226 ---- } /* + * Optimized case for average of numeric. + */ + Datum + numeric_avg_accum(PG_FUNCTION_ARGS) + { + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Numeric newval = PG_GETARG_NUMERIC(1); + + PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval)); + } + + /* * Integer data types all use Numeric accumulators to share code and * avoid risk of overflow. For int2 and int4 inputs, Numeric accumulation * is overkill for the N and sum(X) values, but definitely not overkill *************** *** 2219,2224 **** --- 2265,2286 ---- PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } + /* + * Optimized case for average of int8. + */ + Datum + int8_avg_accum(PG_FUNCTION_ARGS) + { + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Datum newval8 = PG_GETARG_DATUM(1); + Numeric newval; + + newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8)); + + PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval)); + } + + Datum numeric_avg(PG_FUNCTION_ARGS) { *************** *** 2232,2242 **** deconstruct_array(transarray, NUMERICOID, -1, false, 'i', &transdatums, NULL, &ndatums); ! if (ndatums != 3) ! elog(ERROR, "expected 3-element numeric array"); N = DatumGetNumeric(transdatums[0]); sumX = DatumGetNumeric(transdatums[1]); - /* ignore sumX2 */ /* SQL92 defines AVG of no values to be NULL */ /* N is zero iff no digits (cf. numeric_uminus) */ --- 2294,2303 ---- deconstruct_array(transarray, NUMERICOID, -1, false, 'i', &transdatums, NULL, &ndatums); ! if (ndatums != 2) ! elog(ERROR, "expected 2-element numeric array"); N = DatumGetNumeric(transdatums[0]); sumX = DatumGetNumeric(transdatums[1]); /* SQL92 defines AVG of no values to be NULL */ /* N is zero iff no digits (cf. numeric_uminus) */ diff -Nacr pgsql.orig/src/include/catalog/catversion.h pgsql/src/include/catalog/catversion.h *** pgsql.orig/src/include/catalog/catversion.h Fri Feb 16 18:06:34 2007 --- pgsql/src/include/catalog/catversion.h Fri Feb 16 18:09:24 2007 *************** *** 53,58 **** */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200702131 #endif --- 53,58 ---- */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200702151 #endif diff -Nacr pgsql.orig/src/include/catalog/pg_aggregate.h pgsql/src/include/catalog/pg_aggregate.h *** pgsql.orig/src/include/catalog/pg_aggregate.h Sun Jan 21 11:36:22 2007 --- pgsql/src/include/catalog/pg_aggregate.h Fri Feb 16 18:09:24 2007 *************** *** 80,89 **** */ /* avg */ ! DATA(insert ( 2100 int8_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); --- 80,89 ---- */ /* avg */ ! DATA(insert ( 2100 int8_avg_accum numeric_avg 0 1231 "{0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_avg_accum numeric_avg 0 1231 "{0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); diff -Nacr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h *** pgsql.orig/src/include/catalog/pg_proc.h Fri Feb 16 18:06:37 2007 --- pgsql/src/include/catalog/pg_proc.h Fri Feb 16 18:09:24 2007 *************** *** 2744,2754 **** --- 2744,2758 ---- DESCR("STDDEV_SAMP aggregate final function"); DATA(insert OID = 1833 ( numeric_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_accum- _null_ )); DESCR("aggregate transition function"); + DATA(insert OID = 2858 ( numeric_avg_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_avg_accum- _null_ )); + DESCR("aggregate transition function"); DATA(insert OID = 1834 ( int2_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 21" _null_ _null_ _null_ int2_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1835 ( int4_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 23" _null_ _null_ _null_ int4_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1836 ( int8_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_accum- _null_ )); + DESCR("aggregate transition function"); + DATA(insert OID = 2746 ( int8_avg_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_avg_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1837 ( numeric_avg PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_ numeric_avg- _null_ )); DESCR("AVG aggregate final function"); diff -Nacr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h *** pgsql.orig/src/include/utils/builtins.h Fri Feb 16 18:06:37 2007 --- pgsql/src/include/utils/builtins.h Fri Feb 16 18:09:24 2007 *************** *** 841,849 **** --- 841,851 ---- extern Datum text_numeric(PG_FUNCTION_ARGS); extern Datum numeric_text(PG_FUNCTION_ARGS); extern Datum numeric_accum(PG_FUNCTION_ARGS); + extern Datum numeric_avg_accum(PG_FUNCTION_ARGS); extern Datum int2_accum(PG_FUNCTION_ARGS); extern Datum int4_accum(PG_FUNCTION_ARGS); extern Datum int8_accum(PG_FUNCTION_ARGS); + extern Datum int8_avg_accum(PG_FUNCTION_ARGS); extern Datum numeric_avg(PG_FUNCTION_ARGS); extern Datum numeric_var_pop(PG_FUNCTION_ARGS); extern Datum numeric_var_samp(PG_FUNCTION_ARGS); diff -Nacr pgsql.orig/src/test/regress/expected/create_aggregate.out pgsql/src/test/regress/expected/create_aggregate.out *** pgsql.orig/src/test/regress/expected/create_aggregate.out Sat Jul 29 13:45:35 2006 --- pgsql/src/test/regress/expected/create_aggregate.out Sat Feb 17 12:05:39 2007 *************** *** 3,11 **** -- -- all functions CREATEd CREATE AGGREGATE newavg ( ! sfunc = int4_accum, basetype = int4, stype = _numeric, ! finalfunc = numeric_avg, ! initcond1 = '{0,0,0}' ); -- test comments COMMENT ON AGGREGATE newavg_wrong (int4) IS 'an agg comment'; --- 3,11 ---- -- -- all functions CREATEd CREATE AGGREGATE newavg ( ! sfunc = int4_avg_accum, basetype = int4, stype = _int8, ! finalfunc = int8_avg, ! initcond1 = '{0,0}' ); -- test comments COMMENT ON AGGREGATE newavg_wrong (int4) IS 'an agg comment'; diff -Nacr pgsql.orig/src/test/regress/sql/create_aggregate.sql pgsql/src/test/regress/sql/create_aggregate.sql *** pgsql.orig/src/test/regress/sql/create_aggregate.sql Sat Jul 29 13:45:35 2006 --- pgsql/src/test/regress/sql/create_aggregate.sql Sat Feb 17 12:00:37 2007 *************** *** 4,12 **** -- all functions CREATEd CREATE AGGREGATE newavg ( ! sfunc = int4_accum, basetype = int4, stype = _numeric, ! finalfunc = numeric_avg, ! initcond1 = '{0,0,0}' ); -- test comments --- 4,12 ---- -- all functions CREATEd CREATE AGGREGATE newavg ( ! sfunc = int4_avg_accum, basetype = int4, stype = _int8, ! finalfunc = int8_avg, ! initcond1 = '{0,0}' ); -- test comments
pgsql-patches by date: