# test setup:
#
# create database qsort_test TEMPLATE template0 LC_COLLATE "C"; -- only C and icu collations abbreviatable
# create extension pg_prewarm';
# alter system set shared_buffers = '8GB';
# alter system set max_parallel_workers = 0;

set -e

ROWS=$1
SETUP=$2

function log {
	echo `date +%s` [`date +'%Y-%m-%d %H:%M:%S'`] $1
}

function create_tables {

if [[ "$SETUP" == "setup" ]] ; then

	log "seting up source data with $ROWS rows"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
-- tables for source data
DROP TABLE IF EXISTS data_int;
CREATE TABLE data_int (a numeric, rand float default random());
INSERT INTO  data_int (a) SELECT i FROM generate_series(1, $ROWS) i;
EOF
else
	log "skipping set up of source data"
fi

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
-- tables used for the actual testing
CREATE UNLOGGED TABLE IF NOT EXISTS int_test          (a int, b text generated always as (md5(a::text)) stored);
CREATE UNLOGGED TABLE IF NOT EXISTS bigint_test       (a bigint, b text generated always as (md5(a::text)) stored);
CREATE UNLOGGED TABLE IF NOT EXISTS text_test         (a text, b text generated always as (md5(a::text)) stored);
EOF

}

function truncate_tables {

	log "truncating tables"

	for data_type in int bigint text; do
		./inst/bin/psql -d qsort_test -c 'TRUNCATE TABLE '${data_type}'_test' > /dev/null
	done;

}

function vacuum_analyze {

	log "analyzing"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
VACUUM ANALYZE;
CHECKPOINT;
EOF

}

function prewarm {
	log "prewarming buffers"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SELECT pg_prewarm(oid) FROM pg_class WHERE oid > 16384 AND relkind = 'r';
EOF

}

################# load test tables

function load_random {

	truncate_tables

	log "loading random"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a FROM data_int ORDER BY rand;
INSERT INTO bigint_test SELECT a FROM data_int ORDER BY rand;
INSERT INTO text_test SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY rand;;

EOF

	prewarm

	vacuum_analyze

}

function load_sorted {

	truncate_tables

	log "loading sorted"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a FROM data_int ORDER BY a;
INSERT INTO bigint_test SELECT a FROM data_int ORDER BY a;
INSERT INTO text_test SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a;

EOF

	prewarm

	vacuum_analyze

}

function load_sort50 {

	truncate_tables

	log "loading sort50"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.5 ORDER BY rand);

INSERT INTO bigint_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.5 ORDER BY rand);

INSERT INTO text_test
(SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
UNION ALL
(SELECT lpad(a::text, 8, '0') FROM data_int WHERE a > $ROWS * 0.5 ORDER BY rand);

EOF

	prewarm

	vacuum_analyze

}

function load_sort90 {

	truncate_tables

	log "loading sort90"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.9)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.9 ORDER BY rand);

INSERT INTO bigint_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.9)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.9 ORDER BY rand);

INSERT INTO text_test
(SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a LIMIT $ROWS * 0.9)
UNION ALL
(SELECT lpad(a::text, 8, '0') FROM data_int WHERE a > $ROWS * 0.9 ORDER BY rand);


EOF

	prewarm

	vacuum_analyze

}

function load_sort99 {

	truncate_tables

	log "loading sort99"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.99)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.99 ORDER BY rand);

INSERT INTO bigint_test
(SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.99)
UNION ALL
(SELECT a FROM data_int WHERE a > $ROWS * 0.99 ORDER BY rand);

INSERT INTO text_test
(SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a LIMIT $ROWS * 0.99)
UNION ALL
(SELECT lpad(a::text, 8, '0') FROM data_int WHERE a > $ROWS * 0.99 ORDER BY rand);


EOF

	prewarm

	vacuum_analyze

}

function load_reversed {

	truncate_tables

	log "loading reversed"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a FROM data_int ORDER BY a DESC;
INSERT INTO bigint_test SELECT a FROM data_int ORDER BY a DESC;
INSERT INTO text_test SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a DESC;

EOF

	prewarm

	vacuum_analyze

}

function load_dithered {

	truncate_tables

	log "loading dithered"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a FROM data_int ORDER BY a + (100 * random());
INSERT INTO bigint_test SELECT a FROM data_int ORDER BY a + (100 * random());
INSERT INTO text_test SELECT lpad(a::text, 8, '0') FROM data_int ORDER BY a + (100 * random()); -- xxx

EOF

	prewarm

	vacuum_analyze

}

function load_organ {

	truncate_tables

	log "loading organ"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO int_test
(SELECT a FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT a FROM first_part ORDER BY a DESC);

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO bigint_test
(SELECT a FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT a FROM first_part ORDER BY a DESC);

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO text_test
(SELECT lpad(a::text, 8, '0') FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT lpad(a::text, 8, '0') FROM first_part ORDER BY a DESC);

EOF

	prewarm

	vacuum_analyze

}


function load_merge {

	truncate_tables

	log "loading merge"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO int_test
(SELECT a FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT a FROM first_part ORDER BY a ASC);

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO bigint_test
(SELECT a FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT a FROM first_part ORDER BY a ASC);

WITH first_part AS (SELECT a FROM data_int ORDER BY a LIMIT $ROWS * 0.5)
INSERT INTO text_test
(SELECT lpad(a::text, 8, '0') FROM first_part ORDER BY a ASC)
UNION ALL
(SELECT lpad(a::text, 8, '0') FROM first_part ORDER BY a ASC);

EOF

	prewarm

	vacuum_analyze

}

function load_dupsq {

	truncate_tables

	log "loading dupsq"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a % floor(sqrt($ROWS))::int FROM data_int ORDER BY rand;
INSERT INTO bigint_test SELECT a % floor(sqrt($ROWS))::int FROM data_int ORDER BY rand;
INSERT INTO text_test SELECT lpad((a % floor(sqrt($ROWS))::int)::text, 8, '0') FROM data_int ORDER BY rand;

EOF

	prewarm

	vacuum_analyze

}

function load_dup8 {

	truncate_tables

	log "loading dup8"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT (power(a, 8) + $ROWS * 0.5) % $ROWS::int FROM data_int ORDER BY rand;
INSERT INTO bigint_test SELECT (power(a, 8) + $ROWS * 0.5) % $ROWS::int FROM data_int ORDER BY rand;
INSERT INTO text_test SELECT lpad(((power(a, 8) + $ROWS * 0.5) % $ROWS)::int::text, 8, '0') FROM data_int ORDER BY rand;

EOF

	prewarm

	vacuum_analyze

}

function load_mod8 {

	truncate_tables

	log "loading mod8"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a % 8 FROM data_int ORDER BY rand;
INSERT INTO bigint_test SELECT a % 8 FROM data_int ORDER BY rand;
INSERT INTO text_test SELECT lpad((a % 8)::int::text, 8, '0') FROM data_int ORDER BY rand;

EOF

	prewarm

	vacuum_analyze

}

function load_mod100 {

	truncate_tables

	log "loading mod100"

	./inst/bin/psql -d qsort_test > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a % 100 FROM data_int ORDER BY rand;
INSERT INTO bigint_test SELECT a % 100 FROM data_int ORDER BY rand;
INSERT INTO text_test SELECT lpad((a % 100)::int::text, 8, '0') FROM data_int ORDER BY rand;

EOF

	prewarm

	vacuum_analyze

}

################# run

function run_query {

	times=""
	group=$1
	data_type=$2
	sort_type=$3
	query=$4

# 	echo "--" `date` [`date +%s`] >> explains.log
# 	echo "-- $group rows=$ROWS work_mem=$data_type workers=$workers" >> explains.log
#
# 	./inst/bin/psql -d qsort_test >> explains.log 2>&1 <<EOF
# SET trace_sort=on;
# SET max_parallel_workers_per_gather=$workers;
# explain $query
# EOF

	s=`date +%s`

	for i in `seq 1 5`; do

		/usr/bin/time -f '%e' -o 'query.time' ./inst/bin/psql -d qsort_test > /dev/null <<EOF
\pset pager off
\o /dev/null
SET trace_sort=on;
SET work_mem='2GB';
SET max_parallel_workers_per_gather=0;
COPY ($query) TO '/dev/null'
EOF

		x=`cat query.time`
		times="$times\t$x"

	done

	e=`date +%s`

	echo -e "$ROWS\t$group\t$data_type\t$sort_type\t$query\t$times" >> results.csv
}

function run_index {

	times=""
	group=$1
	data_type=$2
	sort_type=$3
	query=$4

	s=`date +%s`

    for i in `seq 1 5`; do

        /usr/bin/time -f '%e' -o 'query.time' ./inst/bin/psql -d qsort_test > /dev/null <<EOF
\pset pager off
\o /dev/null
SET maintenance_work_mem='2GB';
$query
EOF

		x=`cat query.time`
		times="$times\t$x"

	done

	e=`date +%s`

	echo -e "$ROWS\t$group\t$data_type\t$sort_type\t$query\t$times" >> results.csv
}

function run_queries {

	for data_type in int bigint text; do

		log "running queries with type $data_type"

		run_query $1 $data_type 'Value' 'SELECT a FROM '${data_type}'_test ORDER BY a OFFSET '$ROWS''
		run_query $1 $data_type 'Tuple' 'SELECT * FROM '${data_type}'_test ORDER BY a OFFSET '$ROWS''
		run_index $1 $data_type 'BTree' 'CREATE INDEX x ON '${data_type}'_test (a); DROP INDEX x'

	done

}

create_tables

log "sort benchmark $ROWS"

echo -e "Num Rows\tDistribution\tData Type\tSort Type\tQuery\t\tTimes" > results.csv


load_random
run_queries "random"

load_sorted
run_queries "sorted"

load_sort50
run_queries "sort50"

load_sort90
run_queries "sort90"

load_sort99
run_queries "sort99"

load_dithered
run_queries "dithered"

load_reversed
run_queries "reversed"

load_organ
run_queries "organ"

load_merge
run_queries "merge"

load_dupsq
run_queries "dupsq"

load_dup8
run_queries "dup8"

load_mod8
run_queries "mod8"

load_mod100
run_queries "mod100"
