#!/usr/bin/env bash
#
# NUMA benchmark driver: runs read-only pgbench against a series of patched
# PostgreSQL builds, for several shared_buffers sizes, query modes and
# client pinning strategies, and collects the results in a new directory.
#
# Usage: <this script> [LABEL]
#
#   LABEL  optional suffix for the output directory (<timestamp>-<LABEL>)
#
# Expected in the current directory: a base postgresql.conf and the
# reserve-pages.sh helper (invoked through sudo). The builds are looked up
# under /home/azureuser/builds/<build>/bin and the database is named "test".

set -e

# data directory of the cluster being benchmarked
DATADIR=/mnt/data/data-numa

# original PATH, so each build's bin directory can be prepended to a clean value
PATH_OLD=$PATH

# warmup phase: client count and duration in seconds
WARMUP_CLIENTS=1
WARMUP_DURATION=300

# measured phase: client count and duration in seconds
CLIENTS=128
DURATION=300

TS=$(date +%Y%m%d-%H%M%S)

# an omitted label yields an output directory named "<timestamp>-"
LABEL=${1:-}

OUTDIR="$TS-$LABEL"

# quoted so a label with spaces or glob characters still names one directory;
# mkdir fails (and set -e aborts) if the directory already exists
mkdir "$OUTDIR"

# header row; note the file is space-separated despite the .csv extension
echo 'shared_buffers mode build pinning tps tps_25 tps_50 tps_75 tps_95 tps_99 lat_avg lat_stddev sb_warmup sb_benchmark' >> "$OUTDIR/results.csv"

# Print line number $1 of stdin; prints nothing when $1 is 0 or larger than
# the number of input lines.
nth_line() {
	awk -v n="$1" 'NR == n'
}

# Main benchmark matrix: shared_buffers size x query mode x build x pinning.
for sb in 32GB 16GB 48GB; do

	for m in simple prepared; do

		for b in numa-0-master numa-1-buffers numa-2-localalloc numa-3-no-tail numa-4-freelist numa-5-clocksweep numa-6-pgproc numa-7-pinning; do

			# The configuration is cumulative: numa-0-master resets it from the
			# base postgresql.conf, and each later build appends its own option
			# on top of everything enabled by the builds before it. The order
			# of the build list therefore matters.
			case "$b" in
				numa-0-master)
					cp postgresql.conf "$DATADIR"
					echo "shared_buffers = $sb" >> "$DATADIR/postgresql.conf"
					;;
				numa-1-buffers)
					echo "numa_buffers_interleave = on" >> "$DATADIR/postgresql.conf"
					;;
				numa-2-localalloc)
					echo "numa_localalloc = on" >> "$DATADIR/postgresql.conf"
					;;
				numa-3-no-tail | numa-5-clocksweep)
					# nothing to do here - these builds add no new setting
					:
					;;
				numa-4-freelist)
					echo "numa_partition_freelist = on" >> "$DATADIR/postgresql.conf"
					;;
				numa-6-pgproc)
					echo "numa_procs_interleave = on" >> "$DATADIR/postgresql.conf"
					;;
				numa-7-pinning)
					echo "numa_procs_pin = on" >> "$DATADIR/postgresql.conf"
					;;
			esac

			# use the binaries (pg_ctl, psql, pgbench) of the build under test
			export PATH="/home/azureuser/builds/$b/bin:$PATH_OLD"

			for pin in none random colocated; do

				echo "========== $sb / $m / $b / $pin =========="

				# per-run file names
				server_log="$OUTDIR/pg-$b.log"
				debug_log="$OUTDIR/debug.log"
				warmup_log="$OUTDIR/warmup-$b-$sb-$m-$pin.log"
				bench_log="$OUTDIR/pgbench-$b-$sb-$m-$pin.log"

				# Start once without huge pages, only to ask the server how many
				# huge pages its shared memory needs. The huge_pages lines pile
				# up in postgresql.conf across iterations; PostgreSQL uses the
				# last occurrence of a setting.
				echo "huge_pages = off" >> "$DATADIR/postgresql.conf"
				pg_ctl -D "$DATADIR" -l "$server_log" start >> "$debug_log" 2>&1

				npages=$(psql -t -A test -c "show shared_memory_size_in_huge_pages")

				echo "npages $npages"

				pg_ctl -D "$DATADIR" -l "$server_log" stop >> "$debug_log" 2>&1

				# pass the page count plus 3000 pages of headroom to the helper
				# (presumably it reserves that many huge pages - see reserve-pages.sh)
				sudo ./reserve-pages.sh "$((npages + 3000))"

				# restart with huge pages enabled for the actual run
				echo "huge_pages = on" >> "$DATADIR/postgresql.conf"
				pg_ctl -D "$DATADIR" -l "$server_log" start >> "$debug_log" 2>&1

				psql -t -A test -c "show huge_pages_status"

				grep HugePages /proc/meminfo

				numastat -cm


				# the whole warmup is pinned to a single CPU (physcpubind=1), to make it unbalanced

				# query the NUMA state (this likely also forces allocation to a single node)
				numactl --physcpubind=1 psql test -c "select numa_node, count(*) from pg_buffercache_numa group by numa_node"

				# warmup using a single backend, to cause imbalance
				numactl --physcpubind=1 pgbench -M "$m" -S -c "$WARMUP_CLIENTS" -j "$WARMUP_CLIENTS" -T "$WARMUP_DURATION" -P 1 test > "$warmup_log" 2>&1

				# query the NUMA state again, to see the distribution after the warmup
				numactl --physcpubind=1 psql test -c "select numa_node, count(*) from pg_buffercache_numa group by numa_node"


				# what fraction of shared buffers is used after warmup?
				sb_warmup=$(psql test -t -A -c "select round(count(relfilenode) * 100.0 / count(*), 2) from pg_buffercache")


				grep HugePages /proc/meminfo

				numastat -cm


				# now run a proper benchmark with many clients; --pin-cpus is
				# only passed for the pinning modes other than "none"
				bench_args=(-M "$m" -S -c "$CLIENTS" -j "$CLIENTS" -T "$DURATION" -P 1)
				if [[ "$pin" != "none" ]]; then
					bench_args+=(--pin-cpus "$pin")
				fi
				pgbench "${bench_args[@]}" test > "$bench_log" 2>&1

				# summary figures from the pgbench report
				tps=$(awk '/tps = / {print $3}' "$bench_log")
				lat_avg=$(awk '/latency average/ {print $4}' "$bench_log")
				lat_stddev=$(awk '/latency stddev/ {print $4}' "$bench_log")

				# calculate TPS percentiles from the per-second progress lines;
				# the sorted sample list is built once and then indexed
				sorted_tps=$(awk '/progress/ {print $4}' "$bench_log" | sort -n)

				# grep -c exits non-zero when it counts 0 lines; that must not
				# trip set -e, the count itself is still printed
				c=$(grep -c progress "$bench_log" || true)

				tps_25=$(nth_line "$((c / 4))" <<< "$sorted_tps")
				tps_50=$(nth_line "$((c / 2))" <<< "$sorted_tps")
				tps_75=$(nth_line "$((c * 3 / 4))" <<< "$sorted_tps")
				tps_95=$(nth_line "$((c * 95 / 100))" <<< "$sorted_tps")
				tps_99=$(nth_line "$((c * 99 / 100))" <<< "$sorted_tps")

				# what fraction of shared buffers is used after the benchmark?
				sb_benchmark=$(psql test -t -A -c "select round(count(relfilenode) * 100.0 / count(*), 2) from pg_buffercache")

				# Left unquoted on purpose: word splitting collapses any stray
				# whitespace or newlines in the captured values into single
				# spaces, which keeps the row format unchanged.
				# shellcheck disable=SC2086
				echo $sb $m $b $pin $tps $tps_25 $tps_50 $tps_75 $tps_95 $tps_99 $lat_avg $lat_stddev $sb_warmup $sb_benchmark >> "$OUTDIR/results.csv"

				pg_ctl -D "$DATADIR" -l "$server_log" stop >> "$debug_log" 2>&1

			done

		done

	done

done
