#!/bin/bash

# Configuration
PUB_DIR="/home/ajin/ajin-setup/dataoss"
SUB_DIR="/home/ajin/ajin-setup/dataoss2"
PUB_PORT=6972
SUB_PORT=6973
REPL_USER="repl_user"
REPL_DB="repl_db"
LOG_FILE="tablesync_test.log"

# Helper function to stop and clean PostgreSQL
cleanup_pg() {
    local dir=$1
    local port=$2
    echo "Stopping PostgreSQL in $dir (port $port)..."
    pg_ctl -D "$dir" -o "-p $port" stop -m immediate
    echo "Cleaning up $dir..."
    rm -rf "$dir"
}

# Initialize PostgreSQL cluster
init_pg() {
    local dir=$1
    echo "Initializing PostgreSQL cluster in $dir..."
    initdb -D "$dir" --no-locale --encoding=UTF8
	cat << EOF >> "$dir"/postgresql.conf
wal_level = logical
max_wal_senders = 10
max_replication_slots = 10
log_statement = 'all'
log_min_messages = 'debug1'
log_duration = on
log_min_duration_statement = 0

EOF
}

# Start PostgreSQL
start_pg() {
    local dir=$1
    local port=$2
    echo "Starting PostgreSQL in $dir on port $port..."
    pg_ctl -D "$dir" -o "-p $port" -l "$dir/logfile" start
    sleep 3
}

# Create partitioned table and replication setup
setup_replication() {
    echo "Setting up replication..."

    # Publisher setup
    psql -p $PUB_PORT -d postgres -c "CREATE DATABASE $REPL_DB;"
    
    # Create 10 partitioned tables on publisher
    for i in {1..10}; do
        psql -p $PUB_PORT -d $REPL_DB <<EOF

CREATE TABLE sales${i} (
    id SERIAL NOT NULL,
    sale_date DATE NOT NULL,
    region TEXT NOT NULL,
    amount NUMERIC(10,2) NOT NULL,
    category TEXT NOT NULL,
    PRIMARY KEY (id, sale_date,region)
) PARTITION BY RANGE (sale_date);

CREATE TABLE sales${i}_2023 PARTITION OF sales${i}
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')
    PARTITION BY HASH (region);

CREATE TABLE sales${i}_2024 PARTITION OF sales${i}
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
    PARTITION BY HASH (region);

CREATE TABLE sales${i}_2023_part1 PARTITION OF sales${i}_2023
    FOR VALUES WITH (MODULUS 2, REMAINDER 0);

CREATE TABLE sales${i}_2023_part2 PARTITION OF sales${i}_2023
    FOR VALUES WITH (MODULUS 2, REMAINDER 1);

CREATE TABLE sales${i}_2024_part1 PARTITION OF sales${i}_2024
    FOR VALUES WITH (MODULUS 2, REMAINDER 0);

CREATE TABLE sales${i}_2024_part2 PARTITION OF sales${i}_2024
    FOR VALUES WITH (MODULUS 2, REMAINDER 1);

EOF
    done

    psql -p $PUB_PORT -d $REPL_DB -c "CREATE PUBLICATION pub FOR ALL TABLES WITH (publish_via_partition_root = true);"

    echo "Inserting data ..."

    # Insert data into all 10 tables
    for i in {1..10}; do
        psql -p $PUB_PORT -d $REPL_DB -c "INSERT INTO sales${i} (sale_date, region, amount, category)
SELECT 
    ('2023-01-01'::DATE + (random() * 730)::int) AS sale_date,
    CASE WHEN random() > 0.5 THEN 'North' ELSE 'South' END AS region,
    (random() * 1000)::NUMERIC(10,2) AS amount,
    CASE WHEN random() > 0.5 THEN 'Electronics' ELSE 'Furniture' END AS category
FROM generate_series(1, 500);"
    done

    # Subscriber setup
    psql -p $SUB_PORT -d postgres -c "CREATE DATABASE $REPL_DB;"
    
    # Create 10 partitioned tables on subscriber
    for i in {1..10}; do
        psql -p $SUB_PORT -d $REPL_DB <<EOF
CREATE TABLE sales${i} (
    id SERIAL NOT NULL,
    sale_date DATE NOT NULL,
    region TEXT NOT NULL,
    amount NUMERIC(10,2) NOT NULL,
    category TEXT NOT NULL,
    PRIMARY KEY (id, sale_date,region)
) PARTITION BY RANGE (sale_date);

CREATE TABLE sales${i}_2023 PARTITION OF sales${i}
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')
    PARTITION BY HASH (region);

CREATE TABLE sales${i}_2024 PARTITION OF sales${i}
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01')
    PARTITION BY HASH (region);

CREATE TABLE sales${i}_2023_part1 PARTITION OF sales${i}_2023
    FOR VALUES WITH (MODULUS 2, REMAINDER 0);

CREATE TABLE sales${i}_2023_part2 PARTITION OF sales${i}_2023
    FOR VALUES WITH (MODULUS 2, REMAINDER 1);

CREATE TABLE sales${i}_2024_part1 PARTITION OF sales${i}_2024
    FOR VALUES WITH (MODULUS 2, REMAINDER 0);
	
CREATE TABLE sales${i}_2024_part2 PARTITION OF sales${i}_2024
    FOR VALUES WITH (MODULUS 2, REMAINDER 1);

EOF
    done

    psql -p $SUB_PORT -d $REPL_DB -c "CREATE SUBSCRIPTION sub CONNECTION 'host=localhost port=$PUB_PORT dbname=$REPL_DB user=$USER' PUBLICATION pub;"
}

# Measure tablesync time
measure_tablesync() {
    echo "Measuring tablesync time..."

    while true; do
        sync_status=$(psql -p $SUB_PORT -d $REPL_DB -t -c "SELECT count(*) FROM pg_subscription_rel WHERE srsubstate != 'r';" | xargs)
        if [ "$sync_status" -eq 0 ]; then
            break
        fi
        sleep 1
    done

    # Grep logs for all 10 tables
    for i in {1..10}; do
        grep -E -A1 "LOG:  statement: COPY public.sales${i}|\(SELECT id, sale_date, region, amount, category FROM public.sales${i}\) TO STDOUT" $PUB_DIR/logfile
    done
}

# Main execution
cleanup_pg "$PUB_DIR" "$PUB_PORT"
cleanup_pg "$SUB_DIR" "$SUB_PORT"

init_pg "$PUB_DIR"
init_pg "$SUB_DIR"

start_pg "$PUB_DIR" "$PUB_PORT"
start_pg "$SUB_DIR" "$SUB_PORT"

setup_replication
measure_tablesync

source ~/scripts/log_analyzer_functions.sh

calculate_sync_times $SUB_DIR/logfile