#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ===== 0. Signal Trapping =====
# Handler for SIGINT and SIGTERM: prints a blank line followed by the abort
# banner on stdout, then terminates the script with exit status 1.
abort_on_signal() {
  printf '\n%s\n' "!!!! Script aborted by user (Ctrl+C) !!!!"
  exit 1
}
trap abort_on_signal INT TERM

# ===== 1. Configuration =====
# DB_NAME and RESULT_DIR can be supplied through the environment; the values
# below are used only when they are unset or empty.
: "${DB_NAME:=job_imdb}"
: "${RESULT_DIR:=results}"

# Both output files are written inside RESULT_DIR.
SUMMARY_FILE="${RESULT_DIR}/summary.csv"
DEBUG_LOG="${RESULT_DIR}/benchmark.log"

# Number of runs per (query, algorithm) pair, and the single-run duration
# above which a query is treated as "long".
LONG_QUERY_THRESHOLD_MS=$((5 * 60 * 1000)) # 5 minutes in milliseconds
REPEAT_COUNT=3

# Algorithm list: one CSV column is produced per entry, in this order.
MEASURE_ALGOS=(
  "dp"
  "goo_cost"
  "goo_result_size"
  "goo_combined"
  "geqo"
)

# Make sure the output directory exists and start from an empty log file.
mkdir -p "$RESULT_DIR"
printf '' > "$DEBUG_LOG"

#######################################
# Print a timestamped message and append the same line to the debug log.
# Globals:   DEBUG_LOG (read) - path of the file the line is appended to
# Arguments: $@ - message words; they are joined with single spaces
# Outputs:   "YYYY-MM-DD HH:MM:SS <message>" on stdout and in DEBUG_LOG
#######################################
log() {
  # "$*" joins on the first character of IFS; pin it so the separator is
  # always a space, matching how echo would have joined the words.
  local IFS=' '
  # printf '%s' emits the message verbatim; unlike echo it never interprets
  # backslash sequences or option-like words in the text.
  printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$DEBUG_LOG"
}

# Record the effective settings at the top of the log, one entry per line,
# followed by an empty message as a separator.
banner_lines=(
  "========== JOB Benchmark Start =========="
  "DB_NAME       = $DB_NAME"
  "ALGOS         = ${MEASURE_ALGOS[*]}"
  "REPEAT_COUNT  = $REPEAT_COUNT"
  ""
)
for banner_line in "${banner_lines[@]}"; do
  log "$banner_line"
done

# ===== 2. SQL Configuration Helper =====
#######################################
# Emit the session SET statements associated with a benchmark algorithm.
# Arguments: $1 - algorithm name: dp, goo_cost, goo_result_size,
#                 goo_combined or geqo
# Outputs:   one line of semicolon-terminated SET statements on stdout
# Returns:   0 for a known name; 1, with a message on stderr and nothing on
#            stdout, for an unknown or missing name. Failing here keeps a
#            mistyped algorithm from being benchmarked with session defaults
#            and reported under the wrong label.
#######################################
get_guc_sql() {
  # ${1:-} keeps `set -u` from aborting with a cryptic message when the
  # argument is missing; the default arm below reports it instead.
  local algo="${1:-}"
  case "$algo" in
    dp)
      printf '%s\n' "SET geqo_threshold = 100; SET enable_goo_join_search = off;"
      ;;
    goo_cost | goo_result_size | goo_combined)
      # The strategy value is the algorithm name without its "goo_" prefix.
      printf "SET geqo_threshold = 2; SET enable_goo_join_search = on; SET goo_greedy_strategy = '%s';\n" \
        "${algo#goo_}"
      ;;
    geqo)
      printf '%s\n' "SET geqo_threshold = 2; SET enable_goo_join_search = off;"
      ;;
    *)
      printf 'get_guc_sql: unknown algorithm: %s\n' "$algo" >&2
      return 1
      ;;
  esac
}

# ===== 3. Initialize CSV Header =====
# The header row is "query" followed by one column per algorithm, in
# MEASURE_ALGOS order. Writing it with ">" replaces any earlier summary file.
header_fields=("query" "${MEASURE_ALGOS[@]}")
# Join the fields with commas inside a subshell so the script's own IFS is
# left untouched.
header=$(
  IFS=,
  printf '%s' "${header_fields[*]}"
)
printf '%s\n' "$header" > "$SUMMARY_FILE"

# ===== 4. Main Execution Loop =====
# Build sql_files: the regular *.sql files of the current directory in
# version order (sort -V), or in plain sort order where -V is unsupported.
#
# The names come from a shell glob rather than from parsing `ls` output, so a
# directory that happens to be called "*.sql" is not expanded into its
# contents and a name starting with "-" is not taken for an option.
sql_files=()
sql_candidates=()
for f in *.sql; do
  # An unmatched glob is left as the literal "*.sql"; the -f test drops it,
  # along with anything that is not a regular file.
  if [[ -f "$f" ]]; then
    sql_candidates+=("$f")
  fi
done

if ((${#sql_candidates[@]} > 0)); then
  while IFS= read -r f; do
    sql_files+=("$f")
  done < <(
    printf '%s\n' "${sql_candidates[@]}" | sort -V 2>/dev/null \
      || printf '%s\n' "${sql_candidates[@]}" | sort
  )
else
  # Make the empty case visible instead of silently producing a header-only
  # summary.
  log "[Warn] No *.sql files found in $(pwd); nothing to benchmark."
fi

# Paths of the scratch files belonging to the run currently in flight
# (empty until the first run starts).
tmp_exec_sql=""
run_out=""

# Delete whichever scratch files are still on disk. Installed as the EXIT
# handler so that an interrupt, a fatal psql error, or a `set -e` abort does
# not leave .exec_*/.out_* files behind in the working directory.
cleanup_tmp_files() {
  local scratch
  for scratch in "$tmp_exec_sql" "$run_out"; do
    if [[ -n "$scratch" ]]; then
      rm -f -- "$scratch"
    fi
  done
}
trap cleanup_tmp_files EXIT

# For every query file, time it under each algorithm and append one CSV row:
#   <file name without .sql>,<best ms | TIMEOUT | ERROR>,... (one per algo)
# The ${arr[@]+...} form keeps `set -u` from rejecting an empty list on bash
# releases older than 4.4.
for sql in ${sql_files[@]+"${sql_files[@]}"}; do
  query_base="${sql%.sql}"
  log ">>> Processing Query: $sql"

  csv_line="$query_base"

  for algo in "${MEASURE_ALGOS[@]}"; do
    min_time_ms=999999999
    final_result="ERROR"

    # psql input for this (algorithm, query) pair: the algorithm's SET
    # statements, then timing on, then the query file itself. printf '%s'
    # writes the backslash commands verbatim regardless of echo settings.
    tmp_exec_sql=".exec_${algo}_${query_base}.tmp"
    run_out=".out_${algo}_${query_base}.tmp"
    {
      get_guc_sql "$algo"
      printf '%s\n' '\timing on'
      printf '%s\n' "\\i '$sql'"
    } > "$tmp_exec_sql"

    # Upper bound of the repeat loop; lowered once (by one) when a run
    # exceeds LONG_QUERY_THRESHOLD_MS.
    current_max_iters=$REPEAT_COUNT

    for ((i = 1; i <= current_max_iters; i++)); do
      # Capture psql's status without tripping `set -e`.
      exit_code=0
      psql -d "$DB_NAME" -v ON_ERROR_STOP=1 -f "$tmp_exec_sql" \
        > "$run_out" 2>&1 || exit_code=$?

      if ((exit_code == 0)); then
        # === Success ===
        # Take the last "Time: <n> ms" value in the output. `sed -n ... p`
        # exits 0 even when nothing matches, so under `set -e` + pipefail a
        # run without timing output reaches the warning below instead of
        # silently killing the script.
        curr_time=$(sed -nE 's/.*Time: ([0-9.]+) ms.*/\1/p' "$run_out" | tail -n1)

        if [[ -n "$curr_time" ]]; then
          # Timings are fractional, so compare them in awk; "+ 0" forces a
          # numeric comparison.
          is_smaller=$(awk -v a="$curr_time" -v b="$min_time_ms" \
            'BEGIN { print (a + 0 < b + 0 ? 1 : 0) }')
          if ((is_smaller == 1)); then
            min_time_ms=$curr_time
          fi
          final_result="OK"

          is_long_query=$(awk -v t="$curr_time" -v limit="$LONG_QUERY_THRESHOLD_MS" \
            'BEGIN { print (t + 0 > limit + 0 ? 1 : 0) }')
          # Only the first long run lowers the bound; afterwards the bound no
          # longer equals REPEAT_COUNT.
          if ((is_long_query == 1 && current_max_iters == REPEAT_COUNT)); then
            current_max_iters=$((current_max_iters - 1))
            log "   [Info] Query took > 5min ($curr_time ms). Reducing repeat count to $current_max_iters for this run."
          fi
        else
          log "   [Warn] Exit code 0 but no time found."
          cat "$run_out" >> "$DEBUG_LOG"
        fi
      elif grep -E -i -q "(timeout|canceling statement)" "$run_out"; then
        # === Timeout ===
        # Stop repeating; the cell is reported as TIMEOUT even if an earlier
        # iteration succeeded.
        final_result="TIMEOUT"
        log "   -> $algo: TIMEOUT (Iter $i)"
        break
      else
        # === CRITICAL ERROR (Syntax, Connection, etc.) ===
        log "!!! FATAL ERROR executing $algo on $sql !!!"
        tee -a "$DEBUG_LOG" < "$run_out"
        rm -f -- "$run_out" "$tmp_exec_sql"
        exit 1 # Fail Fast
      fi
      rm -f -- "$run_out"
    done
    rm -f -- "$tmp_exec_sql" "$run_out"

    # Prepare CSV Value
    case "$final_result" in
      TIMEOUT)
        val="TIMEOUT"
        ;;
      OK)
        val="$min_time_ms"
        log "   -> $algo: $val ms"
        ;;
      *)
        # No iteration yielded a parsable "Time:" line, or no iteration ran.
        val="ERROR"
        ;;
    esac

    csv_line="${csv_line},${val}"
  done

  # Write to CSV
  printf '%s\n' "$csv_line" >> "$SUMMARY_FILE"

done

# Closing banner: a separator rule, then the location of the CSV summary.
closing_rule="=========================================="
log "${closing_rule}"
log "Done. Results saved to ${SUMMARY_FILE}"