<?php

/* Connection options */
$dbConnString = 'pgsql:host=/tmp;dbname=test;port=5432';
$dbUsername = 'smagen';
$dbPassword = '';
$statTarget = 1000;

$con = new PDO($dbConnString, $dbUsername);

/*
 * Prepared statement for insertion of information about dataset.
 */
$datasetInsert = $con->prepare('
  INSERT INTO datasets
  (
	distribution_type,
	distr1,
	param1,
	distr2,
	param2,
	distr3,
	param3,
	stat_target
  )
  VALUES
  (
	:distribution_type,
	:distr1,
	:param1,
	:distr2,
	:param2,
	:distr3,
	:param3,
	:stat_target
  )
  RETURNING
	id;
');

/*
 * Prepared statement for particular test result insertion.
 */
$testInsert = $con->prepare('
  INSERT INTO test_results
  (
	test_id,
	dataset_id,
	operator,
	estimate_count,
	actual_count
  )
  VALUES
  (
	:test_id,
	:dataset_id,
	:operator,
	:estimate_count,
	:actual_count
  );
');

/*
 * Operators which are using for testing. For each test range search using
 * each operator are executed.
 */
$operators = array(
  '&&', 
  '<@', 
  '@>'
);

/*
 * Test ranges are selected from the database.
 */
$tests = array();
foreach ($con->query('SELECT * FROM tests') as $row)
{
  $tests [$row['id']]= $row['value'];
}

/*
 * Distribution which values can be used as start of range:
 *  - uniform - values are distributed uniformly in range [0; param]
 *  - exponential - values are distributed exponentially which given parameter
 *  - gaussian - values according gaussian distribution which mean = 0, 
 *    variance = param
 */
$startDistributions = array(
  'uniform',
  'exponential',
  'gaussian'
);

/*
 * Distribution which values can be used as size of range:
 *  - uniform - values are distributed uniformly in range [0; param]
 *  - exponential - values are distributed exponentially which given parameter
 *  - gaussian_non_negative - values according gaussian distribution which
 *    mean = parameter, variance = param. Since we don't allow ranges with
 *    negative size, we use absolute value.
 */
$sizeDistributions = array(
  'uniform',
  'exponential',
  'gaussian_non_negative'
);

/* Count of rows in each dataset */
$count = 1000000;
/* Count of clusters in cluster dataset */
$clustersCount = 20;

/**
 * Returns estimated and actual counts of rows returned by given query.
 * 
 * @global PDO $con Database connection
 * @param string $query Query
 * @return integer
 */
function getCounts($query)
{
  global $con;
  /* Get plan of query execution in JSON format */
  $result = $con->query('EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ' . $query);
  if (!$result)
  {
    echo $query;
    exit(1);
  }
  $row = $result->fetch(PDO::FETCH_NUM);
  
  /* Decoding JSON */
  $explain = json_decode($row[0], true);
  $plan = $explain[0]['Plan'];
  
  return array(
	'actual_count'   => $plan['Actual Rows'],
	'estimate_count' => $plan['Plan Rows']
  );
}

/**
 * Performs tests on given dataset.
 * 
 * @param  type  $tableSql SQL query for test dataset generation.
 * @param  type  $params   Params of test dataset.
 */
function testDistribution($tableSql, $params)
{
  global $con, $tests, $operators, $datasetInsert, $testInsert, $statTarget;
  $con->query('DROP TABLE IF EXISTS ranges;');
  
  /* 
   * Create table by given query, set statistics, collect statistics and create
   * index for faster testing.
   */
  $con->query($tableSql);
  $con->query('ALTER TABLE ranges ALTER COLUMN value SET STATISTICS ' . $statTarget . ';');
  $con->query('ANALYZE ranges;');
  $con->query('CREATE INDEX ranges_idx ON ranges USING gist (value);');
  
  $params['stat_target'] = $statTarget;
  $datasetInsert->execute($params);
  $row = $datasetInsert->fetch(PDO::FETCH_ASSOC);
  $datasetId = $row['id'];
	
  /* Perform tests */
  foreach ($tests as $testId => $testValue)
  {
	foreach ($operators as $operator)
	{
	  $query = "
		SELECT
		  *
		FROM
		  ranges
		WHERE
		  value $operator '$testValue'::floatrange;";
	  $counts = getCounts($query);
	  $params = array_merge($counts, array(
		'test_id'     => $testId,
		'dataset_id'  => $datasetId,
		'operator'    => $operator
	  ));
	  $testInsert->execute($params);
	}
  }
}

/* Possible distribution parameters for range size generation */
$sizeParams = array(
  10,
  100,
  1000,
  10000
);

/* Simple: ranges are distributed independently */
$startParam = 1000000;
foreach ($startDistributions as $startDistribution)
  foreach ($sizeDistributions as $sizeDistribution)
	foreach ($sizeParams as $sizeParam)
	{
	  testDistribution("
		CREATE TABLE
		  ranges AS
		(
		  SELECT
			*
		  FROM
			generate_objects(
			  $count,
			  '$startDistribution',
			  $startParam,
			  '$sizeDistribution',
			  $sizeParam
			) AS value
		);
	  ",
	  array(
		'distribution_type' => 'simple',
		'distr1'            => $startDistribution,
		'param1'            => $startParam,
		'distr2'            => $sizeDistribution,
		'param2'            => $sizeParam,
		'distr3'            => NULL,
		'param3'            => NULL
	  ));
	}

/* 
 * Clusters: ranges are grouped into clusters. Mean offset of range in cluster
 * is in clustersCount times less then range of clusters.
 */
$offsetParam = $startParam / $clustersCount;
foreach ($startDistributions as $centerDistribution)
  foreach ($startDistributions as $offsetDistribution)
	foreach ($sizeDistributions as $sizeDistribution)
	  foreach ($sizeParams as $sizeParam)
	  {
		testDistribution("
		  CREATE TABLE
			ranges AS
		  (
			SELECT
			  *
			FROM
			  generate_objects_clusters(
				$count,
				$clustersCount,
				'$centerDistribution',
				$startParam,
				'$offsetDistribution',
				$offsetParam,
				'$sizeDistribution',
				$sizeParam
			  ) AS value
		  );
		",
		array(
		  'distribution_type' => 'clusters',
		  'distr1'            => $centerDistribution,
		  'param1'            => $startParam,
		  'distr2'            => $offsetDistribution,
		  'param2'            => $offsetParam,
		  'distr3'            => $sizeDistribution,
		  'param3'            => $sizeParam
		));
	  }

/* Lifetime: ranges are produced by life simulation */
$lifetimeParam = $startParam / $clustersCount;
foreach ($startDistributions as $startDistribution)
  foreach ($sizeDistributions as $lifetimeDistribution)
	foreach ($sizeDistributions as $sizeDistribution)
	  foreach ($sizeParams as $sizeParam)
	  {
		testDistribution("
		  CREATE TABLE
			ranges AS
		  (
			SELECT
			  *
			FROM
			  generate_objects_clusters(
				$count,
				$clustersCount,
				'$startDistribution',
				$startParam,
				'$lifetimeDistribution',
				$lifetimeParam,
				'$sizeDistribution',
				$sizeParam
			  ) AS value
		  );
		",
		array(
		  'distribution_type' => 'lifetime',
		  'distr1'            => $centerDistribution,
		  'param1'            => $startParam,
		  'distr2'            => $lifetimeDistribution,
		  'param2'            => $lifetimeParam,
		  'distr3'            => $sizeDistribution,
		  'param3'            => $sizeParam
		));
	  }

