Re: 9.0.5 issue - duplicate rows returned when using SQL_ATTR_ROW_ARRAY_SIZE attribute - Mailing list pgsql-odbc
From | John Kew |
---|---|
Subject | Re: 9.0.5 issue - duplicate rows returned when using SQL_ATTR_ROW_ARRAY_SIZE attribute |
Date | |
Msg-id | CY1PR0301MB197858D2B4F0B69045F09FB5CCC10@CY1PR0301MB1978.namprd03.prod.outlook.com |
In response to | 9.0.5 issue - duplicate rows returned when using SQL_ATTR_ROW_ARRAY_SIZE attribute (John Kew <jkew@tableau.com>) |
Responses | Re: Re: 9.0.5 issue - duplicate rows returned when using SQL_ATTR_ROW_ARRAY_SIZE attribute |
List | pgsql-odbc |
Folks,
As promised, here is a repro of the issue we are seeing, using generated data in a temp table. The first query returns the correct results on both 9.0.3 and 9.0.5, whereas the second returns wrong results on 9.0.5 only.
---------
#include <windows.h>
#include <sqlext.h>
#include <cstdlib>   // atoi
#include <iostream>
#include <fstream>
#include <string>
#include <vector>

namespace {

std::vector<SQLUSMALLINT> m_rowStatus;
SQLHENV henv = SQL_NULL_HANDLE;
SQLHDBC hdbc = SQL_NULL_HANDLE;
SQLHSTMT hstmt = SQL_NULL_HANDLE;
SQLRETURN ret;
SQLCHAR buffer[SQL_MAX_MESSAGE_LENGTH];
SQLSMALLINT bufSize = SQL_MAX_MESSAGE_LENGTH;

// Dump all diagnostic records for the given handle to stderr.
void ReportError( SQLSMALLINT handleType, SQLHANDLE handle )
{
    SQLCHAR sqlState[6] = { 0 }, msg[SQL_MAX_MESSAGE_LENGTH] = { 0 };
    SQLINTEGER nativeError;
    SQLSMALLINT recNumber = 1, msgLen = SQL_MAX_MESSAGE_LENGTH;
    SQLRETURN sret;
    while ( (sret = SQLGetDiagRec( handleType, handle, recNumber, sqlState, &nativeError,
                                   msg, SQL_MAX_MESSAGE_LENGTH, &msgLen )) != SQL_NO_DATA
            && SQL_SUCCEEDED( sret ) )
    {
        std::cerr << "SqlState = " << sqlState << std::endl
                  << "NativeError = " << nativeError << std::endl
                  << "Msg = " << msg << std::endl
                  << "MsgLen = " << msgLen << std::endl;
        recNumber++;
    }
    std::cerr << "SQLGetDiagRec returned " << sret << std::endl;
}

void connect( const std::string &url )
{
    // Set up environment handle
    ret = SQLAllocHandle( SQL_HANDLE_ENV, SQL_NULL_HANDLE, &henv );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    // Set up environment attributes
    ret = SQLSetEnvAttr( henv, SQL_ATTR_ODBC_VERSION, (SQLPOINTER)SQL_OV_ODBC3, 0 );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_ENV, henv );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    // Set up connection handle
    ret = SQLAllocHandle( SQL_HANDLE_DBC, henv, &hdbc );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    // Establish connection
    ret = SQLDriverConnect( hdbc, SQL_NULL_HANDLE, (SQLCHAR *)url.c_str(), (SQLSMALLINT)url.size(),
                            buffer, SQL_MAX_MESSAGE_LENGTH, &bufSize, SQL_DRIVER_NOPROMPT );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }
}

void disconnect()
{
    SQLDisconnect( hdbc );
    SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
    SQLFreeHandle( SQL_HANDLE_ENV, henv );
}

// Create a temp table with numRowsInTable rows of generated data.
void createTempTable( const int numRowsInTable )
{
    SQLHSTMT hstmt = SQL_NULL_HANDLE;
    ret = SQLAllocHandle( SQL_HANDLE_STMT, hdbc, &hstmt );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    std::string createTable;
    createTable = "create temp table postgres_test as (";
    createTable += "SELECT generate_series( 1, " + std::to_string( numRowsInTable ) + " ) AS dim1, ";
    createTable += "md5( random()::text ) AS dim2, md5( random()::text ) AS dim3, md5( random()::text ) AS dim4, ";
    createTable += "md5( random()::text ) AS dim5, md5( random()::text ) AS dim6, random() AS meas1";
    createTable += ")";
    std::cout << std::endl << createTable << std::endl;

    // Execute the command
    ret = SQLExecDirect( hstmt, (SQLCHAR *)createTable.c_str(), SQL_NTS );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLFreeHandle( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }
    SQLFreeHandle( SQL_HANDLE_STMT, hstmt );
}

// Execute the query with the given SQL_ATTR_ROW_ARRAY_SIZE and return the
// number of rows counted via the row-status array.
int runQuery( const std::string &query, const int rowArraySize )
{
    // Establish rowArraySize
    ret = SQLAllocHandle( SQL_HANDLE_STMT, hdbc, &hstmt );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }
    ret = SQLSetStmtAttrW( hstmt, SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)rowArraySize, SQL_IS_UINTEGER );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }
    m_rowStatus.resize( rowArraySize );
    ret = SQLSetStmtAttr( hstmt, SQL_ATTR_ROW_STATUS_PTR, &m_rowStatus[0], 0 );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    // Execute the query
    ret = SQLExecDirect( hstmt, (SQLCHAR *)query.c_str(), SQL_NTS );
    if ( !SQL_SUCCEEDED( ret ) )
    {
        ReportError( SQL_HANDLE_STMT, hstmt );
        SQLFreeHandle( SQL_HANDLE_STMT, hstmt );
        SQLDisconnect( hdbc );
        SQLFreeHandle( SQL_HANDLE_DBC, hdbc );
        SQLFreeHandle( SQL_HANDLE_ENV, henv );
        exit( -1 );
    }

    if ( rowArraySize != 1 )
        std::cerr << " rows returned by query with rowArraySize=" << rowArraySize << ":" << std::endl;

    // Fetch and iterate through m_rowStatus to count rows fetched
    int totalRows = 0;
    int fetchIdx = 0;
    while ( (ret = SQLFetch( hstmt )) != SQL_NO_DATA && SQL_SUCCEEDED( ret ) )
    {
        fetchIdx++;
        SQLULEN validRowsExported = 0;
        for ( SQLULEN row = 0; row < (SQLULEN)rowArraySize; ++row )
        {
            if ( m_rowStatus[row] == SQL_ROW_SUCCESS || m_rowStatus[row] == SQL_ROW_SUCCESS_WITH_INFO )
                validRowsExported++;
            else
                break;
        }
        totalRows += (int)validRowsExported;
        if ( rowArraySize != 1 )
            std::cout << " fetchIdx=" << fetchIdx << ", fetched rows=" << validRowsExported
                      << ", total rows=" << totalRows << std::endl;
    }
    SQLFreeHandle( SQL_HANDLE_STMT, hstmt );
    return totalRows;
}

// Run the query once with rowArraySize 1 and once with the requested
// rowArraySize; the two row counts should match.
void runTest( const std::string &url, const std::string &query, const int rowArraySize )
{
    // execute query using rowArraySize 1
    int rowsReturnedByQuery = runQuery( query, 1 );
    std::cerr << " with rowArraySize 1, number of rows returned by query is " << rowsReturnedByQuery << std::endl;

    // execute query using specified rowArraySize
    if ( rowsReturnedByQuery != runQuery( query, rowArraySize ) )
    {
        std::cerr << "Fetch rows failed!" << std::endl;
    }
}

} // namespace

/*
 * Creates a temp table of generated data and runs two test queries,
 * comparing the row counts fetched with rowArraySize 1 and with the
 * specified rowArraySize.
 *
 * argv[1]: database url
 * argv[2]: number of rows to insert into the temp table
 * argv[3]: rowArraySize to use in the test
 *
 * Example:
 *   url numRowsInTable rowArraySize
 *   "DRIVER={PostgreSQL Unicode(x64)};DATABASE=TestV1;SERVER=postgres.test.tsi.lan;UID=test;PWD=password;PORT=5432;BOOLSASCHAR=0;LFCONVERSION=0;UseDeclareFetch=1" 120 14
 */
int main( int argc, char* argv[] )
{
    std::string url = argv[1];
    int numRowsInTable = atoi( argv[2] );
    int rowArraySize = atoi( argv[3] );

    connect( url );
    createTempTable( numRowsInTable );

    std::string query;

    // Run test for a query that appears to work for all rowArraySize values when numRowsInTable == 120
    query = "SELECT \"postgres_test\".\"dim1\" AS \"dim1\", substring(\"postgres_test\".\"dim2\",1,1) AS \"dim2\", substring(\"postgres_test\".\"dim3\",1,1) AS \"dim3\",";
    query += " substring(\"postgres_test\".\"dim4\",1,1) AS \"dim4\", substring(\"postgres_test\".\"dim5\",1,1) AS \"dim5\", SUM(\"postgres_test\".\"meas1\") AS \"sum:meas1:ok\",";
    query += " substring(\"postgres_test\".\"dim6\",1,1) AS \"dim6\"";
    query += " FROM \"postgres_test\" \"postgres_test\"";
    query += " WHERE substring(\"postgres_test\".\"dim2\",1,1) IN ('a','b','c')";
    query += " GROUP BY 1, 2, 3, 4, 5, 7";
    std::cout << std::endl << query << std::endl << std::endl;
    runTest( url, query, rowArraySize );

    // Run test for a query that appears to work for only a few rowArraySize values when numRowsInTable == 120.
    // It consistently succeeds for rowArraySize values <=8,10,12,15,20 but fails for most other values.
    query = "SELECT \"postgres_test\".\"dim1\" AS \"dim1\", \"postgres_test\".\"dim2\" AS \"dim2\", \"postgres_test\".\"dim3\" AS \"dim3\",";
    query += " \"postgres_test\".\"dim4\" AS \"dim4\", \"postgres_test\".\"dim5\" AS \"dim5\", SUM(\"postgres_test\".\"meas1\") AS \"sum:meas1:ok\",";
    query += " \"postgres_test\".\"dim6\" AS \"dim6\"";
    query += " FROM \"postgres_test\" \"postgres_test\"";
    query += " GROUP BY 1, 2, 3, 4, 5, 7";
    std::cout << std::endl << query << std::endl << std::endl;
    runTest( url, query, rowArraySize );

    disconnect();
    return 0;
}
---------

From: John Kew <jkew@tableau.com>
Sent: Monday, January 18, 2016 11:31 AM
To: pgsql-odbc@postgresql.org
Subject: 9.0.5 issue - duplicate rows returned when using SQL_ATTR_ROW_ARRAY_SIZE attribute
We have been testing the new postgres driver and we found an issue with the SQL_ATTR_ROW_ARRAY_SIZE attribute. We use this attribute to request row sets in chunks that can align with cache boundaries and so forth; it is a general optimization we use with a number of data sources. With 9.0.5, though, executing a query that returns a result set larger than SQL_ATTR_ROW_ARRAY_SIZE results in the second fetch of rows being duplicates of the first fetch.
So let’s say we set this attribute to 84 (m_rowsToFetch) and use the m_rowStatus array to count the successfully fetched rows.
SQLSetStmtAttrW( hstmt[repeatIndex], SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)m_rowsToFetch, SQL_IS_UINTEGER );
SQLSetStmtAttr( hstmt[repeatIndex], SQL_ATTR_ROW_STATUS_PTR, &m_rowStatus[0], 0 );
We then create two queries, the first of which returns 24 rows and the second of which returns 120 rows. The first query returns the correct result, 24 rows, on both 9.0.3 and 9.0.5. The second query requires two fetch operations: with 9.0.3 the first fetch returns 84 rows and the second 36 rows, correctly totaling 120 rows. With 9.0.5 both fetches report 84 rows, and the second fetch appears to contain duplicates of the first.
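For clarity, here is a minimal sketch of the block-fetch loop we are describing (not lifted from our suite: connection setup, column binding, and error checking are omitted, and the names fetchInBlocks, hstmt, query, and rowsToFetch are illustrative only). It uses SQL_ATTR_ROWS_FETCHED_PTR to report the per-fetch counts instead of walking the row-status array; with 120 result rows and a rowset size of 84 we would expect it to print 84 and then 36.

#include <windows.h>
#include <sqlext.h>
#include <iostream>
#include <string>

// Sketch only: hstmt is assumed to be an allocated statement handle on an
// open connection, and query holds the SELECT text.
void fetchInBlocks( SQLHSTMT hstmt, const std::string &query, SQLULEN rowsToFetch )
{
    SQLULEN numRowsFetched = 0;

    // Return up to rowsToFetch rows per SQLFetch call and have the driver
    // report how many rows each call actually delivered.
    SQLSetStmtAttr( hstmt, SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)rowsToFetch, SQL_IS_UINTEGER );
    SQLSetStmtAttr( hstmt, SQL_ATTR_ROWS_FETCHED_PTR, &numRowsFetched, 0 );

    SQLExecDirect( hstmt, (SQLCHAR *)query.c_str(), SQL_NTS );

    SQLULEN total = 0;
    while ( SQL_SUCCEEDED( SQLFetch( hstmt ) ) )
    {
        // Expected with 120 rows and rowsToFetch == 84: fetches of 84 then 36.
        // Observed with 9.0.5: 84 then 84, the second block repeating the first.
        total += numRowsFetched;
        std::cout << "fetched " << numRowsFetched << " rows, total " << total << std::endl;
    }
}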
I believe you could modify one of the bulk* tests to exhibit this behavior; or we can sanitize a small part of our regression suite and release a full repro in code.
-John