Thread: libpq bug?
Hi, ALL, Following code: int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, const std::wstring &tableName, std::wstring &owner, std::vector<std::wstring> &errorMsg) { int result = 0; std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = c.relowner AND n.nspname = $1 AND relname = $2"; char *values[2]; values[0] = NULL, values[1] = NULL; values[0] = new char[schemaName.length() + 1]; values[1] = new char[tableName.length() + 1]; memset( values[0], '\0', schemaName.length() + 1 ); memset( values[1], '\0', tableName.length() + 1 ); strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() ).c_str() ); strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() ).c_str() ); int len1 = (int) schemaName.length(); int len2 = (int) tableName.length(); int length[2] = { len1, len2 }; int formats[2] = { 1, 1 }; PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); ExecStatusType status = PQresultStatus( res ); if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) { result = 1; std::wstring err = m_pimpl->m_myconv.from_bytes( PQerrorMessage( m_db ) ); errorMsg.push_back( L"Error executing query: " + err ); PQclear( res ); } else { owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); } return result; } when ran with the call of GetTableOwner( "public", "abcß", owner, errorMsg ); returns: ERROR: Invalid byte sequence for encoding UTF8. Does this mean I found the bug in the library? Any idea what I can do? Thank you.
Hi, Patrick, Here is my new code: int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, const std::wstring &tableName, std::wstring &owner, std::vector<std::wstring> &errorMsg) { int result = 0; std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = c.relowner AND n.nspname = $1 AND relname = $2"; char *values[2]; values[0] = NULL, values[1] = NULL; int charlength1 = schemaName.length() * sizeof( wchar_t ), charlength2 = tableName.length() * sizeof( wchar_t ); values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1]; values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1]; memset( values[0], '\0', schemaName.length() * sizeof( wchar_t ) + 1 ); memset( values[1], '\0', tableName.length() * sizeof( wchar_t ) + 1 ); strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() ).c_str() ); strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() ).c_str() ); int len1 = (int) schemaName.length() * sizeof( wchar_t ); int len2 = (int) tableName.length() * sizeof( wchar_t ); int length[2] = { len1, len2 }; int formats[2] = { 1, 1 }; PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); ExecStatusType status = PQresultStatus( res ); if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) { result = 1; std::wstring err = m_pimpl->m_myconv.from_bytes( PQerrorMessage( m_db ) ); errorMsg.push_back( L"Error executing query: " + err ); PQclear( res ); } else { owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); } return result; } The charlength2 variable contains the value of 8 and I'm still getting the same error. Any idea? Thank you. 
On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote: > > On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> Hi, >> >> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote: >> > >> > >> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> >> >> Hi, ALL, >> >> Following code: >> >> >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, >> >> const std::wstring &tableName, std::wstring &owner, >> >> std::vector<std::wstring> &errorMsg) >> >> { >> >> int result = 0; >> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user >> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = >> >> c.relowner AND n.nspname = $1 AND relname = $2"; >> >> char *values[2]; >> >> values[0] = NULL, values[1] = NULL; >> >> values[0] = new char[schemaName.length() + 1]; >> >> values[1] = new char[tableName.length() + 1]; >> >> memset( values[0], '\0', schemaName.length() + 1 ); >> >> memset( values[1], '\0', tableName.length() + 1 ); >> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() >> >> ).c_str() ); >> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() >> >> ).c_str() ); >> >> int len1 = (int) schemaName.length(); >> >> int len2 = (int) tableName.length(); >> >> int length[2] = { len1, len2 }; >> >> int formats[2] = { 1, 1 }; >> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( >> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); >> >> ExecStatusType status = PQresultStatus( res ); >> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) >> >> { >> >> result = 1; >> >> std::wstring err = m_pimpl->m_myconv.from_bytes( >> >> PQerrorMessage( m_db ) ); >> >> errorMsg.push_back( L"Error executing query: " + err ); >> >> PQclear( res ); >> >> } >> >> else >> >> { >> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); >> >> } >> >> return result; >> 
>> } >> >> >> >> when ran with the call of >> >> >> >> GetTableOwner( "public", "abcß", owner, errorMsg ); >> >> >> >> returns: >> >> >> >> ERROR: Invalid byte sequence for encoding UTF8. >> >> >> >> Does this mean I found the bug in the library? >> > >> > >> > The bug is in your C++ code. "abcß" as tableName.lenght() (wstring) returns 4 (as in four characters) not number ofbytes required to represent the intended string: 61 62 63 c3 9f >> > Since the last character is a 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length andhence an invalid UTF-8 sequence. >> > >> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not numberof bytes. so you will end up with buffer-overflows. >> >> So I should use >> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring >> in both places? > > > size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with maximum buffers size necessary to holdthe string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution. > > Sorry, > --patrick > > >> >> Thank you. >> >> > >> > HTH, >> > --patrick >> > >> > >> >> >> >> Any idea what I can do? >> >> >> >> Thank you. >> >>
Hi, ALL,
Following code:
// Looks up the owner of table `tableName` in schema `schemaName` by querying
// pg_class / pg_user / pg_namespace through PQexecParams().
// On success stores column 0 of row 0 of the result in `owner` and returns 0;
// on failure appends the libpq error text to `errorMsg` and returns 1.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// NOTE(review): the literal below is split across lines here, presumably by
// mail line-wrapping; a string literal cannot span lines like this in C++ --
// confirm it is a single line in the real source.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// NOTE(review): length() is the number of wide characters, not the number of
// bytes that to_bytes() produces below. Assuming m_myconv converts to UTF-8,
// "abcß" is 4 characters but 5 bytes, so these buffers can be too small and
// the strcpy() calls can write past their end.
// The buffers are also never delete[]'d anywhere in this function.
values[0] = new char[schemaName.length() + 1];
values[1] = new char[tableName.length() + 1];
memset( values[0], '\0', schemaName.length() + 1 );
memset( values[1], '\0', tableName.length() + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): these are character counts as well. Because the parameters
// are declared binary below, libpq sends exactly length[i] bytes, so a
// trailing multi-byte character is cut short -- consistent with the
// "invalid byte sequence" error reported for this code.
int len1 = (int) schemaName.length();
int len2 = (int) tableName.length();
int length[2] = { len1, len2 };
int formats[2] = { 1, 1 }; // 1 = binary format for both parameters
// The final argument (1) requests the result in binary format.
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without a PQntuples() check, and `res` is not
// PQclear()'d on this path in the code shown.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
return result;
}
when run with the call
GetTableOwner( "public", "abcß", owner, errorMsg );
returns:
ERROR: Invalid byte sequence for encoding UTF8.
Does this mean I found a bug in the library?
Any idea what I can do?
Thank you.
Hi, On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote: > > > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> Hi, ALL, >> Following code: >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, >> const std::wstring &tableName, std::wstring &owner, >> std::vector<std::wstring> &errorMsg) >> { >> int result = 0; >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = >> c.relowner AND n.nspname = $1 AND relname = $2"; >> char *values[2]; >> values[0] = NULL, values[1] = NULL; >> values[0] = new char[schemaName.length() + 1]; >> values[1] = new char[tableName.length() + 1]; >> memset( values[0], '\0', schemaName.length() + 1 ); >> memset( values[1], '\0', tableName.length() + 1 ); >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() >> ).c_str() ); >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() >> ).c_str() ); >> int len1 = (int) schemaName.length(); >> int len2 = (int) tableName.length(); >> int length[2] = { len1, len2 }; >> int formats[2] = { 1, 1 }; >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); >> ExecStatusType status = PQresultStatus( res ); >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) >> { >> result = 1; >> std::wstring err = m_pimpl->m_myconv.from_bytes( >> PQerrorMessage( m_db ) ); >> errorMsg.push_back( L"Error executing query: " + err ); >> PQclear( res ); >> } >> else >> { >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); >> } >> return result; >> } >> >> when ran with the call of >> >> GetTableOwner( "public", "abcß", owner, errorMsg ); >> >> returns: >> >> ERROR: Invalid byte sequence for encoding UTF8. >> >> Does this mean I found the bug in the library? > > > The bug is in your C++ code. 
"abcß" as tableName.lenght() (wstring) returns 4 (as in four characters) not number of bytesrequired to represent the intended string: 61 62 63 c3 9f > Since the last character is a 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length andhence an invalid UTF-8 sequence. > > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number ofbytes. so you will end up with buffer-overflows. So I should use https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring in both places? Thank you. > > HTH, > --patrick > > >> >> Any idea what I can do? >> >> Thank you. >>
Hi, On Fri, Dec 28, 2018 at 5:07 PM Igor Korot <ikorot01@gmail.com> wrote: > > Hi, > > On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote: > > > > > > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote: > >> > >> Hi, ALL, > >> Following code: > >> > >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, > >> const std::wstring &tableName, std::wstring &owner, > >> std::vector<std::wstring> &errorMsg) > >> { > >> int result = 0; > >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user > >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = > >> c.relowner AND n.nspname = $1 AND relname = $2"; > >> char *values[2]; > >> values[0] = NULL, values[1] = NULL; > >> values[0] = new char[schemaName.length() + 1]; > >> values[1] = new char[tableName.length() + 1]; > >> memset( values[0], '\0', schemaName.length() + 1 ); > >> memset( values[1], '\0', tableName.length() + 1 ); > >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() > >> ).c_str() ); > >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() > >> ).c_str() ); > >> int len1 = (int) schemaName.length(); > >> int len2 = (int) tableName.length(); > >> int length[2] = { len1, len2 }; > >> int formats[2] = { 1, 1 }; > >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( > >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); > >> ExecStatusType status = PQresultStatus( res ); > >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) > >> { > >> result = 1; > >> std::wstring err = m_pimpl->m_myconv.from_bytes( > >> PQerrorMessage( m_db ) ); > >> errorMsg.push_back( L"Error executing query: " + err ); > >> PQclear( res ); > >> } > >> else > >> { > >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); > >> } > >> return result; > >> } > >> > >> when ran with the call of > >> > >> GetTableOwner( "public", "abcß", owner, errorMsg ); > >> > >> returns: > >> > 
>> ERROR: Invalid byte sequence for encoding UTF8. > >> > >> Does this mean I found the bug in the library? > > > > > > The bug is in your C++ code. "abcß" as tableName.lenght() (wstring) returns 4 (as in four characters) not number of bytesrequired to represent the intended string: 61 62 63 c3 9f > > Since the last character is a 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length andhence an invalid UTF-8 sequence. > > > > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not numberof bytes. so you will end up with buffer-overflows. > > So I should use https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring in both places? And this solution will work cross-platform, right? Thank you. > > Thank you. > > > > > HTH, > > --patrick > > > > > >> > >> Any idea what I can do? > >> > >> Thank you. > >>
Hi,
On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
>
> On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi, ALL,
>> Following code:
>>
>> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> const std::wstring &tableName, std::wstring &owner,
>> std::vector<std::wstring> &errorMsg)
>> {
>> int result = 0;
>> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> c.relowner AND n.nspname = $1 AND relname = $2";
>> char *values[2];
>> values[0] = NULL, values[1] = NULL;
>> values[0] = new char[schemaName.length() + 1];
>> values[1] = new char[tableName.length() + 1];
>> memset( values[0], '\0', schemaName.length() + 1 );
>> memset( values[1], '\0', tableName.length() + 1 );
>> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> ).c_str() );
>> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> ).c_str() );
>> int len1 = (int) schemaName.length();
>> int len2 = (int) tableName.length();
>> int length[2] = { len1, len2 };
>> int formats[2] = { 1, 1 };
>> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> ExecStatusType status = PQresultStatus( res );
>> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> {
>> result = 1;
>> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> PQerrorMessage( m_db ) );
>> errorMsg.push_back( L"Error executing query: " + err );
>> PQclear( res );
>> }
>> else
>> {
>> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> }
>> return result;
>> }
>>
>> when ran with the call of
>>
>> GetTableOwner( "public", "abcß", owner, errorMsg );
>>
>> returns:
>>
>> ERROR: Invalid byte sequence for encoding UTF8.
>>
>> Does this mean I found the bug in the library?
>
>
> The bug is in your C++ code. For "abcß", tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
> The last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>
> Furthermore, your values[] array allocation is in error since wstring::length returns the number of characters, not the number of bytes, so you will end up with buffer overflows.
So I should use
https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
in both places?
Thank you.
>
> HTH,
> --patrick
>
>
>>
>> Any idea what I can do?
>>
>> Thank you.
>>
Hi, On Sat, Dec 29, 2018 at 1:37 AM patrick keshishian <pkeshish@gmail.com> wrote: > > On Fri, Dec 28, 2018 at 5:40 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> Hi, Patrick, >> >> Here is my new code: >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, >> const std::wstring &tableName, std::wstring &owner, >> std::vector<std::wstring> &errorMsg) >> { >> int result = 0; >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = >> c.relowner AND n.nspname = $1 AND relname = $2"; >> char *values[2]; >> values[0] = NULL, values[1] = NULL; >> int charlength1 = schemaName.length() * sizeof( wchar_t ), >> charlength2 = tableName.length() * sizeof( wchar_t ); >> values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1]; >> values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1]; >> memset( values[0], '\0', schemaName.length() * sizeof( wchar_t ) + 1 ); >> memset( values[1], '\0', tableName.length() * sizeof( wchar_t ) + 1 ); >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() >> ).c_str() ); >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() >> ).c_str() ); >> int len1 = (int) schemaName.length() * sizeof( wchar_t ); >> int len2 = (int) tableName.length() * sizeof( wchar_t ); >> int length[2] = { len1, len2 }; >> int formats[2] = { 1, 1 }; >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); >> ExecStatusType status = PQresultStatus( res ); >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) >> { >> result = 1; >> std::wstring err = m_pimpl->m_myconv.from_bytes( >> PQerrorMessage( m_db ) ); >> errorMsg.push_back( L"Error executing query: " + err ); >> PQclear( res ); >> } >> else >> { >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); >> } >> return result; >> } >> >> The charlength2 variable contains the value 
of 8 and I'm still getting >> the same error. > > > I was hoping someone more versed in C++ would jump in to answer your question. I haven't used C++ in at least a decade. > You need to convert the wchar_t data that wstring stores into UTF-8. Personally, I would use iconv (common enough). But that is very weird. When I check what is stored in the values[1] array, I see the same byte sequence as what I got from the database information_schema.tables.. Maybe I should just upgrade the libpq and try the latest release? > > I assume the PostgresDatabase class is your own (?) I would add a helper function to do the conversion. Here is a very rough template for you to adapt if you think it helps you. Yes, PostgresDatabase is my class. I will look at that later today, but it would definitely be interesting to get someone with the current C++ experience (especially with C++11), because I believe that I am doing a conversion into UTF8. The m_convert variable is declared as: std::wstring_convert<std::codecvt_utf8<wchar_t> > m_myconv; and so I think it is converting to the UTF8. Thank you. > > > #include <err.h> > > #include <stdlib.h> > > #include <string.h> > > #include <iostream> > > #include <string> > > > #include <iconv.h> > > > class PGDB { > > public: > > // your stuff ... 
> > iconv_t ic; > > > PGDB(void) { > > setlocale(LC_CTYPE, ""); > > ic = iconv_open("UTF-8", "wchar_t"); > > if ((iconv_t)-1 == ic) > > errx(1, "iconv_open"); > > } > > ~PGDB() { > > iconv_close(ic); > > } > > // caller should free() > > char *wchar2utf8(std::wstring const &ws) { > > char *in, *buf, *out; > > size_t bufsz, inbytes, outbytes; > > > in = (char *)ws.data(); > > inbytes = ws.length() * sizeof(wchar_t); > > outbytes = inbytes; > > bufsz = inbytes + 1; // XXX check for overflow > > > buf = (char *)calloc(bufsz, 1); > > if (NULL == buf) > > err(1, NULL); // or throw something > > > out = buf; > > if ((size_t)-1 == iconv(ic, &in, &inbytes, &out, &outbytes)) > > errx(1, "iconv"); // or throw ... > > > // TODO ensure inbytes is 0 (meaning all input consumed) > > return buf; > > } > > }; > > > // demo using above PGDB class/code > > int main(int argc, char *argv[]) > > { > > char *str; > > size_t i, n; > > std::wstring tab; > > PGDB pg; > > > tab = L"ºabcß"; > > str = pg.wchar2utf8(tab); > > > n = strlen(str); > > for (i = 0; i < n; ++i) { > > printf("%02hhx ", str[i]); > > } > > printf("\n"); > > printf("->%s<-\n", str); > > > free(str); > > return 0; > > } > > > > So in GetTableOwner() you'd call wchar2utf8() for the input wstring data and you don't do the new/memset/strcpy. Just rememberto free() the returned utf8 string pointers after call to PQexecParams(). > > cheers, > --patrick > > > >> >> Any idea? >> >> Thank you. 
>> >> On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote: >> > >> > On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> >> >> Hi, >> >> >> >> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote: >> >> > >> >> > >> >> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote: >> >> >> >> >> >> Hi, ALL, >> >> >> Following code: >> >> >> >> >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, >> >> >> const std::wstring &tableName, std::wstring &owner, >> >> >> std::vector<std::wstring> &errorMsg) >> >> >> { >> >> >> int result = 0; >> >> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user >> >> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = >> >> >> c.relowner AND n.nspname = $1 AND relname = $2"; >> >> >> char *values[2]; >> >> >> values[0] = NULL, values[1] = NULL; >> >> >> values[0] = new char[schemaName.length() + 1]; >> >> >> values[1] = new char[tableName.length() + 1]; >> >> >> memset( values[0], '\0', schemaName.length() + 1 ); >> >> >> memset( values[1], '\0', tableName.length() + 1 ); >> >> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() >> >> >> ).c_str() ); >> >> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() >> >> >> ).c_str() ); >> >> >> int len1 = (int) schemaName.length(); >> >> >> int len2 = (int) tableName.length(); >> >> >> int length[2] = { len1, len2 }; >> >> >> int formats[2] = { 1, 1 }; >> >> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( >> >> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); >> >> >> ExecStatusType status = PQresultStatus( res ); >> >> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) >> >> >> { >> >> >> result = 1; >> >> >> std::wstring err = m_pimpl->m_myconv.from_bytes( >> >> >> PQerrorMessage( m_db ) ); >> >> >> errorMsg.push_back( L"Error executing query: " + err ); >> >> >> 
PQclear( res ); >> >> >> } >> >> >> else >> >> >> { >> >> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); >> >> >> } >> >> >> return result; >> >> >> } >> >> >> >> >> >> when ran with the call of >> >> >> >> >> >> GetTableOwner( "public", "abcß", owner, errorMsg ); >> >> >> >> >> >> returns: >> >> >> >> >> >> ERROR: Invalid byte sequence for encoding UTF8. >> >> >> >> >> >> Does this mean I found the bug in the library? >> >> > >> >> > >> >> > The bug is in your C++ code. "abcß" as tableName.lenght() (wstring) returns 4 (as in four characters) not numberof bytes required to represent the intended string: 61 62 63 c3 9f >> >> > Since the last character is a 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter lengthand hence an invalid UTF-8 sequence. >> >> > >> >> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not numberof bytes. so you will end up with buffer-overflows. >> >> >> >> So I should use >> >> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring >> >> in both places? >> > >> > >> > size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with maximum buffers size necessary tohold the string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution. >> > >> > Sorry, >> > --patrick >> > >> > >> >> >> >> Thank you. >> >> >> >> > >> >> > HTH, >> >> > --patrick >> >> > >> >> > >> >> >> >> >> >> Any idea what I can do? >> >> >> >> >> >> Thank you. >> >> >>
Hi, Patrick,
Here is my new code:
// Second attempt at GetTableOwner(): same query and control flow as before,
// but buffer sizes and parameter lengths are now length() * sizeof(wchar_t).
// On success stores column 0 of row 0 of the result in `owner` and returns 0;
// on failure appends the libpq error text to `errorMsg` and returns 1.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// NOTE(review): the literal below is split across lines here, presumably by
// mail line-wrapping -- confirm it is a single line in the real source.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// charlength1 / charlength2 are not referenced again in this function.
int charlength1 = schemaName.length() * sizeof( wchar_t ),
charlength2 = tableName.length() * sizeof( wchar_t );
// NOTE(review): length() * sizeof(wchar_t) is the size of the wide-character
// data, not of the converted output. Where wchar_t is 2 bytes a single
// character can presumably still need 3 UTF-8 bytes, so this is not a
// guaranteed upper bound -- confirm for the target platforms.
// The buffers are never delete[]'d anywhere in this function.
values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1];
values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1];
memset( values[0], '\0', schemaName.length() * sizeof( wchar_t ) + 1 );
memset( values[1], '\0', tableName.length() * sizeof( wchar_t ) + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): these lengths are the wide-character storage size, which is
// larger than the converted strings copied above (for ASCII "public" the
// converted text is 6 bytes, the declared length 12 or 24). With binary
// parameters libpq sends exactly length[i] bytes, so the zero padding left by
// memset() is transmitted as part of the value; presumably the server rejects
// those NUL bytes, which would explain the unchanged error -- TODO confirm.
// The byte count of the to_bytes() result is the length that matches the data.
int len1 = (int) schemaName.length() * sizeof( wchar_t );
int len2 = (int) tableName.length() * sizeof( wchar_t );
int length[2] = { len1, len2 };
int formats[2] = { 1, 1 }; // 1 = binary format for both parameters
// The final argument (1) requests the result in binary format.
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without a PQntuples() check, and `res` is not
// PQclear()'d on this path in the code shown.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
return result;
}
The charlength2 variable contains the value of 8 and I'm still getting
the same error.
#include <err.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
#include <iconv.h>
// Rough helper template: owns an iconv descriptor that converts wchar_t
// strings to UTF-8, e.g. for building libpq parameter values.
class PGDB {
public:
	// your stuff ...
	iconv_t ic;

	// Opens the wchar_t -> UTF-8 conversion descriptor.
	// Terminates the process via errx() if the conversion is unavailable.
	PGDB(void) {
		setlocale(LC_CTYPE, "");
		ic = iconv_open("UTF-8", "wchar_t");
		if ((iconv_t)-1 == ic)
			errx(1, "iconv_open");
	}

	~PGDB() {
		iconv_close(ic);
	}

	// The descriptor is closed exactly once, in the destructor; a copy would
	// share it and close it a second time, so copying is disabled.
	PGDB(const PGDB &) = delete;
	PGDB &operator=(const PGDB &) = delete;

	// Converts `ws` to a NUL-terminated UTF-8 string.
	// Returns a calloc()'d buffer; caller should free().
	// Terminates the process via err()/errx() on allocation or conversion
	// failure (replace with exceptions if preferred).
	char *wchar2utf8(std::wstring const &ws) {
		// UTF-8 needs at most 4 bytes per wchar_t unit, whether wchar_t is
		// 16 bits (a BMP unit is <= 3 bytes, a surrogate pair is 4 bytes for
		// 2 units) or 32 bits. Sizing the output by the *input* byte count
		// is too small when wchar_t is 16 bits.
		const size_t maxBytesPerUnit = 4;
		if (ws.length() > (SIZE_MAX - 1) / maxBytesPerUnit)
			errx(1, "wchar2utf8: input too long");

		// iconv() takes a non-const input pointer but does not modify the data.
		char *in = const_cast<char *>(
		    reinterpret_cast<const char *>(ws.data()));
		size_t inbytes = ws.length() * sizeof(wchar_t);
		size_t outbytes = ws.length() * maxBytesPerUnit;

		// +1 keeps a terminating NUL; calloc() zero-fills the buffer.
		char *buf = static_cast<char *>(calloc(outbytes + 1, 1));
		if (NULL == buf)
			err(1, NULL); // or throw something

		// Return the descriptor to its initial state so that an earlier
		// call cannot influence this conversion.
		iconv(ic, NULL, NULL, NULL, NULL);

		char *out = buf;
		if (static_cast<size_t>(-1) == iconv(ic, &in, &inbytes, &out, &outbytes))
			errx(1, "iconv"); // or throw ...

		// All input must have been consumed, otherwise the result is truncated.
		if (0 != inbytes)
			errx(1, "iconv: incomplete conversion");

		return buf;
	}
};
// demo using above PGDB class/code
int main(int argc, char *argv[])
{
char *str;
size_t i, n;
std::wstring tab;
PGDB pg;
tab = L"ºabcß";
str = pg.wchar2utf8(tab);
n = strlen(str);
for (i = 0; i < n; ++i) {
printf("%02hhx ", str[i]);
}
printf("\n");
printf("->%s<-\n", str);
free(str);
return 0;
}
Any idea?
Thank you.
On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
> On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi,
>>
>> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>> >
>> >
>> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>> >>
>> >> Hi, ALL,
>> >> Following code:
>> >>
>> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> >> const std::wstring &tableName, std::wstring &owner,
>> >> std::vector<std::wstring> &errorMsg)
>> >> {
>> >> int result = 0;
>> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> >> c.relowner AND n.nspname = $1 AND relname = $2";
>> >> char *values[2];
>> >> values[0] = NULL, values[1] = NULL;
>> >> values[0] = new char[schemaName.length() + 1];
>> >> values[1] = new char[tableName.length() + 1];
>> >> memset( values[0], '\0', schemaName.length() + 1 );
>> >> memset( values[1], '\0', tableName.length() + 1 );
>> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> >> ).c_str() );
>> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> >> ).c_str() );
>> >> int len1 = (int) schemaName.length();
>> >> int len2 = (int) tableName.length();
>> >> int length[2] = { len1, len2 };
>> >> int formats[2] = { 1, 1 };
>> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> >> ExecStatusType status = PQresultStatus( res );
>> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> >> {
>> >> result = 1;
>> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> >> PQerrorMessage( m_db ) );
>> >> errorMsg.push_back( L"Error executing query: " + err );
>> >> PQclear( res );
>> >> }
>> >> else
>> >> {
>> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> >> }
>> >> return result;
>> >> }
>> >>
>> >> when ran with the call of
>> >>
>> >> GetTableOwner( "public", "abcß", owner, errorMsg );
>> >>
>> >> returns:
>> >>
>> >> ERROR: Invalid byte sequence for encoding UTF8.
>> >>
>> >> Does this mean I found the bug in the library?
>> >
>> >
>> > The bug is in your C++ code. For "abcß", tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
>> > The last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>> >
>> > Furthermore, your values[] array allocation is in error since wstring::length returns the number of characters, not the number of bytes, so you will end up with buffer overflows.
>>
>> So I should use
>> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
>> in both places?
>
>
> size() also returns 4. If you multiply it by sizeof(wchar_t) you will end up with the maximum buffer size necessary to hold the string (minus the terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution.
>
> Sorry,
> --patrick
>
>
>>
>> Thank you.
>>
>> >
>> > HTH,
>> > --patrick
>> >
>> >
>> >>
>> >> Any idea what I can do?
>> >>
>> >> Thank you.
>> >>
Hi, So, does anybody have an idea? I do have following code at the beginning of the cpp file: #ifdef WIN32 #include <windows.h> #pragma execution_character_set("utf-8") #endif but even running it on OSX, I am getting this same error. Thank you. On Fri, Dec 28, 2018 at 11:30 PM Igor Korot <ikorot01@gmail.com> wrote: > > Hi, > > On Sat, Dec 29, 2018 at 1:37 AM patrick keshishian <pkeshish@gmail.com> wrote: > > > > On Fri, Dec 28, 2018 at 5:40 PM Igor Korot <ikorot01@gmail.com> wrote: > >> > >> Hi, Patrick, > >> > >> Here is my new code: > >> > >> int PostgresDatabase::GetTableOwner (const std::wstring > &schemaName, > >> const std::wstring &tableName, std::wstring &owner, > >> std::vector<std::wstring> &errorMsg) > >> { > >> int result = 0; > >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user > >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = > >> c.relowner AND n.nspname = $1 AND relname = $2"; > >> char *values[2]; > >> values[0] = NULL, values[1] = NULL; > >> int charlength1 = schemaName.length() * sizeof( wchar_t ), > >> charlength2 = tableName.length() * sizeof( wchar_t ); > >> values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1]; > >> values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1]; > >> memset( values[0], '\0', schemaName.length() * sizeof( > wchar_t ) + 1 ); > >> memset( values[1], '\0', tableName.length() * sizeof( > wchar_t ) + 1 ); > >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( > schemaName.c_str() > >> ).c_str() ); > >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() > >> ).c_str() ); > >> int len1 = (int) schemaName.length() * sizeof( wchar_t ); > >> int len2 = (int) tableName.length() * sizeof( wchar_t ); > >> int length[2] = { len1, len2 }; > >> int formats[2] = { 1, 1 }; > >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( > >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); > >> ExecStatusType status = PQresultStatus( res ); > >> if( 
status != PGRES_COMMAND_OK && status != > PGRES_TUPLES_OK ) > >> { > >> result = 1; > >> std::wstring err = m_pimpl->m_myconv.from_bytes( > >> PQerrorMessage( m_db ) ); > >> errorMsg.push_back( L"Error executing query: " + err ); > >> PQclear( res ); > >> } > >> else > >> { > >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( > res, 0, 0 ) ); > >> } > >> return result; > >> } > >> > >> The charlength2 variable contains the value of 8 and I'm still getting > >> the same error. > > > > > > I was hoping someone more versed in C++ would jump in to answer > your question. I haven't used C++ in at least a decade. > > You need to convert the wchar_t data that wstring stores into > UTF-8. Personally, I would use iconv (common enough). > > But that is very weird. > When I check what is stored in the values[1] array, I see the same > byte sequence as what I got from the database > information_schema.tables.. > Maybe I should just upgrade the libpq and try the latest release? > > > > > I assume the PostgresDatabase class is your own (?) I would add a > helper function to do the conversion. Here is a very rough template > for you to adapt if you think it helps you. > > Yes, PostgresDatabase is my class. > I will look at that later today, but it would definitely be > interesting to get someone with the current C++ > experience (especially with C++11), because I believe that I am doing > a conversion into UTF8. > > The m_convert variable is declared as: > > std::wstring_convert<std::codecvt_utf8<wchar_t> > m_myconv; > > and so I think it is converting to the UTF8. > > Thank you. > > > > > > > #include <err.h> > > > > #include <stdlib.h> > > > > #include <string.h> > > > > #include <iostream> > > > > #include <string> > > > > > > #include <iconv.h> > > > > > > class PGDB { > > > > public: > > > > // your stuff ... 
> > > > iconv_t ic; > > > > > > PGDB(void) { > > > > setlocale(LC_CTYPE, ""); > > > > ic = iconv_open("UTF-8", "wchar_t"); > > > > if ((iconv_t)-1 == ic) > > > > errx(1, "iconv_open"); > > > > } > > > > ~PGDB() { > > > > iconv_close(ic); > > > > } > > > > // caller should free() > > > > char *wchar2utf8(std::wstring const &ws) { > > > > char *in, *buf, *out; > > > > size_t bufsz, inbytes, outbytes; > > > > > > in = (char *)ws.data(); > > > > inbytes = ws.length() * sizeof(wchar_t); > > > > outbytes = inbytes; > > > > bufsz = inbytes + 1; // XXX check for overflow > > > > > > buf = (char *)calloc(bufsz, 1); > > > > if (NULL == buf) > > > > err(1, NULL); // or throw something > > > > > > out = buf; > > > > if ((size_t)-1 == iconv(ic, &in, &inbytes, &out, &outbytes)) > > > > errx(1, "iconv"); // or throw ... > > > > > > // TODO ensure inbytes is 0 (meaning all input consumed) > > > > return buf; > > > > } > > > > }; > > > > > > // demo using above PGDB class/code > > > > int main(int argc, char *argv[]) > > > > { > > > > char *str; > > > > size_t i, n; > > > > std::wstring tab; > > > > PGDB pg; > > > > > > tab = L"ºabcß"; > > > > str = pg.wchar2utf8(tab); > > > > > > n = strlen(str); > > > > for (i = 0; i < n; ++i) { > > > > printf("%02hhx ", str[i]); > > > > } > > > > printf("\n"); > > > > printf("->%s<-\n", str); > > > > > > free(str); > > > > return 0; > > > > } > > > > > > > > So in GetTableOwner() you'd call wchar2utf8() for the input wstring data and you don't do the new/memset/strcpy. Justremember to free() the returned utf8 string pointers after call to PQexecParams(). > > > > cheers, > > --patrick > > > > > > > >> > >> Any idea? > >> > >> Thank you. 
> >> > >> On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote: > >> > > >> > On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote: > >> >> > >> >> Hi, > >> >> > >> >> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote: > >> >> > > >> >> > > >> >> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote: > >> >> >> > >> >> >> Hi, ALL, > >> >> >> Following code: > >> >> >> > >> >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName, > >> >> >> const std::wstring &tableName, std::wstring &owner, > >> >> >> std::vector<std::wstring> &errorMsg) > >> >> >> { > >> >> >> int result = 0; > >> >> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user > >> >> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid = > >> >> >> c.relowner AND n.nspname = $1 AND relname = $2"; > >> >> >> char *values[2]; > >> >> >> values[0] = NULL, values[1] = NULL; > >> >> >> values[0] = new char[schemaName.length() + 1]; > >> >> >> values[1] = new char[tableName.length() + 1]; > >> >> >> memset( values[0], '\0', schemaName.length() + 1 ); > >> >> >> memset( values[1], '\0', tableName.length() + 1 ); > >> >> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str() > >> >> >> ).c_str() ); > >> >> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str() > >> >> >> ).c_str() ); > >> >> >> int len1 = (int) schemaName.length(); > >> >> >> int len2 = (int) tableName.length(); > >> >> >> int length[2] = { len1, len2 }; > >> >> >> int formats[2] = { 1, 1 }; > >> >> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes( > >> >> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 ); > >> >> >> ExecStatusType status = PQresultStatus( res ); > >> >> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK ) > >> >> >> { > >> >> >> result = 1; > >> >> >> std::wstring err = m_pimpl->m_myconv.from_bytes( > >> >> >> PQerrorMessage( 
m_db ) ); > >> >> >> errorMsg.push_back( L"Error executing query: " + err ); > >> >> >> PQclear( res ); > >> >> >> } > >> >> >> else > >> >> >> { > >> >> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) ); > >> >> >> } > >> >> >> return result; > >> >> >> } > >> >> >> > >> >> >> when ran with the call of > >> >> >> > >> >> >> GetTableOwner( "public", "abcß", owner, errorMsg ); > >> >> >> > >> >> >> returns: > >> >> >> > >> >> >> ERROR: Invalid byte sequence for encoding UTF8. > >> >> >> > >> >> >> Does this mean I found the bug in the library? > >> >> > > >> >> > > >> >> > The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters) not the number of bytes required to represent the intended string: 61 62 63 c3 9f > >> >> > Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence. > >> >> > > >> >> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes, so you will end up with buffer-overflows. > >> >> > >> >> So I should use > >> >> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring > >> >> in both places? > >> > > >> > > >> > size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with maximum buffer size necessary to hold the string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution. > >> > > >> > Sorry, > >> > --patrick > >> > > >> > > >> >> > >> >> Thank you. > >> >> > >> >> > > >> >> > HTH, > >> >> > --patrick > >> >> > > >> >> > > >> >> >> > >> >> >> Any idea what I can do? > >> >> >> > >> >> >> Thank you. > >> >> >>