diff --git a/web/pgadmin/tools/sqleditor/tests/test_encoding_charset.py b/web/pgadmin/tools/sqleditor/tests/test_encoding_charset.py index 8de9ee6e..ecb80cb5 100644 --- a/web/pgadmin/tools/sqleditor/tests/test_encoding_charset.py +++ b/web/pgadmin/tools/sqleditor/tests/test_encoding_charset.py @@ -14,7 +14,7 @@ from pgadmin.browser.server_groups.servers.databases.tests import utils as \ from regression import parent_node_dict from regression.python_test_utils import test_utils import json -from pgadmin.utils import server_utils +from pgadmin.utils import server_utils, IS_PY2 class TestEncodingCharset(BaseTestGenerator): @@ -31,13 +31,6 @@ class TestEncodingCharset(BaseTestGenerator): lc_collate='C', test_str='A' )), - ( - 'With Encoding WIN1252', - dict( - db_encoding='WIN1252', - lc_collate='C', - test_str='A' - )), ( 'With Encoding EUC_CN', dict( @@ -50,14 +43,14 @@ class TestEncodingCharset(BaseTestGenerator): dict( db_encoding='SQL_ASCII', lc_collate='C', - test_str='\\255' + test_str='Tif' )), ( 'With Encoding LATIN1', dict( db_encoding='LATIN1', lc_collate='C', - test_str='Ň' + test_str='äöüßÑ' )), ( 'With Encoding LATIN2', @@ -66,7 +59,174 @@ class TestEncodingCharset(BaseTestGenerator): lc_collate='C', test_str='§' )), - + ( + 'With Encoding LATIN9', + dict( + db_encoding='LATIN9', + lc_collate='C', + test_str='äöüß' + )), + ( + 'With Encoding EUC_JIS_2004', + dict( + db_encoding='EUC_JIS_2004', + lc_collate='C', + test_str='じんぼはりんごをたべる' + )), + ( + 'With Encoding WIN1256', + dict( + db_encoding='WIN1256', + lc_collate='C', + test_str='صباح الخير' + )), + ( + 'With Encoding WIN866', + dict( + db_encoding='WIN866', + lc_collate='C', + test_str='Альтернативная' + )), + ( + 'With Encoding WIN874', + dict( + db_encoding='WIN874', + lc_collate='C', + test_str='กลิ่นหอม' + )), + ( + 'With Encoding WIN1250', + dict( + db_encoding='WIN1250', + lc_collate='C', + test_str='ŔÁÄÇ' + )), + ( + 'With Encoding WIN1251', + dict( + db_encoding='WIN1251', + lc_collate='C', + test_str='ЖИЙЮ' + )), + ( + 'With Encoding WIN1252', + dict( + db_encoding='WIN1252', + lc_collate='C', + test_str='ÆØÙü' + )), + ( + 'With Encoding WIN1253', + dict( + db_encoding='WIN1253', + lc_collate='C', + test_str='ΨΪμΫ' + )), + ( + 'With Encoding WIN1254', + dict( + db_encoding='WIN1254', + lc_collate='C', + test_str='ĞğØŠ' + )), + ( + 'With Encoding WIN1255', + dict( + db_encoding='WIN1255', + lc_collate='C', + test_str='₪¥©¾' + )), + ( + 'With Encoding WIN1256', + dict( + db_encoding='WIN1256', + lc_collate='C', + test_str='بؤغق' + )), + ( + 'With Encoding WIN1257', + dict( + db_encoding='WIN1257', + lc_collate='C', + test_str='‰ķģž' + )), + ( + 'With Encoding WIN1258', + dict( + db_encoding='WIN1258', + lc_collate='C', + test_str='₫SHYÑđ' + )), + ( + 'With Encoding EUC_CN', + dict( + db_encoding='EUC_CN', + lc_collate='C', + test_str='汉字不灭' + )), + ( + 'With Encoding EUC_JP', + dict( + db_encoding='EUC_JP', + lc_collate='C', + test_str='での日本' + )), + ( + 'With Encoding EUC_KR', + dict( + db_encoding='EUC_KR', + lc_collate='C', + test_str='ㄱㄲㄴㄷ' + )), + ( + 'With Encoding EUC_TW', + dict( + db_encoding='EUC_TW', + lc_collate='C', + test_str='中文' + )), + ( + 'With Encoding ISO_8859_5', + dict( + db_encoding='ISO_8859_5', + lc_collate='C', + test_str='ЁЎФЮ' + )), + ( + 'With Encoding ISO_8859_6', + dict( + db_encoding='ISO_8859_6', + lc_collate='C', + test_str='العَرَبِيَّة' + )), + ( + 'With Encoding ISO_8859_7', + dict( + db_encoding='ISO_8859_7', + lc_collate='C', + test_str='ελληνικά' + )), + ( + 'With Encoding ISO_8859_8', + dict( + db_encoding='ISO_8859_8', + lc_collate='C', + test_str='דבא' + )), + ( + 'With Encoding KOI8R', + dict( + db_encoding='KOI8R', + lc_collate='C', + test_str='Альтернативная' + )), + ( + 'With Encoding KOI8U', + dict( + db_encoding='KOI8U', + lc_collate='C', + test_str='українська' + )), ] def setUp(self): @@ -113,6 +273,11 @@ class TestEncodingCharset(BaseTestGenerator): self.assertEquals(response.status_code, 200) response_data = json.loads(response.data.decode('utf-8')) self.assertEquals(response_data['data']['rows_fetched_to'], 1) + if IS_PY2 and type(response_data['data']['result'][0][0]) == unicode: + result = response_data['data']['result'][0][0].encode('utf-8') + else: + result = response_data['data']['result'][0][0] + self.assertEquals(result, self.test_str) database_utils.disconnect_database(self, self.encode_sid, self.encode_did) diff --git a/web/pgadmin/utils/driver/psycopg2/connection.py b/web/pgadmin/utils/driver/psycopg2/connection.py index 012266d2..be60bd2e 100644 --- a/web/pgadmin/utils/driver/psycopg2/connection.py +++ b/web/pgadmin/utils/driver/psycopg2/connection.py @@ -400,7 +400,7 @@ class Connection(BaseConnection): if self.use_binary_placeholder: register_binary_typecasters(self.conn) - postgres_encoding, self.python_encoding = \ + postgres_encoding, self.python_encoding, typecast_encoding = \ getEncoding(self.conn.encoding) # Note that we use 'UPDATE pg_settings' for setting bytea_output as a @@ -647,11 +647,7 @@ WHERE params: Extra parameters """ - if sys.version_info < (3,): - if type(query) == unicode: - query = query.encode('utf-8') - else: - query = query.encode('utf-8') + query = query.encode(self.python_encoding) params = self.escape_params_sqlascii(params) cur.execute(query, params) @@ -680,16 +676,13 @@ WHERE return False, str(cur) query_id = random.randint(1, 9999999) - if IS_PY2 and type(query) == unicode: - query = query.encode('utf-8') - current_app.logger.log( 25, u"Execute (with server cursor) for server #{server_id} - " u"{conn_id} (Query-id: {query_id}):\n{query}".format( server_id=self.manager.sid, conn_id=self.conn_id, - query=query.decode('utf-8') if + query=query.decode(self.python_encoding) if sys.version_info < (3,) else query, query_id=query_id ) @@ -943,11 +936,9 @@ WHERE formatted exception message """ - if sys.version_info < (3,): - if type(query) == unicode: - query = query.encode('utf-8') - else: - query = query.encode('utf-8') + encoding = self.python_encoding + + query = query.encode(encoding) # Convert the params based on python_encoding params = self.escape_params_sqlascii(params) @@ -965,7 +956,7 @@ WHERE u"{query_id}):\n{query}".format( server_id=self.manager.sid, conn_id=self.conn_id, - query=query.decode('utf-8'), + query=query.decode(encoding), query_id=query_id ) ) @@ -984,7 +975,7 @@ WHERE u"Error Message:{errmsg}".format( server_id=self.manager.sid, conn_id=self.conn_id, - query=query.decode('utf-8'), + query=query.decode(encoding), errmsg=errmsg, query_id=query_id ) diff --git a/web/pgadmin/utils/driver/psycopg2/encoding.py b/web/pgadmin/utils/driver/psycopg2/encoding.py index 1f4a04f7..ecf2e36f 100644 --- a/web/pgadmin/utils/driver/psycopg2/encoding.py +++ b/web/pgadmin/utils/driver/psycopg2/encoding.py @@ -10,20 +10,52 @@ # Get Postgres and Python encoding encode_dict = { - 'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape'], - 'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape'], - 'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape'], - 'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape'], - 'LATIN1': ['LATIN1', 'latin1'], - 'LATIN2': ['LATIN2', 'latin2'], - 'LATIN3': ['LATIN3', 'latin3'], - 'LATIN4': ['LATIN4', 'latin4'], - 'LATIN5': ['LATIN5', 'latin5'], - 'LATIN6': ['LATIN6', 'latin6'], - 'LATIN7': ['LATIN7', 'latin7'], - 'LATIN8': ['LATIN8', 'latin8'], - 'LATIN9': ['LATIN9', 'latin9'], - 'LATIN10': ['LATIN10', 'latin10'] + 'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'], + 'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'], + 'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape', 'unicode_escape'], + 'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape', 'unicode_escape'], + 'LATIN1': ['LATIN1', 'latin1', 'latin1'], + 'LATIN2': ['LATIN2', 'latin2', 'latin2'], + 'LATIN3': ['LATIN3', 'latin3', 'latin3'], + 'LATIN4': ['LATIN4', 'latin4', 'latin4'], + 'LATIN5': ['LATIN5', 'latin5', 'latin5'], + 'LATIN6': ['LATIN6', 'latin6', 'latin6'], + 'LATIN7': ['LATIN7', 'latin7', 'latin7'], + 'LATIN8': ['LATIN8', 'latin8', 'latin8'], + 'LATIN9': ['LATIN9', 'latin9', 'latin9'], + 'LATIN10': ['LATIN10', 'latin10', 'latin10'], + 'WIN866': ['WIN866', 'cp866', 'cp866'], + 'WIN874': ['WIN874', 'cp874', 'cp874'], + 'WIN1250': ['WIN1250', 'cp1250', 'cp1250'], + 'WIN1251': ['WIN1251', 'cp1251', 'cp1251'], + 'WIN1252': ['WIN1252', 'cp1252', 'cp1252'], + 'WIN1253': ['WIN1253', 'cp1253', 'cp1253'], + 'WIN1254': ['WIN1254', 'cp1254', 'cp1254'], + 'WIN1255': ['WIN1255', 'cp1255', 'cp1255'], + 'WIN1256': ['WIN1256', 'cp1256', 'cp1256'], + 'WIN1257': ['WIN1257', 'cp1257', 'cp1257'], + 'WIN1258': ['WIN1258', 'cp1258', 'cp1258'], + 'EUC_JIS_2004': ['EUC_JIS_2004', 'eucjis2004', 'eucjis2004'], + 'EUCJIS2004': ['EUCJIS2004', 'eucjis2004', 'eucjis2004'], + 'EUC_CN': ['EUC_CN', 'euc-cn', 'euc-cn'], + 'EUCCN': ['EUCCN', 'euc-cn', 'euc-cn'], + 'EUC_JP': ['EUC_JP', 'euc_jp', 'euc_jp'], + 'EUCJP': ['EUCJP', 'euc_jp', 'euc_jp'], + 'EUC_KR': ['EUC_KR', 'euc_kr', 'euc_kr'], + 'EUCKR': ['EUCKR', 'euc_kr', 'euc_kr'], + 'EUC_TW': ['BIG5', 'big5', 'big5'], + 'EUCTW': ['BIG5', 'big5', 'big5'], + 'ISO_8859_5': ['ISO_8859_5', 'iso8859_5', 'iso8859_5'], + 'ISO88595': ['ISO88595', 'iso8859_5', 'iso8859_5'], + 'ISO_8859_6': ['ISO_8859_6', 'iso8859_6', 'iso8859_6'], + 'ISO88596': ['ISO88596', 'iso8859_6', 'iso8859_6'], + 'ISO_8859_7': ['ISO_8859_7', 'iso8859_7', 'iso8859_7'], + 'ISO88597': ['ISO88597', 'iso8859_7', 'iso8859_7'], + 'ISO_8859_8': ['ISO_8859_8', 'iso8859_8', 'iso8859_8'], + 'ISO88598': ['ISO88598', 'iso8859_8', 'iso8859_8'], + 'KOI8R': ['KOI8R', 'koi8_r', 'koi8_r'], + 'KOI8U': ['KOI8U', 'koi8_u', 'koi8_u'], + } @@ -33,7 +65,7 @@ def getEncoding(key): :return: [Postgres_encoding, Python_encoding] - Postgres and Python encoding """ - return encode_dict.get(key, ['UNICODE', 'utf-8']) + return encode_dict.get(key, ['UNICODE', 'utf-8', 'utf-8']) def configureDriverEncodings(encodings): @@ -43,5 +75,5 @@ def configureDriverEncodings(encodings): # because for parameterized DML, param values are converted based on # python encoding of pyscopg2s internal encodings dict. for key, val in encode_dict.items(): - postgres_encoding, python_encoding = val + postgres_encoding, python_encoding, typecast_encoding = val encodings[key] = python_encoding diff --git a/web/pgadmin/utils/driver/psycopg2/typecast.py b/web/pgadmin/utils/driver/psycopg2/typecast.py index fe0edccd..898b1715 100644 --- a/web/pgadmin/utils/driver/psycopg2/typecast.py +++ b/web/pgadmin/utils/driver/psycopg2/typecast.py @@ -19,7 +19,7 @@ import psycopg2 from psycopg2.extensions import encodings from psycopg2.extras import Json as psycopg2_json -from .encoding import configureDriverEncodings +from .encoding import configureDriverEncodings, getEncoding configureDriverEncodings(encodings) @@ -182,20 +182,22 @@ def register_string_typecasters(connection): # characters. Here we unescape them using unicode_escape # and send ahead. When insert update is done, the characters # are escaped again and sent to the DB. - if connection.encoding in ('SQL_ASCII', 'SQLASCII', - 'MULE_INTERNAL', 'MULEINTERNAL'): + + postgres_encoding, python_encoding, typecast_encoding = \ + getEncoding(connection.encoding) + if postgres_encoding != 'UNICODE': if sys.version_info >= (3,): def non_ascii_escape(value, cursor): if value is None: return None return bytes( value, encodings[cursor.connection.encoding] - ).decode('unicode_escape', errors='replace') + ).decode(typecast_encoding, errors='replace') else: def non_ascii_escape(value, cursor): if value is None: return None - return value.decode('unicode_escape', errors='replace') + return value.decode(typecast_encoding, errors='replace') # return value unicode_type = psycopg2.extensions.new_type( diff --git a/web/pgadmin/utils/tests/test_encoding.py b/web/pgadmin/utils/tests/test_encoding.py index e625fd0c..40650fa9 100644 --- a/web/pgadmin/utils/tests/test_encoding.py +++ b/web/pgadmin/utils/tests/test_encoding.py @@ -16,79 +16,207 @@ class TestEncoding(BaseTestGenerator): 'When the database encoding is SQL_ASCII', dict( db_encoding='SQL_ASCII', - expected_return_value=['SQL_ASCII', 'raw_unicode_escape'] + expected_return_value=['SQL_ASCII', 'raw_unicode_escape', + 'unicode_escape'] ) ), ( 'When the database encoding is MULEINTERNAL', dict( db_encoding='MULEINTERNAL', - expected_return_value=['MULEINTERNAL', 'raw_unicode_escape'] + expected_return_value=['MULEINTERNAL', 'raw_unicode_escape', + 'unicode_escape'] ) ), ( 'When the database encoding is LATIN1', dict( db_encoding='LATIN1', - expected_return_value=['LATIN1', 'latin1'] + expected_return_value=['LATIN1', 'latin1', 'latin1'] ) ), ( 'When the database encoding is LATIN2', dict( db_encoding='LATIN2', - expected_return_value=['LATIN2', 'latin2'] + expected_return_value=['LATIN2', 'latin2', 'latin2'] ) ), ( 'When the database encoding is LATIN3', dict( db_encoding='LATIN3', - expected_return_value=['LATIN3', 'latin3'] + expected_return_value=['LATIN3', 'latin3', 'latin3'] ) ), ( 'When the database encoding is LATIN4', dict( db_encoding='LATIN4', - expected_return_value=['LATIN4', 'latin4'] + expected_return_value=['LATIN4', 'latin4', 'latin4'] ) ), ( 'When the database encoding is LATIN5', dict( db_encoding='LATIN5', - expected_return_value=['LATIN5', 'latin5'] + expected_return_value=['LATIN5', 'latin5', 'latin5'] ) ), ( 'When the database encoding is LATIN6', dict( db_encoding='LATIN6', - expected_return_value=['LATIN6', 'latin6'] + expected_return_value=['LATIN6', 'latin6', 'latin6'] ) ), ( 'When the database encoding is LATIN7', dict( db_encoding='LATIN7', - expected_return_value=['LATIN7', 'latin7'] + expected_return_value=['LATIN7', 'latin7', 'latin7'] ) ), ( 'When the database encoding is LATIN8', dict( db_encoding='LATIN8', - expected_return_value=['LATIN8', 'latin8'] + expected_return_value=['LATIN8', 'latin8', 'latin8'] ) ), ( 'When the database encoding is LATIN9', dict( db_encoding='LATIN9', - expected_return_value=['LATIN9', 'latin9'] + expected_return_value=['LATIN9', 'latin9', 'latin9'] ) ), ( 'When the database encoding is LATIN10', dict( db_encoding='LATIN10', - expected_return_value=['LATIN10', 'latin10'] + expected_return_value=['LATIN10', 'latin10', 'latin10'] + ) + ), ( + 'When the database encoding is WIN1250', + dict( + db_encoding='WIN1250', + expected_return_value=['WIN1250', 'cp1250', 'cp1250'] + ) + ), ( + 'When the database encoding is WIN1251', + dict( + db_encoding='WIN1251', + expected_return_value=['WIN1251', 'cp1251', 'cp1251'] + ) + ), ( + 'When the database encoding is WIN1252', + dict( + db_encoding='WIN1252', + expected_return_value=['WIN1252', 'cp1252', 'cp1252'] + ) + ), ( + 'When the database encoding is WIN1253', + dict( + db_encoding='WIN1253', + expected_return_value=['WIN1253', 'cp1253', 'cp1253'] + ) + ), ( + 'When the database encoding is WIN1254', + dict( + db_encoding='WIN1254', + expected_return_value=['WIN1254', 'cp1254', 'cp1254'] + ) + ), ( + 'When the database encoding is WIN1255', + dict( + db_encoding='WIN1255', + expected_return_value=['WIN1255', 'cp1255', 'cp1255'] + ) + ), ( + 'When the database encoding is WIN1256', + dict( + db_encoding='WIN1256', + expected_return_value=['WIN1256', 'cp1256', 'cp1256'] + ) + ), ( + 'When the database encoding is WIN1257', + dict( + db_encoding='WIN1257', + expected_return_value=['WIN1257', 'cp1257', 'cp1257'] ) ), ( 'When the database encoding is WIN1258', dict( db_encoding='WIN1258', - expected_return_value=['UNICODE', 'utf-8'] + expected_return_value=['WIN1258', 'cp1258', 'cp1258'] + ) + ), ( + 'When the database encoding is EUC_JIS_2004', + dict( + db_encoding='EUC_JIS_2004', + expected_return_value=['EUC_JIS_2004', 'eucjis2004', 'eucjis2004'] + ) + ), ( + 'When the database encoding is EUC_CN', + dict( + db_encoding='EUC_CN', + expected_return_value=['EUC_CN', 'euc-cn', 'euc-cn'] + ) + ), ( + 'When the database encoding is EUC_JP', + dict( + db_encoding='EUC_JP', + expected_return_value=['EUC_JP', 'euc_jp', 'euc_jp'] + ) + ), ( + 'When the database encoding is EUC_KR', + dict( + db_encoding='EUC_KR', + expected_return_value=['EUC_KR', 'euc_kr', 'euc_kr'] + ) + ), ( + 'When the database encoding is EUC_TW', + dict( + db_encoding='EUC_TW', + expected_return_value=['BIG5', 'big5', 'big5'] + ) + ), ( + 'When the database encoding is ISO_8859_5', + dict( + db_encoding='ISO_8859_5', + expected_return_value=['ISO_8859_5', 'iso8859_5', 'iso8859_5'] + ) + ), ( + 'When the database encoding is ISO_8859_6', + dict( + db_encoding='ISO_8859_6', + expected_return_value=['ISO_8859_6', 'iso8859_6', 'iso8859_6'] + ) + ), ( + 'When the database encoding is ISO_8859_7', + dict( + db_encoding='ISO_8859_7', + expected_return_value=['ISO_8859_7', 'iso8859_7', 'iso8859_7'] + ) + ), ( + 'When the database encoding is ISO_8859_8', + dict( + db_encoding='ISO_8859_8', + expected_return_value=['ISO_8859_8', 'iso8859_8', 'iso8859_8'] + ) + ), ( + 'When the database encoding is KOI8R', + dict( + db_encoding='KOI8R', + expected_return_value=['KOI8R', 'koi8_r', 'koi8_r'] + ) + ), ( + 'When the database encoding is KOI8U', + dict( + db_encoding='KOI8U', + expected_return_value=['KOI8U', 'koi8_u', 'koi8_u'] + ) + ), ( + 'When the database encoding is WIN866', + dict( + db_encoding='WIN866', + expected_return_value=['WIN866', 'cp866', 'cp866'] + ) + ), ( + 'When the database encoding is WIN874', + dict( + db_encoding='WIN874', + expected_return_value=['WIN874', 'cp874', 'cp874'] ) ), ]