Changeset 613 for trunk


Ignore:
Timestamp:
Nov 23, 2015, 2:15:43 PM (4 years ago)
Author:
cito
Message:

Add some more testing of unicode issues

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/module/TEST_PyGreSQL_classic_connection.py

    r611 r613  
    2828# We need a database to test against.  If LOCAL_PyGreSQL.py exists we will
    2929# get our information from that.  Otherwise we use the defaults.
     30# These tests should be run with various PostgreSQL versions and databases
     31# created with different encodings and locales.  Particularly, make sure the
     32# tests are running against databases created with both SQL_ASCII and UTF8.
    3033dbname = 'unittest'
    3134dbhost = None
     
    4144except NameError:  # Python >= 3.0
    4245    long = int
     46
     47try:
     48    unicode
     49except NameError:  # Python >= 3.0
     50    unicode = str
    4351
    4452unicode_strings = str is not bytes
     
    541549    def setUp(self):
    542550        self.c = connect()
    543         self.c.query('set client_encoding = utf8')
     551        self.c.query('set client_encoding=utf8')
    544552
    545553    def tearDown(self):
     
    614622    def testQueryWithUnicodeParams(self):
    615623        query = self.c.query
    616         query('set client_encoding = utf8')
     624        try:
     625            query('set client_encoding=utf8')
     626            query("select 'wörld'").getresult()[0][0] == 'wörld'
     627        except pg.ProgrammingError:
     628            self.skipTest("database does not support utf8")
    617629        self.assertEqual(query("select $1||', '||$2||'!'",
    618630            ('Hello', u'wörld')).getresult(), [('Hello, wörld!',)])
    619         self.assertEqual(query("select $1||', '||$2||'!'",
    620             ('Hello', u'мир')).getresult(),
    621             [('Hello, мир!',)])
    622         query('set client_encoding = latin1')
     631
     632    def testQueryWithUnicodeParamsLatin1(self):
     633        query = self.c.query
     634        try:
     635            query('set client_encoding=latin1')
     636            query("select 'wörld'").getresult()[0][0] == 'wörld'
     637        except pg.ProgrammingError:
     638            self.skipTest("database does not support latin1")
    623639        r = query("select $1||', '||$2||'!'", ('Hello', u'wörld')).getresult()
    624640        if unicode_strings:
     
    628644        self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
    629645            ('Hello', u'мир'))
    630         query('set client_encoding = iso_8859_1')
    631         r = query("select $1||', '||$2||'!'", ('Hello', u'wörld')).getresult()
     646        query('set client_encoding=iso_8859_1')
     647        r = query("select $1||', '||$2||'!'",
     648            ('Hello', u'wörld')).getresult()
    632649        if unicode_strings:
    633650            self.assertEqual(r, [('Hello, wörld!',)])
     
    636653        self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
    637654            ('Hello', u'мир'))
    638         query('set client_encoding = iso_8859_5')
     655        query('set client_encoding=sql_ascii')
    639656        self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
    640657            ('Hello', u'wörld'))
    641         r = query("select $1||', '||$2||'!'", ('Hello', u'мир')).getresult()
     658
     659    def testQueryWithUnicodeParamsCyrillic(self):
     660        query = self.c.query
     661        try:
     662            query('set client_encoding=iso_8859_5')
     663            query("select 'мир'").getresult()[0][0] == 'мир'
     664        except pg.ProgrammingError:
     665            self.skipTest("database does not support cyrillic")
     666        self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
     667            ('Hello', u'wörld'))
     668        r = query("select $1||', '||$2||'!'",
     669            ('Hello', u'мир')).getresult()
    642670        if unicode_strings:
    643671            self.assertEqual(r, [('Hello, мир!',)])
    644672        else:
    645673            self.assertEqual(r, [(u'Hello, мир!'.encode('cyrillic'),)])
    646         query('set client_encoding = sql_ascii')
     674        query('set client_encoding=sql_ascii')
    647675        self.assertRaises(UnicodeError, query, "select $1||', '||$2||'!'",
    648             ('Hello', u'wörld'))
     676            ('Hello', u'мир!'))
    649677
    650678    def testQueryWithMixedParams(self):
     
    690718            "d numeric, f4 real, f8 double precision, m money,"
    691719            "c char(1), v4 varchar(4), c4 char(4), t text)")
     720        # Check whether the test database uses SQL_ASCII - this means
     721        # that it does not consider encoding when calculating lengths.
     722        c.query("set client_encoding=utf8")
     723        cls.has_encoding = c.query(
     724            "select length('ä') - length('a')").getresult()[0][0] == 0
    692725        c.close()
    693726
     
    718751            2.345678, 2.25, 2.125, '2.75', 'y', 'q', 'ijk', 'mnop\nstux!')]
    719752
    720     def get_back(self):
     753    @classmethod
     754    def db_len(cls, s, encoding):
     755        if cls.has_encoding:
     756            s = s if isinstance(s, unicode) else s.decode(encoding)
     757        else:
     758            s = s.encode(encoding) if isinstance(s, unicode) else s
     759        return len(s)
     760
     761    def get_back(self, encoding='utf-8'):
    721762        """Convert boolean and decimal values back."""
    722763        data = []
     
    752793            if row[10] is not None:  # char(1)
    753794                self.assertIsInstance(row[10], str)
    754                 self.assertEqual(len(row[10]), 1)
     795                self.assertEqual(self.db_len(row[10], encoding), 1)
    755796            if row[11] is not None:  # varchar(4)
    756797                self.assertIsInstance(row[11], str)
    757                 self.assertLessEqual(len(row[11]), 4)
     798                self.assertLessEqual(self.db_len(row[11], encoding), 4)
    758799            if row[12] is not None:  # char(4)
    759800                self.assertIsInstance(row[12], str)
    760                 self.assertEqual(len(row[12]), 4)
     801                self.assertEqual(self.db_len(row[12], encoding), 4)
    761802                row[12] = row[12].rstrip()
    762803            if row[13] is not None:  # text
     
    768809    def testInserttable1Row(self):
    769810        data = self.data[2:3]
    770         self.c.inserttable("test", data)
     811        self.c.inserttable('test', data)
    771812        self.assertEqual(self.get_back(), data)
    772813
    773814    def testInserttable4Rows(self):
    774815        data = self.data
    775         self.c.inserttable("test", data)
     816        self.c.inserttable('test', data)
    776817        self.assertEqual(self.get_back(), data)
    777818
     
    779820        num_rows = 100
    780821        data = self.data[2:3] * num_rows
    781         self.c.inserttable("test", data)
     822        self.c.inserttable('test', data)
    782823        r = self.c.query("select count(*) from test").getresult()[0][0]
    783824        self.assertEqual(r, num_rows)
     
    787828        data = self.data[2:3]
    788829        for _i in range(num_rows):
    789             self.c.inserttable("test", data)
     830            self.c.inserttable('test', data)
    790831        r = self.c.query("select count(*) from test").getresult()[0][0]
    791832        self.assertEqual(r, num_rows)
     
    793834    def testInserttableNullValues(self):
    794835        data = [(None,) * 14] * 100
    795         self.c.inserttable("test", data)
     836        self.c.inserttable('test', data)
    796837        self.assertEqual(self.get_back(), data)
    797838
     
    801842            1.0 + 1.0 / 32, 1.0 + 1.0 / 32, None,
    802843            "1", "1234", "1234", "1234" * 100)]
    803         self.c.inserttable("test", data)
     844        self.c.inserttable('test', data)
    804845        self.assertEqual(self.get_back(), data)
     846
     847    def testInserttableByteValues(self):
     848        try:
     849            self.c.query("select '€', 'käse', 'сыр', 'pont-l''évêque'")
     850        except pg.ProgrammingError:
     851            self.skipTest("database does not support utf8")
     852        # non-ascii chars do not fit in char(1) when there is no encoding
     853        c = u'€' if self.has_encoding else u'$'
     854        row_unicode = (0, 0, long(0), False, u'1970-01-01', u'00:00:00',
     855            0.0, 0.0, 0.0, u'0.0',
     856            c, u'bäd', u'bäd', u"käse сыр pont-l'évêque")
     857        row_bytes = tuple(s.encode('utf-8')
     858            if isinstance(s, unicode) else s for s in row_unicode)
     859        data = [row_bytes] * 2
     860        self.c.inserttable('test', data)
     861        if unicode_strings:
     862            data = [row_unicode] * 2
     863        self.assertEqual(self.get_back(), data)
     864
     865    def testInserttableUnicodeUtf8(self):
     866        try:
     867            self.c.query("select '€', 'käse', 'сыр', 'pont-l''évêque'")
     868        except pg.ProgrammingError:
     869            self.skipTest("database does not support utf8")
     870        # non-ascii chars do not fit in char(1) when there is no encoding
     871        c = u'€' if self.has_encoding else u'$'
     872        row_unicode = (0, 0, long(0), False, u'1970-01-01', u'00:00:00',
     873            0.0, 0.0, 0.0, u'0.0',
     874            c, u'bäd', u'bäd', u"käse сыр pont-l'évêque")
     875        data = [row_unicode] * 2
     876        self.c.inserttable('test', data)
     877        if not unicode_strings:
     878            row_bytes = tuple(s.encode('utf-8')
     879                if isinstance(s, unicode) else s for s in row_unicode)
     880            data = [row_bytes] * 2
     881        self.assertEqual(self.get_back(), data)
     882
     883    def testInserttableUnicodeLatin1(self):
     884
     885        try:
     886            self.c.query("set client_encoding=latin1")
     887            self.c.query("select 'Â¥'")
     888        except pg.ProgrammingError:
     889            self.skipTest("database does not support latin1")
     890        # non-ascii chars do not fit in char(1) when there is no encoding
     891        c = u'€' if self.has_encoding else u'$'
     892        row_unicode = (0, 0, long(0), False, u'1970-01-01', u'00:00:00',
     893            0.0, 0.0, 0.0, u'0.0',
     894            c, u'bäd', u'bäd', u"for käse and pont-l'évêque pay in €")
     895        data = [row_unicode]
     896        # cannot encode € sign with latin1 encoding
     897        self.assertRaises(UnicodeEncodeError, self.c.inserttable, 'test', data)
     898        row_unicode = tuple(s.replace(u'€', u'Â¥')
     899            if isinstance(s, unicode) else s for s in row_unicode)
     900        data = [row_unicode] * 2
     901        self.c.inserttable('test', data)
     902        if not unicode_strings:
     903            row_bytes = tuple(s.encode('latin1')
     904                if isinstance(s, unicode) else s for s in row_unicode)
     905            data = [row_bytes] * 2
     906        self.assertEqual(self.get_back('latin1'), data)
     907
     908    def testInserttableUnicodeLatin9(self):
     909        try:
     910            self.c.query("set client_encoding=latin9")
     911            self.c.query("select '€'")
     912        except pg.ProgrammingError:
     913            self.skipTest("database does not support latin9")
     914            return
     915        # non-ascii chars do not fit in char(1) when there is no encoding
     916        c = u'€' if self.has_encoding else u'$'
     917        row_unicode = (0, 0, long(0), False, u'1970-01-01', u'00:00:00',
     918            0.0, 0.0, 0.0, u'0.0',
     919            c, u'bäd', u'bäd', u"for käse and pont-l'évêque pay in €")
     920        data = [row_unicode] * 2
     921        self.c.inserttable('test', data)
     922        if not unicode_strings:
     923            row_bytes = tuple(s.encode('latin9')
     924                if isinstance(s, unicode) else s for s in row_unicode)
     925            data = [row_bytes] * 2
     926        self.assertEqual(self.get_back('latin9'), data)
     927
     928    def testInserttableNoEncoding(self):
     929        self.c.query("set client_encoding=sql_ascii")
     930        # non-ascii chars do not fit in char(1) when there is no encoding
     931        c = u'€' if self.has_encoding else u'$'
     932        row_unicode = (0, 0, long(0), False, u'1970-01-01', u'00:00:00',
     933            0.0, 0.0, 0.0, u'0.0',
     934            c, u'bäd', u'bäd', u"for käse and pont-l'évêque pay in €")
     935        data = [row_unicode]
     936        # cannot encode non-ascii unicode without a specific encoding
     937        self.assertRaises(UnicodeEncodeError, self.c.inserttable, 'test', data)
    805938
    806939
     
    10261159                break
    10271160        else:
    1028             self.fail("Cannot set English money locale")
     1161            self.SkipTest("cannot set English money locale")
    10291162        pg.set_decimal_point('.')
    1030         r = query("select '34.25'::money").getresult()[0][0]
     1163        try:
     1164            r = query("select '34.25'::money")
     1165        except pg.ProgrammingError:
     1166            # this can happen if the currency signs cannot be
     1167            # converted using the encoding of the test database
     1168            self.skipTest('database does not support money')
     1169        r = r.getresult()[0][0]
    10311170        self.assertIsInstance(r, d)
    10321171        self.assertEqual(r, d('34.25'))
     
    10431182                break
    10441183        else:
    1045             self.fail("Cannot set English money locale")
     1184            self.SkipTest("cannot set German money locale")
    10461185        pg.set_decimal_point(',')
    1047         r = query("select '34,25'::money").getresult()[0][0]
     1186        try:
     1187            r = query("select '34,25'::money")
     1188        except pg.ProgrammingError:
     1189            self.skipTest('database does not support money')
     1190        r = r.getresult()[0][0]
    10481191        self.assertIsInstance(r, d)
    10491192        self.assertEqual(r, d('34.25'))
    10501193        pg.set_decimal_point('.')
    1051         r = query("select '34,25'::money").getresult()[0][0]
     1194        try:
     1195            r = query("select '34,25'::money")
     1196        except pg.ProgrammingError:
     1197            self.skipTest('database does not support money')
     1198        r = r.getresult()[0][0]
    10521199        self.assertNotEqual(r, d('34.25'))
    10531200        pg.set_decimal_point(point)
Note: See TracChangeset for help on using the changeset viewer.