Changeset 781 for trunk/pgdb.py


Ignore:
Timestamp:
Jan 25, 2016, 3:44:52 PM (4 years ago)
Author:
cito
Message:

Add full support for PostgreSQL array types

At the core of this patch is a fast parser for the peculiar syntax of
literal array expressions in PostgreSQL that was added to the C module.
This is not trivial, because PostgreSQL arrays can be multidimensional
and the syntax is different from Python and SQL expressions.

The Python pg and pgdb modules make use of this parser so that they can
return database columns containing PostgreSQL arrays to Python as lists.
Also added quoting methods that allow passing PostgreSQL arrays as lists
to insert()/update() and execute/executemany(). These methods are simpler
and were implemented in Python but needed support from the regex module.

The patch also adds makes getresult() in pg automatically return bytea
values in unescaped form as bytes strings. Before, it was necessary to
call unescape_bytea manually. The pgdb module did this already.

The patch includes some more refactorings and simplifications regarding
the quoting and casting in pg and pgdb.

Some references to antique PostgreSQL types that are not used any more
in the supported PostgreSQL versions have been removed.

Also added documentation and tests for the new features.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/pgdb.py

    r774 r781  
    7373from math import isnan, isinf
    7474from collections import namedtuple
     75from re import compile as regex
    7576from json import loads as jsondecode, dumps as jsonencode
    7677
     
    166167    @staticmethod
    167168    def typecast(typ, value):
    168         """Cast value to database type."""
     169        """Cast value according to database type."""
    169170        if value is None:
    170171            # for NULL values, no typecast is necessary
     
    172173        cast = _cast.get(typ)
    173174        if cast is None:
     175            if typ.startswith('_'):
     176                # cast as an array type
     177                cast = _cast.get(typ[1:])
     178                return cast_array(value, cast)
    174179            # no typecast available or necessary
    175180            return value
     
    193198
    194199
     200_re_array_escape = regex(r'(["\\])')
     201_re_array_quote = regex(r'[{},"\\\s]|^[Nn][Uu][Ll][Ll]$')
     202
    195203class _quotedict(dict):
    196204    """Dictionary with auto quoting of its items.
     
    241249    def _quote(self, val):
    242250        """Quote value depending on its type."""
     251        if val is None:
     252            return 'NULL'
    243253        if isinstance(val, (datetime, date, time, timedelta, Json)):
    244254            val = str(val)
     
    250260            else:
    251261                val = self._cnx.escape_string(val)
    252             val = "'%s'" % val
    253         elif isinstance(val, (int, long)):
    254             pass
    255         elif isinstance(val, float):
     262            return "'%s'" % val
     263        if isinstance(val, float):
    256264            if isinf(val):
    257265                return "'-Infinity'" if val < 0 else "'Infinity'"
    258             elif isnan(val):
     266            if isnan(val):
    259267                return "'NaN'"
    260         elif val is None:
    261             val = 'NULL'
    262         elif isinstance(val, list):
     268            return val
     269        if isinstance(val, (int, long, Decimal)):
     270            return val
     271        if isinstance(val, list):
     272            return "'%s'" % self._quote_array(val)
     273        if isinstance(val, tuple):
    263274            q = self._quote
    264             val = 'ARRAY[%s]' % ','.join(str(q(v)) for v in val)
    265         elif isinstance(val, tuple):
    266             q = self._quote
    267             val = 'ROW(%s)' % ','.join(str(q(v)) for v in val)
    268         elif Decimal is not float and isinstance(val, Decimal):
    269             pass
    270         elif hasattr(val, '__pg_repr__'):
    271             val = val.__pg_repr__()
    272         else:
     275            return 'ROW(%s)' % ','.join(str(q(v)) for v in val)
     276        try:
     277            return val.__pg_repr__()
     278        except AttributeError:
    273279            raise InterfaceError(
    274280                'do not know how to handle type %s' % type(val))
    275         return val
     281
     282    def _quote_array(self, val):
     283        """Quote value as a literal constant for an array."""
     284        # We could also cast to an array constructor here, but that is more
     285        # verbose and you need to know the base type to build emtpy arrays.
     286        if isinstance(val, list):
     287            return '{%s}' % ','.join(self._quote_array(v) for v in val)
     288        if val is None:
     289            return 'null'
     290        if isinstance(val, (int, long, float)):
     291            return str(val)
     292        if isinstance(val, bool):
     293            return 't' if val else 'f'
     294        if isinstance(val, basestring):
     295            if not val:
     296                return '""'
     297            if _re_array_quote.search(val):
     298                return '"%s"' % _re_array_escape.sub(r'\\\1', val)
     299            return val
     300        try:
     301            return val.__pg_repr__()
     302        except AttributeError:
     303            raise InterfaceError(
     304                'do not know how to handle type %s' % type(val))
    276305
    277306    def _quoteparams(self, string, parameters):
     
    902931    def __eq__(self, other):
    903932        if isinstance(other, basestring):
     933            if other.startswith('_'):
     934                other = other[1:]
    904935            return other in self
    905936        else:
     
    908939    def __ne__(self, other):
    909940        if isinstance(other, basestring):
     941            if other.startswith('_'):
     942                other = other[1:]
    910943            return other not in self
    911944        else:
    912945            return super(Type, self).__ne__(other)
     946
     947
     948class ArrayType:
     949    """Type class for PostgreSQL array types."""
     950
     951    def __eq__(self, other):
     952        if isinstance(other, basestring):
     953            return other.startswith('_')
     954        else:
     955            return isinstance(other, ArrayType)
     956
     957    def __ne__(self, other):
     958        if isinstance(other, basestring):
     959            return not other.startswith('_')
     960        else:
     961            return not isinstance(other, ArrayType)
    913962
    914963
     
    918967BINARY = Type('bytea')
    919968NUMBER = Type('int2 int4 serial int8 float4 float8 numeric money')
    920 DATETIME = Type('date time timetz timestamp timestamptz datetime abstime'
    921     ' interval tinterval timespan reltime')
    922 ROWID = Type('oid oid8')
     969DATETIME = Type('date time timetz timestamp timestamptz interval'
     970    ' abstime reltime')  # these are very old
     971ROWID = Type('oid')
    923972
    924973
     
    934983DATE = Type('date')
    935984TIME = Type('time timetz')
    936 TIMESTAMP = Type('timestamp timestamptz datetime abstime')
    937 INTERVAL = Type('interval tinterval timespan reltime')
     985TIMESTAMP = Type('timestamp timestamptz')
     986INTERVAL = Type('interval')
    938987JSON = Type('json jsonb')
     988
     989# Type object for arrays (also equate to their base types):
     990
     991ARRAY = ArrayType()
    939992
    940993
Note: See TracChangeset for help on using the changeset viewer.