Changeset 814 for trunk/pgdb.py


Ignore:
Timestamp:
Feb 3, 2016, 3:23:20 PM (3 years ago)
Author:
cito
Message:

Add typecasting of dates, times, timestamps, intervals

So far, PyGreSQL has returned these types only as strings (in various
formats depending on the DateStyle? setting) and left it to the user
to parse and interpret the strings. These types are now properly cast
into the corresponding detetime types of Python, and this works with
any setting of DatesStyle?, even if you change DateStyle? in the middle
of a database session.

To implement this, a fast method for getting the datestyle (cached and
without roundtrip to the database) has been added. Also, the typecast
mechanism has been extended so that typecast functions can optionally
also take the connection as argument.

The date and time typecast functions have been implemented in Python
using the new typecast registry and added to both pg and pgdb. Some
duplication of code in the two modules was unavoidable, since we don't
want the modules to be dependent of each other or install additional
helper modules. One day we might want to change this, put everything
in one package and factor out some of the functionality.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/pgdb.py

    r812 r814  
    7373from math import isnan, isinf
    7474from collections import namedtuple
     75from re import compile as regex
    7576from json import loads as jsondecode, dumps as jsonencode
    7677
     
    110111
    111112### Internal Type Handling
     113
     114try:
     115    from inspect import signature
     116except ImportError:  # Python < 3.3
     117    from inspect import getargspec
     118
     119    get_args = lambda func: getargspec(func).args
     120else:
     121    get_args = lambda func: list(signature(func).parameters)
     122
     123try:
     124    if datetime.strptime('+0100', '%z') is None:
     125        raise ValueError
     126except ValueError:  # Python < 3.2
     127    timezones = None
     128else:
     129    # time zones used in Postgres timestamptz output
     130    timezones = dict(CET='+0100', EET='+0200', EST='-0500',
     131        GMT='+0000', HST='-1000', MET='+0100', MST='-0700',
     132        UCT='+0000', UTC='+0000', WET='+0000')
     133
    112134
    113135def decimal_type(decimal_type=None):
     
    140162    """Cast an int2vector value."""
    141163    return [int(v) for v in value.split()]
     164
     165
     166def cast_date(value, connection):
     167    """Cast a date value."""
     168    # The output format depends on the server setting DateStyle.  The default
     169    # setting ISO and the setting for German are actually unambiguous.  The
     170    # order of days and months in the other two settings is however ambiguous,
     171    # so at least here we need to consult the setting to properly parse values.
     172    if value == '-infinity':
     173        return date.min
     174    if value == 'infinity':
     175        return date.max
     176    value = value.split()
     177    if value[-1] == 'BC':
     178        return date.min
     179    value = value[0]
     180    if len(value) > 10:
     181        return date.max
     182    fmt = connection.date_format()
     183    return datetime.strptime(value, fmt).date()
     184
     185
     186def cast_time(value):
     187    """Cast a time value."""
     188    fmt = '%H:%M:%S.%f' if len(value) > 8 else '%H:%M:%S'
     189    return datetime.strptime(value, fmt).time()
     190
     191
     192_re_timezone = regex('(.*)([+-].*)')
     193
     194
     195def cast_timetz(value):
     196    """Cast a timetz value."""
     197    tz = _re_timezone.match(value)
     198    if tz:
     199        value, tz = tz.groups()
     200    else:
     201        tz = '+0000'
     202    fmt = '%H:%M:%S.%f' if len(value) > 8 else '%H:%M:%S'
     203    if timezones:
     204        if tz.startswith(('+', '-')):
     205            if len(tz) < 5:
     206                tz += '00'
     207            else:
     208                tz = tz.replace(':', '')
     209        elif tz in timezones:
     210            tz = timezones[tz]
     211        else:
     212            tz = '+0000'
     213        value += tz
     214        fmt += '%z'
     215    return datetime.strptime(value, fmt).timetz()
     216
     217
     218def cast_timestamp(value, connection):
     219    """Cast a timestamp value."""
     220    if value == '-infinity':
     221        return datetime.min
     222    if value == 'infinity':
     223        return datetime.max
     224    value = value.split()
     225    if value[-1] == 'BC':
     226        return datetime.min
     227    fmt = connection.date_format()
     228    if fmt.endswith('-%Y') and len(value) > 2:
     229        value = value[1:5]
     230        if len(value[3]) > 4:
     231            return datetime.max
     232        fmt = ['%d %b' if fmt.startswith('%d') else '%b %d',
     233            '%H:%M:%S.%f' if len(value[2]) > 8 else '%H:%M:%S', '%Y']
     234    else:
     235        if len(value[0]) > 10:
     236            return datetime.max
     237        fmt = [fmt, '%H:%M:%S.%f' if len(value[1]) > 8 else '%H:%M:%S']
     238    return datetime.strptime(' '.join(value), ' '.join(fmt))
     239
     240
     241def cast_timestamptz(value, connection):
     242    """Cast a timestamptz value."""
     243    if value == '-infinity':
     244        return datetime.min
     245    if value == 'infinity':
     246        return datetime.max
     247    value = value.split()
     248    if value[-1] == 'BC':
     249        return datetime.min
     250    fmt = connection.date_format()
     251    if fmt.endswith('-%Y') and len(value) > 2:
     252        value = value[1:]
     253        if len(value[3]) > 4:
     254            return datetime.max
     255        fmt = ['%d %b' if fmt.startswith('%d') else '%b %d',
     256            '%H:%M:%S.%f' if len(value[2]) > 8 else '%H:%M:%S', '%Y']
     257        value, tz = value[:-1], value[-1]
     258    else:
     259        if fmt.startswith('%Y-'):
     260            tz = _re_timezone.match(value[1])
     261            if tz:
     262                value[1], tz = tz.groups()
     263            else:
     264                tz = '+0000'
     265        else:
     266            value, tz = value[:-1], value[-1]
     267        if len(value[0]) > 10:
     268            return datetime.max
     269        fmt = [fmt, '%H:%M:%S.%f' if len(value[1]) > 8 else '%H:%M:%S']
     270    if timezones:
     271        if tz.startswith(('+', '-')):
     272            if len(tz) < 5:
     273                tz += '00'
     274            else:
     275                tz = tz.replace(':', '')
     276        elif tz in timezones:
     277            tz = timezones[tz]
     278        else:
     279            tz = '+0000'
     280        value.append(tz)
     281        fmt.append('%z')
     282    return datetime.strptime(' '.join(value), ' '.join(fmt))
     283
     284_re_interval_sql_standard = regex(
     285    '(?:([+-])?([0-9]+)-([0-9]+) ?)?'
     286    '(?:([+-]?[0-9]+)(?!:) ?)?'
     287    '(?:([+-])?([0-9]+):([0-9]+):([0-9]+)(?:\\.([0-9]+))?)?')
     288
     289_re_interval_postgres = regex(
     290    '(?:([+-]?[0-9]+) ?years? ?)?'
     291    '(?:([+-]?[0-9]+) ?mons? ?)?'
     292    '(?:([+-]?[0-9]+) ?days? ?)?'
     293    '(?:([+-])?([0-9]+):([0-9]+):([0-9]+)(?:\\.([0-9]+))?)?')
     294
     295_re_interval_postgres_verbose = regex(
     296    '@ ?(?:([+-]?[0-9]+) ?years? ?)?'
     297    '(?:([+-]?[0-9]+) ?mons? ?)?'
     298    '(?:([+-]?[0-9]+) ?days? ?)?'
     299    '(?:([+-]?[0-9]+) ?hours? ?)?'
     300    '(?:([+-]?[0-9]+) ?mins? ?)?'
     301    '(?:([+-])?([0-9]+)(?:\\.([0-9]+))? ?secs?)? ?(ago)?')
     302
     303_re_interval_iso_8601 = regex(
     304    'P(?:([+-]?[0-9]+)Y)?'
     305    '(?:([+-]?[0-9]+)M)?'
     306    '(?:([+-]?[0-9]+)D)?'
     307    '(?:T(?:([+-]?[0-9]+)H)?'
     308    '(?:([+-]?[0-9]+)M)?'
     309    '(?:([+-])?([0-9]+)(?:\\.([0-9]+))?S)?)?')
     310
     311
     312def cast_interval(value):
     313    """Cast an interval value."""
     314    # The output format depends on the server setting IntervalStyle, but it's
     315    # not necessary to consult this setting to parse it.  It's faster to just
     316    # check all possible formats, and there is no ambiguity here.
     317    m = _re_interval_iso_8601.match(value)
     318    if m:
     319        m = [d or '0' for d in m.groups()]
     320        secs_ago = m.pop(5) == '-'
     321        m = [int(d) for d in m]
     322        years, mons, days, hours, mins, secs, usecs = m
     323        if secs_ago:
     324            secs = -secs
     325            usecs = -usecs
     326    else:
     327        m = _re_interval_postgres_verbose.match(value)
     328        if m:
     329            m, ago = [d or '0' for d in m.groups()[:8]], m.group(9)
     330            secs_ago = m.pop(5) == '-'
     331            m = [-int(d) for d in m] if ago else [int(d) for d in m]
     332            years, mons, days, hours, mins, secs, usecs = m
     333            if secs_ago:
     334                secs = - secs
     335                usecs = -usecs
     336        else:
     337            m = _re_interval_postgres.match(value)
     338            if m and any(m.groups()):
     339                m = [d or '0' for d in m.groups()]
     340                hours_ago = m.pop(3) == '-'
     341                m = [int(d) for d in m]
     342                years, mons, days, hours, mins, secs, usecs = m
     343                if hours_ago:
     344                    hours = -hours
     345                    mins = -mins
     346                    secs = -secs
     347                    usecs = -usecs
     348            else:
     349                m = _re_interval_sql_standard.match(value)
     350                if m and any(m.groups()):
     351                    m = [d or '0' for d in m.groups()]
     352                    years_ago = m.pop(0) == '-'
     353                    hours_ago = m.pop(3) == '-'
     354                    m = [int(d) for d in m]
     355                    years, mons, days, hours, mins, secs, usecs = m
     356                    if years_ago:
     357                        years = -years
     358                        mons = -mons
     359                    if hours_ago:
     360                        hours = -hours
     361                        mins = -mins
     362                        secs = -secs
     363                        usecs = -usecs
     364                else:
     365                    raise ValueError('Cannot parse interval: %s' % value)
     366    days += 365 * years + 30 * mons
     367    return timedelta(days=days, hours=hours, minutes=mins,
     368        seconds=secs, microseconds=usecs)
    142369
    143370
     
    161388        'float4': float, 'float8': float,
    162389        'numeric': Decimal, 'money': cast_money,
     390        'date': cast_date, 'interval': cast_interval,
     391        'time': cast_time, 'timetz': cast_timetz,
     392        'timestamp': cast_timestamp, 'timestamptz': cast_timestamptz,
    163393        'int2vector': cast_int2vector,
    164394        'anyarray': cast_array, 'record': cast_record}
     395
     396    connection = None  # will be set in local connection specific instances
    165397
    166398    def __missing__(self, typ):
     
    175407        if cast:
    176408            # store default for faster access
     409            cast = self._add_connection(cast)
    177410            self[typ] = cast
    178411        elif typ.startswith('_'):
     
    185418        return cast
    186419
     420    @staticmethod
     421    def _needs_connection(func):
     422        """Check if a typecast function needs a connection argument."""
     423        try:
     424            args = get_args(func)
     425        except (TypeError, ValueError):
     426            return False
     427        else:
     428            return 'connection' in args[1:]
     429
     430    def _add_connection(self, cast):
     431        """Add a connection argument to the typecast function if necessary."""
     432        if not self.connection or not self._needs_connection(cast):
     433            return cast
     434        connection = self.connection
     435        return lambda value: cast(value, connection=connection)
     436
    187437    def get(self, typ, default=None):
    188438        """Get the typecast function for the given database type."""
     
    201451                raise TypeError("Cast parameter must be callable")
    202452            for t in typ:
    203                 self[t] = cast
     453                self[t] = self._add_connection(cast)
    204454                self.pop('_%s' % t, None)
    205455
     
    219469                cast = defaults.get(t)
    220470                if cast:
    221                     self[t] = cast
     471                    self[t] = self._add_connection(cast)
    222472                    t = '_%s' % t
    223473                    cast = defaults.get(t)
    224474                    if cast:
    225                         self[t] = cast
     475                        self[t] = self._add_connection(cast)
    226476                    else:
    227477                        self.pop(t, None)
     
    272522
    273523    defaults = _typecasts
     524
     525    connection = None  # will be set in a connection specific instance
    274526
    275527    def __missing__(self, typ):
     
    283535            cast = self.defaults.get(typ)
    284536            if cast:
     537                cast = self._add_connection(cast)
    285538                self[typ] = cast
    286539            else:
     
    338591        self._typecasts = LocalTypecasts()
    339592        self._typecasts.get_fields = self.get_fields
     593        self._typecasts.connection = cnx
    340594
    341595    def __missing__(self, key):
     
    12991553# Additional type helpers for PyGreSQL:
    13001554
     1555def Interval(days, hours=0, minutes=0, seconds=0, microseconds=0):
     1556    """Construct an object holding a time inverval value."""
     1557    return timedelta(days, hours=hours, minutes=minutes, seconds=seconds,
     1558        microseconds=microseconds)
     1559
    13011560class Bytea(bytes):
    13021561    """Construct an object capable of holding a bytea value."""
Note: See TracChangeset for help on using the changeset viewer.