You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1614 lines
57 KiB
1614 lines
57 KiB
# -*- coding: utf-8 -*-
|
|
"""
|
|
This module offers a generic date/time string parser which is able to parse
|
|
most known formats to represent a date and/or time.
|
|
|
|
This module attempts to be forgiving with regards to unlikely input formats,
|
|
returning a datetime object even for dates which are ambiguous. If an element
|
|
of a date/time stamp is omitted, the following rules are applied:
|
|
|
|
- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
|
|
on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
|
|
specified.
|
|
- If a time zone is omitted, a timezone-naive datetime is returned.
|
|
|
|
If any other elements are missing, they are taken from the
|
|
:class:`datetime.datetime` object passed to the parameter ``default``. If this
|
|
results in a day number exceeding the valid number of days per month, the
|
|
value falls back to the end of the month.
|
|
|
|
Additional resources about date/time string formats can be found below:
|
|
|
|
- `A summary of the international standard date and time notation
|
|
<https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
|
|
- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
|
|
- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
|
|
- `CPAN ParseDate module
|
|
<https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
|
|
- `Java SimpleDateFormat Class
|
|
<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
|
|
"""
|
|
from __future__ import unicode_literals
|
|
|
|
import datetime
|
|
import re
|
|
import string
|
|
import time
|
|
import warnings
|
|
|
|
from calendar import monthrange
|
|
from io import StringIO
|
|
|
|
import six
|
|
from six import integer_types, text_type
|
|
|
|
from decimal import Decimal
|
|
|
|
from warnings import warn
|
|
|
|
from .. import relativedelta
|
|
from .. import tz
|
|
|
|
__all__ = ["parse", "parserinfo", "ParserError"]
|
|
|
|
|
|
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
|
|
# making public and/or figuring out if there is something we can
|
|
# take off their plate.
|
|
class _timelex(object):
|
|
# Fractional seconds are sometimes split by a comma
|
|
_split_decimal = re.compile("([.,])")
|
|
|
|
def __init__(self, instream):
|
|
if isinstance(instream, (bytes, bytearray)):
|
|
instream = instream.decode()
|
|
|
|
if isinstance(instream, text_type):
|
|
instream = StringIO(instream)
|
|
elif getattr(instream, 'read', None) is None:
|
|
raise TypeError('Parser must be a string or character stream, not '
|
|
'{itype}'.format(itype=instream.__class__.__name__))
|
|
|
|
self.instream = instream
|
|
self.charstack = []
|
|
self.tokenstack = []
|
|
self.eof = False
|
|
|
|
def get_token(self):
|
|
"""
|
|
This function breaks the time string into lexical units (tokens), which
|
|
can be parsed by the parser. Lexical units are demarcated by changes in
|
|
the character set, so any continuous string of letters is considered
|
|
one unit, any continuous string of numbers is considered one unit.
|
|
|
|
The main complication arises from the fact that dots ('.') can be used
|
|
both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
|
|
"4:30:21.447"). As such, it is necessary to read the full context of
|
|
any dot-separated strings before breaking it into tokens; as such, this
|
|
function maintains a "token stack", for when the ambiguous context
|
|
demands that multiple tokens be parsed at once.
|
|
"""
|
|
if self.tokenstack:
|
|
return self.tokenstack.pop(0)
|
|
|
|
seenletters = False
|
|
token = None
|
|
state = None
|
|
|
|
while not self.eof:
|
|
# We only realize that we've reached the end of a token when we
|
|
# find a character that's not part of the current token - since
|
|
# that character may be part of the next token, it's stored in the
|
|
# charstack.
|
|
if self.charstack:
|
|
nextchar = self.charstack.pop(0)
|
|
else:
|
|
nextchar = self.instream.read(1)
|
|
while nextchar == '\x00':
|
|
nextchar = self.instream.read(1)
|
|
|
|
if not nextchar:
|
|
self.eof = True
|
|
break
|
|
elif not state:
|
|
# First character of the token - determines if we're starting
|
|
# to parse a word, a number or something else.
|
|
token = nextchar
|
|
if self.isword(nextchar):
|
|
state = 'a'
|
|
elif self.isnum(nextchar):
|
|
state = '0'
|
|
elif self.isspace(nextchar):
|
|
token = ' '
|
|
break # emit token
|
|
else:
|
|
break # emit token
|
|
elif state == 'a':
|
|
# If we've already started reading a word, we keep reading
|
|
# letters until we find something that's not part of a word.
|
|
seenletters = True
|
|
if self.isword(nextchar):
|
|
token += nextchar
|
|
elif nextchar == '.':
|
|
token += nextchar
|
|
state = 'a.'
|
|
else:
|
|
self.charstack.append(nextchar)
|
|
break # emit token
|
|
elif state == '0':
|
|
# If we've already started reading a number, we keep reading
|
|
# numbers until we find something that doesn't fit.
|
|
if self.isnum(nextchar):
|
|
token += nextchar
|
|
elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
|
|
token += nextchar
|
|
state = '0.'
|
|
else:
|
|
self.charstack.append(nextchar)
|
|
break # emit token
|
|
elif state == 'a.':
|
|
# If we've seen some letters and a dot separator, continue
|
|
# parsing, and the tokens will be broken up later.
|
|
seenletters = True
|
|
if nextchar == '.' or self.isword(nextchar):
|
|
token += nextchar
|
|
elif self.isnum(nextchar) and token[-1] == '.':
|
|
token += nextchar
|
|
state = '0.'
|
|
else:
|
|
self.charstack.append(nextchar)
|
|
break # emit token
|
|
elif state == '0.':
|
|
# If we've seen at least one dot separator, keep going, we'll
|
|
# break up the tokens later.
|
|
if nextchar == '.' or self.isnum(nextchar):
|
|
token += nextchar
|
|
elif self.isword(nextchar) and token[-1] == '.':
|
|
token += nextchar
|
|
state = 'a.'
|
|
else:
|
|
self.charstack.append(nextchar)
|
|
break # emit token
|
|
|
|
if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
|
|
token[-1] in '.,')):
|
|
l = self._split_decimal.split(token)
|
|
token = l[0]
|
|
for tok in l[1:]:
|
|
if tok:
|
|
self.tokenstack.append(tok)
|
|
|
|
if state == '0.' and token.count('.') == 0:
|
|
token = token.replace(',', '.')
|
|
|
|
return token
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def __next__(self):
|
|
token = self.get_token()
|
|
if token is None:
|
|
raise StopIteration
|
|
|
|
return token
|
|
|
|
def next(self):
|
|
return self.__next__() # Python 2.x support
|
|
|
|
@classmethod
|
|
def split(cls, s):
|
|
return list(cls(s))
|
|
|
|
@classmethod
|
|
def isword(cls, nextchar):
|
|
""" Whether or not the next character is part of a word """
|
|
return nextchar.isalpha()
|
|
|
|
@classmethod
|
|
def isnum(cls, nextchar):
|
|
""" Whether the next character is part of a number """
|
|
return nextchar.isdigit()
|
|
|
|
@classmethod
|
|
def isspace(cls, nextchar):
|
|
""" Whether the next character is whitespace """
|
|
return nextchar.isspace()
|
|
|
|
|
|
class _resultbase(object):
|
|
|
|
def __init__(self):
|
|
for attr in self.__slots__:
|
|
setattr(self, attr, None)
|
|
|
|
def _repr(self, classname):
|
|
l = []
|
|
for attr in self.__slots__:
|
|
value = getattr(self, attr)
|
|
if value is not None:
|
|
l.append("%s=%s" % (attr, repr(value)))
|
|
return "%s(%s)" % (classname, ", ".join(l))
|
|
|
|
def __len__(self):
|
|
return (sum(getattr(self, attr) is not None
|
|
for attr in self.__slots__))
|
|
|
|
def __repr__(self):
|
|
return self._repr(self.__class__.__name__)
|
|
|
|
|
|
class parserinfo(object):
|
|
"""
|
|
Class which handles what inputs are accepted. Subclass this to customize
|
|
the language and acceptable values for each parameter.
|
|
|
|
:param dayfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the day (``True``) or month (``False``). If
|
|
``yearfirst`` is set to ``True``, this distinguishes between YDM
|
|
and YMD. Default is ``False``.
|
|
|
|
:param yearfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the year. If ``True``, the first number is taken
|
|
to be the year, otherwise the last number is taken to be the year.
|
|
Default is ``False``.
|
|
"""
|
|
|
|
# m from a.m/p.m, t from ISO T separator
|
|
JUMP = [" ", ".", ",", ";", "-", "/", "'",
|
|
"at", "on", "and", "ad", "m", "t", "of",
|
|
"st", "nd", "rd", "th"]
|
|
|
|
WEEKDAYS = [("Mon", "Monday"),
|
|
("Tue", "Tuesday"), # TODO: "Tues"
|
|
("Wed", "Wednesday"),
|
|
("Thu", "Thursday"), # TODO: "Thurs"
|
|
("Fri", "Friday"),
|
|
("Sat", "Saturday"),
|
|
("Sun", "Sunday")]
|
|
MONTHS = [("Jan", "January"),
|
|
("Feb", "February"), # TODO: "Febr"
|
|
("Mar", "March"),
|
|
("Apr", "April"),
|
|
("May", "May"),
|
|
("Jun", "June"),
|
|
("Jul", "July"),
|
|
("Aug", "August"),
|
|
("Sep", "Sept", "September"),
|
|
("Oct", "October"),
|
|
("Nov", "November"),
|
|
("Dec", "December")]
|
|
HMS = [("h", "hour", "hours"),
|
|
("m", "minute", "minutes"),
|
|
("s", "second", "seconds")]
|
|
AMPM = [("am", "a"),
|
|
("pm", "p")]
|
|
UTCZONE = ["UTC", "GMT", "Z", "z"]
|
|
PERTAIN = ["of"]
|
|
TZOFFSET = {}
|
|
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
|
|
# "Anno Domini", "Year of Our Lord"]
|
|
|
|
def __init__(self, dayfirst=False, yearfirst=False):
|
|
self._jump = self._convert(self.JUMP)
|
|
self._weekdays = self._convert(self.WEEKDAYS)
|
|
self._months = self._convert(self.MONTHS)
|
|
self._hms = self._convert(self.HMS)
|
|
self._ampm = self._convert(self.AMPM)
|
|
self._utczone = self._convert(self.UTCZONE)
|
|
self._pertain = self._convert(self.PERTAIN)
|
|
|
|
self.dayfirst = dayfirst
|
|
self.yearfirst = yearfirst
|
|
|
|
self._year = time.localtime().tm_year
|
|
self._century = self._year // 100 * 100
|
|
|
|
def _convert(self, lst):
|
|
dct = {}
|
|
for i, v in enumerate(lst):
|
|
if isinstance(v, tuple):
|
|
for v in v:
|
|
dct[v.lower()] = i
|
|
else:
|
|
dct[v.lower()] = i
|
|
return dct
|
|
|
|
def jump(self, name):
|
|
return name.lower() in self._jump
|
|
|
|
def weekday(self, name):
|
|
try:
|
|
return self._weekdays[name.lower()]
|
|
except KeyError:
|
|
pass
|
|
return None
|
|
|
|
def month(self, name):
|
|
try:
|
|
return self._months[name.lower()] + 1
|
|
except KeyError:
|
|
pass
|
|
return None
|
|
|
|
def hms(self, name):
|
|
try:
|
|
return self._hms[name.lower()]
|
|
except KeyError:
|
|
return None
|
|
|
|
def ampm(self, name):
|
|
try:
|
|
return self._ampm[name.lower()]
|
|
except KeyError:
|
|
return None
|
|
|
|
def pertain(self, name):
|
|
return name.lower() in self._pertain
|
|
|
|
def utczone(self, name):
|
|
return name.lower() in self._utczone
|
|
|
|
def tzoffset(self, name):
|
|
if name in self._utczone:
|
|
return 0
|
|
|
|
return self.TZOFFSET.get(name)
|
|
|
|
def convertyear(self, year, century_specified=False):
|
|
"""
|
|
Converts two-digit years to year within [-50, 49]
|
|
range of self._year (current local time)
|
|
"""
|
|
|
|
# Function contract is that the year is always positive
|
|
assert year >= 0
|
|
|
|
if year < 100 and not century_specified:
|
|
# assume current century to start
|
|
year += self._century
|
|
|
|
if year >= self._year + 50: # if too far in future
|
|
year -= 100
|
|
elif year < self._year - 50: # if too far in past
|
|
year += 100
|
|
|
|
return year
|
|
|
|
def validate(self, res):
|
|
# move to info
|
|
if res.year is not None:
|
|
res.year = self.convertyear(res.year, res.century_specified)
|
|
|
|
if ((res.tzoffset == 0 and not res.tzname) or
|
|
(res.tzname == 'Z' or res.tzname == 'z')):
|
|
res.tzname = "UTC"
|
|
res.tzoffset = 0
|
|
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
|
|
res.tzoffset = 0
|
|
return True
|
|
|
|
|
|
class _ymd(list):
|
|
def __init__(self, *args, **kwargs):
|
|
super(self.__class__, self).__init__(*args, **kwargs)
|
|
self.century_specified = False
|
|
self.dstridx = None
|
|
self.mstridx = None
|
|
self.ystridx = None
|
|
|
|
@property
|
|
def has_year(self):
|
|
return self.ystridx is not None
|
|
|
|
@property
|
|
def has_month(self):
|
|
return self.mstridx is not None
|
|
|
|
@property
|
|
def has_day(self):
|
|
return self.dstridx is not None
|
|
|
|
def could_be_day(self, value):
|
|
if self.has_day:
|
|
return False
|
|
elif not self.has_month:
|
|
return 1 <= value <= 31
|
|
elif not self.has_year:
|
|
# Be permissive, assume leap year
|
|
month = self[self.mstridx]
|
|
return 1 <= value <= monthrange(2000, month)[1]
|
|
else:
|
|
month = self[self.mstridx]
|
|
year = self[self.ystridx]
|
|
return 1 <= value <= monthrange(year, month)[1]
|
|
|
|
def append(self, val, label=None):
|
|
if hasattr(val, '__len__'):
|
|
if val.isdigit() and len(val) > 2:
|
|
self.century_specified = True
|
|
if label not in [None, 'Y']: # pragma: no cover
|
|
raise ValueError(label)
|
|
label = 'Y'
|
|
elif val > 100:
|
|
self.century_specified = True
|
|
if label not in [None, 'Y']: # pragma: no cover
|
|
raise ValueError(label)
|
|
label = 'Y'
|
|
|
|
super(self.__class__, self).append(int(val))
|
|
|
|
if label == 'M':
|
|
if self.has_month:
|
|
raise ValueError('Month is already set')
|
|
self.mstridx = len(self) - 1
|
|
elif label == 'D':
|
|
if self.has_day:
|
|
raise ValueError('Day is already set')
|
|
self.dstridx = len(self) - 1
|
|
elif label == 'Y':
|
|
if self.has_year:
|
|
raise ValueError('Year is already set')
|
|
self.ystridx = len(self) - 1
|
|
|
|
def _resolve_from_stridxs(self, strids):
|
|
"""
|
|
Try to resolve the identities of year/month/day elements using
|
|
ystridx, mstridx, and dstridx, if enough of these are specified.
|
|
"""
|
|
if len(self) == 3 and len(strids) == 2:
|
|
# we can back out the remaining stridx value
|
|
missing = [x for x in range(3) if x not in strids.values()]
|
|
key = [x for x in ['y', 'm', 'd'] if x not in strids]
|
|
assert len(missing) == len(key) == 1
|
|
key = key[0]
|
|
val = missing[0]
|
|
strids[key] = val
|
|
|
|
assert len(self) == len(strids) # otherwise this should not be called
|
|
out = {key: self[strids[key]] for key in strids}
|
|
return (out.get('y'), out.get('m'), out.get('d'))
|
|
|
|
def resolve_ymd(self, yearfirst, dayfirst):
|
|
len_ymd = len(self)
|
|
year, month, day = (None, None, None)
|
|
|
|
strids = (('y', self.ystridx),
|
|
('m', self.mstridx),
|
|
('d', self.dstridx))
|
|
|
|
strids = {key: val for key, val in strids if val is not None}
|
|
if (len(self) == len(strids) > 0 or
|
|
(len(self) == 3 and len(strids) == 2)):
|
|
return self._resolve_from_stridxs(strids)
|
|
|
|
mstridx = self.mstridx
|
|
|
|
if len_ymd > 3:
|
|
raise ValueError("More than three YMD values")
|
|
elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
|
|
# One member, or two members with a month string
|
|
if mstridx is not None:
|
|
month = self[mstridx]
|
|
# since mstridx is 0 or 1, self[mstridx-1] always
|
|
# looks up the other element
|
|
other = self[mstridx - 1]
|
|
else:
|
|
other = self[0]
|
|
|
|
if len_ymd > 1 or mstridx is None:
|
|
if other > 31:
|
|
year = other
|
|
else:
|
|
day = other
|
|
|
|
elif len_ymd == 2:
|
|
# Two members with numbers
|
|
if self[0] > 31:
|
|
# 99-01
|
|
year, month = self
|
|
elif self[1] > 31:
|
|
# 01-99
|
|
month, year = self
|
|
elif dayfirst and self[1] <= 12:
|
|
# 13-01
|
|
day, month = self
|
|
else:
|
|
# 01-13
|
|
month, day = self
|
|
|
|
elif len_ymd == 3:
|
|
# Three members
|
|
if mstridx == 0:
|
|
if self[1] > 31:
|
|
# Apr-2003-25
|
|
month, year, day = self
|
|
else:
|
|
month, day, year = self
|
|
elif mstridx == 1:
|
|
if self[0] > 31 or (yearfirst and self[2] <= 31):
|
|
# 99-Jan-01
|
|
year, month, day = self
|
|
else:
|
|
# 01-Jan-01
|
|
# Give precedence to day-first, since
|
|
# two-digit years is usually hand-written.
|
|
day, month, year = self
|
|
|
|
elif mstridx == 2:
|
|
# WTF!?
|
|
if self[1] > 31:
|
|
# 01-99-Jan
|
|
day, year, month = self
|
|
else:
|
|
# 99-01-Jan
|
|
year, day, month = self
|
|
|
|
else:
|
|
if (self[0] > 31 or
|
|
self.ystridx == 0 or
|
|
(yearfirst and self[1] <= 12 and self[2] <= 31)):
|
|
# 99-01-01
|
|
if dayfirst and self[2] <= 12:
|
|
year, day, month = self
|
|
else:
|
|
year, month, day = self
|
|
elif self[0] > 12 or (dayfirst and self[1] <= 12):
|
|
# 13-01-01
|
|
day, month, year = self
|
|
else:
|
|
# 01-13-01
|
|
month, day, year = self
|
|
|
|
return year, month, day
|
|
|
|
|
|
class parser(object):
|
|
def __init__(self, info=None):
|
|
self.info = info or parserinfo()
|
|
|
|
def parse(self, timestr, default=None,
|
|
ignoretz=False, tzinfos=None, **kwargs):
|
|
"""
|
|
Parse the date/time string into a :class:`datetime.datetime` object.
|
|
|
|
:param timestr:
|
|
Any date/time string using the supported formats.
|
|
|
|
:param default:
|
|
The default datetime object, if this is a datetime object and not
|
|
``None``, elements specified in ``timestr`` replace elements in the
|
|
default object.
|
|
|
|
:param ignoretz:
|
|
If set ``True``, time zones in parsed strings are ignored and a
|
|
naive :class:`datetime.datetime` object is returned.
|
|
|
|
:param tzinfos:
|
|
Additional time zone names / aliases which may be present in the
|
|
string. This argument maps time zone names (and optionally offsets
|
|
from those time zones) to time zones. This parameter can be a
|
|
dictionary with timezone aliases mapping time zone names to time
|
|
zones or a function taking two parameters (``tzname`` and
|
|
``tzoffset``) and returning a time zone.
|
|
|
|
The timezones to which the names are mapped can be an integer
|
|
offset from UTC in seconds or a :class:`tzinfo` object.
|
|
|
|
.. doctest::
|
|
:options: +NORMALIZE_WHITESPACE
|
|
|
|
>>> from dateutil.parser import parse
|
|
>>> from dateutil.tz import gettz
|
|
>>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
|
|
>>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
|
|
datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
|
|
>>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
|
|
datetime.datetime(2012, 1, 19, 17, 21,
|
|
tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
|
|
|
|
This parameter is ignored if ``ignoretz`` is set.
|
|
|
|
:param \\*\\*kwargs:
|
|
Keyword arguments as passed to ``_parse()``.
|
|
|
|
:return:
|
|
Returns a :class:`datetime.datetime` object or, if the
|
|
``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
|
|
first element being a :class:`datetime.datetime` object, the second
|
|
a tuple containing the fuzzy tokens.
|
|
|
|
:raises ParserError:
|
|
Raised for invalid or unknown string format, if the provided
|
|
:class:`tzinfo` is not in a valid format, or if an invalid date
|
|
would be created.
|
|
|
|
:raises TypeError:
|
|
Raised for non-string or character stream input.
|
|
|
|
:raises OverflowError:
|
|
Raised if the parsed date exceeds the largest valid C integer on
|
|
your system.
|
|
"""
|
|
|
|
if default is None:
|
|
default = datetime.datetime.now().replace(hour=0, minute=0,
|
|
second=0, microsecond=0)
|
|
|
|
res, skipped_tokens = self._parse(timestr, **kwargs)
|
|
|
|
if res is None:
|
|
raise ParserError("Unknown string format: %s", timestr)
|
|
|
|
if len(res) == 0:
|
|
raise ParserError("String does not contain a date: %s", timestr)
|
|
|
|
try:
|
|
ret = self._build_naive(res, default)
|
|
except ValueError as e:
|
|
six.raise_from(ParserError(str(e) + ": %s", timestr), e)
|
|
|
|
if not ignoretz:
|
|
ret = self._build_tzaware(ret, res, tzinfos)
|
|
|
|
if kwargs.get('fuzzy_with_tokens', False):
|
|
return ret, skipped_tokens
|
|
else:
|
|
return ret
|
|
|
|
class _result(_resultbase):
|
|
__slots__ = ["year", "month", "day", "weekday",
|
|
"hour", "minute", "second", "microsecond",
|
|
"tzname", "tzoffset", "ampm","any_unused_tokens"]
|
|
|
|
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
|
|
fuzzy_with_tokens=False):
|
|
"""
|
|
Private method which performs the heavy lifting of parsing, called from
|
|
``parse()``, which passes on its ``kwargs`` to this function.
|
|
|
|
:param timestr:
|
|
The string to parse.
|
|
|
|
:param dayfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the day (``True``) or month (``False``). If
|
|
``yearfirst`` is set to ``True``, this distinguishes between YDM
|
|
and YMD. If set to ``None``, this value is retrieved from the
|
|
current :class:`parserinfo` object (which itself defaults to
|
|
``False``).
|
|
|
|
:param yearfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the year. If ``True``, the first number is taken
|
|
to be the year, otherwise the last number is taken to be the year.
|
|
If this is set to ``None``, the value is retrieved from the current
|
|
:class:`parserinfo` object (which itself defaults to ``False``).
|
|
|
|
:param fuzzy:
|
|
Whether to allow fuzzy parsing, allowing for string like "Today is
|
|
January 1, 2047 at 8:21:00AM".
|
|
|
|
:param fuzzy_with_tokens:
|
|
If ``True``, ``fuzzy`` is automatically set to True, and the parser
|
|
will return a tuple where the first element is the parsed
|
|
:class:`datetime.datetime` datetimestamp and the second element is
|
|
a tuple containing the portions of the string which were ignored:
|
|
|
|
.. doctest::
|
|
|
|
>>> from dateutil.parser import parse
|
|
>>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
|
|
(datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
|
|
|
|
"""
|
|
if fuzzy_with_tokens:
|
|
fuzzy = True
|
|
|
|
info = self.info
|
|
|
|
if dayfirst is None:
|
|
dayfirst = info.dayfirst
|
|
|
|
if yearfirst is None:
|
|
yearfirst = info.yearfirst
|
|
|
|
res = self._result()
|
|
l = _timelex.split(timestr) # Splits the timestr into tokens
|
|
|
|
skipped_idxs = []
|
|
|
|
# year/month/day list
|
|
ymd = _ymd()
|
|
|
|
len_l = len(l)
|
|
i = 0
|
|
try:
|
|
while i < len_l:
|
|
|
|
# Check if it's a number
|
|
value_repr = l[i]
|
|
try:
|
|
value = float(value_repr)
|
|
except ValueError:
|
|
value = None
|
|
|
|
if value is not None:
|
|
# Numeric token
|
|
i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
|
|
|
|
# Check weekday
|
|
elif info.weekday(l[i]) is not None:
|
|
value = info.weekday(l[i])
|
|
res.weekday = value
|
|
|
|
# Check month name
|
|
elif info.month(l[i]) is not None:
|
|
value = info.month(l[i])
|
|
ymd.append(value, 'M')
|
|
|
|
if i + 1 < len_l:
|
|
if l[i + 1] in ('-', '/'):
|
|
# Jan-01[-99]
|
|
sep = l[i + 1]
|
|
ymd.append(l[i + 2])
|
|
|
|
if i + 3 < len_l and l[i + 3] == sep:
|
|
# Jan-01-99
|
|
ymd.append(l[i + 4])
|
|
i += 2
|
|
|
|
i += 2
|
|
|
|
elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
|
|
info.pertain(l[i + 2])):
|
|
# Jan of 01
|
|
# In this case, 01 is clearly year
|
|
if l[i + 4].isdigit():
|
|
# Convert it here to become unambiguous
|
|
value = int(l[i + 4])
|
|
year = str(info.convertyear(value))
|
|
ymd.append(year, 'Y')
|
|
else:
|
|
# Wrong guess
|
|
pass
|
|
# TODO: not hit in tests
|
|
i += 4
|
|
|
|
# Check am/pm
|
|
elif info.ampm(l[i]) is not None:
|
|
value = info.ampm(l[i])
|
|
val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
|
|
|
|
if val_is_ampm:
|
|
res.hour = self._adjust_ampm(res.hour, value)
|
|
res.ampm = value
|
|
|
|
elif fuzzy:
|
|
skipped_idxs.append(i)
|
|
|
|
# Check for a timezone name
|
|
elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
|
|
res.tzname = l[i]
|
|
res.tzoffset = info.tzoffset(res.tzname)
|
|
|
|
# Check for something like GMT+3, or BRST+3. Notice
|
|
# that it doesn't mean "I am 3 hours after GMT", but
|
|
# "my time +3 is GMT". If found, we reverse the
|
|
# logic so that timezone parsing code will get it
|
|
# right.
|
|
if i + 1 < len_l and l[i + 1] in ('+', '-'):
|
|
l[i + 1] = ('+', '-')[l[i + 1] == '+']
|
|
res.tzoffset = None
|
|
if info.utczone(res.tzname):
|
|
# With something like GMT+3, the timezone
|
|
# is *not* GMT.
|
|
res.tzname = None
|
|
|
|
# Check for a numbered timezone
|
|
elif res.hour is not None and l[i] in ('+', '-'):
|
|
signal = (-1, 1)[l[i] == '+']
|
|
len_li = len(l[i + 1])
|
|
|
|
# TODO: check that l[i + 1] is integer?
|
|
if len_li == 4:
|
|
# -0300
|
|
hour_offset = int(l[i + 1][:2])
|
|
min_offset = int(l[i + 1][2:])
|
|
elif i + 2 < len_l and l[i + 2] == ':':
|
|
# -03:00
|
|
hour_offset = int(l[i + 1])
|
|
min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like?
|
|
i += 2
|
|
elif len_li <= 2:
|
|
# -[0]3
|
|
hour_offset = int(l[i + 1][:2])
|
|
min_offset = 0
|
|
else:
|
|
raise ValueError(timestr)
|
|
|
|
res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
|
|
|
|
# Look for a timezone name between parenthesis
|
|
if (i + 5 < len_l and
|
|
info.jump(l[i + 2]) and l[i + 3] == '(' and
|
|
l[i + 5] == ')' and
|
|
3 <= len(l[i + 4]) and
|
|
self._could_be_tzname(res.hour, res.tzname,
|
|
None, l[i + 4])):
|
|
# -0300 (BRST)
|
|
res.tzname = l[i + 4]
|
|
i += 4
|
|
|
|
i += 1
|
|
|
|
# Check jumps
|
|
elif not (info.jump(l[i]) or fuzzy):
|
|
raise ValueError(timestr)
|
|
|
|
else:
|
|
skipped_idxs.append(i)
|
|
i += 1
|
|
|
|
# Process year/month/day
|
|
year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
|
|
|
|
res.century_specified = ymd.century_specified
|
|
res.year = year
|
|
res.month = month
|
|
res.day = day
|
|
|
|
except (IndexError, ValueError):
|
|
return None, None
|
|
|
|
if not info.validate(res):
|
|
return None, None
|
|
|
|
if fuzzy_with_tokens:
|
|
skipped_tokens = self._recombine_skipped(l, skipped_idxs)
|
|
return res, tuple(skipped_tokens)
|
|
else:
|
|
return res, None
|
|
|
|
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
|
|
# Token is a number
|
|
value_repr = tokens[idx]
|
|
try:
|
|
value = self._to_decimal(value_repr)
|
|
except Exception as e:
|
|
six.raise_from(ValueError('Unknown numeric token'), e)
|
|
|
|
len_li = len(value_repr)
|
|
|
|
len_l = len(tokens)
|
|
|
|
if (len(ymd) == 3 and len_li in (2, 4) and
|
|
res.hour is None and
|
|
(idx + 1 >= len_l or
|
|
(tokens[idx + 1] != ':' and
|
|
info.hms(tokens[idx + 1]) is None))):
|
|
# 19990101T23[59]
|
|
s = tokens[idx]
|
|
res.hour = int(s[:2])
|
|
|
|
if len_li == 4:
|
|
res.minute = int(s[2:])
|
|
|
|
elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
|
|
# YYMMDD or HHMMSS[.ss]
|
|
s = tokens[idx]
|
|
|
|
if not ymd and '.' not in tokens[idx]:
|
|
ymd.append(s[:2])
|
|
ymd.append(s[2:4])
|
|
ymd.append(s[4:])
|
|
else:
|
|
# 19990101T235959[.59]
|
|
|
|
# TODO: Check if res attributes already set.
|
|
res.hour = int(s[:2])
|
|
res.minute = int(s[2:4])
|
|
res.second, res.microsecond = self._parsems(s[4:])
|
|
|
|
elif len_li in (8, 12, 14):
|
|
# YYYYMMDD
|
|
s = tokens[idx]
|
|
ymd.append(s[:4], 'Y')
|
|
ymd.append(s[4:6])
|
|
ymd.append(s[6:8])
|
|
|
|
if len_li > 8:
|
|
res.hour = int(s[8:10])
|
|
res.minute = int(s[10:12])
|
|
|
|
if len_li > 12:
|
|
res.second = int(s[12:])
|
|
|
|
elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
|
|
# HH[ ]h or MM[ ]m or SS[.ss][ ]s
|
|
hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
|
|
(idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
|
|
if hms is not None:
|
|
# TODO: checking that hour/minute/second are not
|
|
# already set?
|
|
self._assign_hms(res, value_repr, hms)
|
|
|
|
elif idx + 2 < len_l and tokens[idx + 1] == ':':
|
|
# HH:MM[:SS[.ss]]
|
|
res.hour = int(value)
|
|
value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this?
|
|
(res.minute, res.second) = self._parse_min_sec(value)
|
|
|
|
if idx + 4 < len_l and tokens[idx + 3] == ':':
|
|
res.second, res.microsecond = self._parsems(tokens[idx + 4])
|
|
|
|
idx += 2
|
|
|
|
idx += 2
|
|
|
|
elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
|
|
sep = tokens[idx + 1]
|
|
ymd.append(value_repr)
|
|
|
|
if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
|
|
if tokens[idx + 2].isdigit():
|
|
# 01-01[-01]
|
|
ymd.append(tokens[idx + 2])
|
|
else:
|
|
# 01-Jan[-01]
|
|
value = info.month(tokens[idx + 2])
|
|
|
|
if value is not None:
|
|
ymd.append(value, 'M')
|
|
else:
|
|
raise ValueError()
|
|
|
|
if idx + 3 < len_l and tokens[idx + 3] == sep:
|
|
# We have three members
|
|
value = info.month(tokens[idx + 4])
|
|
|
|
if value is not None:
|
|
ymd.append(value, 'M')
|
|
else:
|
|
ymd.append(tokens[idx + 4])
|
|
idx += 2
|
|
|
|
idx += 1
|
|
idx += 1
|
|
|
|
elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
|
|
if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
|
|
# 12 am
|
|
hour = int(value)
|
|
res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
|
|
idx += 1
|
|
else:
|
|
# Year, month or day
|
|
ymd.append(value)
|
|
idx += 1
|
|
|
|
elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
|
|
# 12am
|
|
hour = int(value)
|
|
res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
|
|
idx += 1
|
|
|
|
elif ymd.could_be_day(value):
|
|
ymd.append(value)
|
|
|
|
elif not fuzzy:
|
|
raise ValueError()
|
|
|
|
return idx
|
|
|
|
def _find_hms_idx(self, idx, tokens, info, allow_jump):
|
|
len_l = len(tokens)
|
|
|
|
if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
|
|
# There is an "h", "m", or "s" label following this token. We take
|
|
# assign the upcoming label to the current token.
|
|
# e.g. the "12" in 12h"
|
|
hms_idx = idx + 1
|
|
|
|
elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
|
|
info.hms(tokens[idx+2]) is not None):
|
|
# There is a space and then an "h", "m", or "s" label.
|
|
# e.g. the "12" in "12 h"
|
|
hms_idx = idx + 2
|
|
|
|
elif idx > 0 and info.hms(tokens[idx-1]) is not None:
|
|
# There is a "h", "m", or "s" preceding this token. Since neither
|
|
# of the previous cases was hit, there is no label following this
|
|
# token, so we use the previous label.
|
|
# e.g. the "04" in "12h04"
|
|
hms_idx = idx-1
|
|
|
|
elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
|
|
info.hms(tokens[idx-2]) is not None):
|
|
# If we are looking at the final token, we allow for a
|
|
# backward-looking check to skip over a space.
|
|
# TODO: Are we sure this is the right condition here?
|
|
hms_idx = idx - 2
|
|
|
|
else:
|
|
hms_idx = None
|
|
|
|
return hms_idx
|
|
|
|
def _assign_hms(self, res, value_repr, hms):
|
|
# See GH issue #427, fixing float rounding
|
|
value = self._to_decimal(value_repr)
|
|
|
|
if hms == 0:
|
|
# Hour
|
|
res.hour = int(value)
|
|
if value % 1:
|
|
res.minute = int(60*(value % 1))
|
|
|
|
elif hms == 1:
|
|
(res.minute, res.second) = self._parse_min_sec(value)
|
|
|
|
elif hms == 2:
|
|
(res.second, res.microsecond) = self._parsems(value_repr)
|
|
|
|
def _could_be_tzname(self, hour, tzname, tzoffset, token):
|
|
return (hour is not None and
|
|
tzname is None and
|
|
tzoffset is None and
|
|
len(token) <= 5 and
|
|
(all(x in string.ascii_uppercase for x in token)
|
|
or token in self.info.UTCZONE))
|
|
|
|
def _ampm_valid(self, hour, ampm, fuzzy):
|
|
"""
|
|
For fuzzy parsing, 'a' or 'am' (both valid English words)
|
|
may erroneously trigger the AM/PM flag. Deal with that
|
|
here.
|
|
"""
|
|
val_is_ampm = True
|
|
|
|
# If there's already an AM/PM flag, this one isn't one.
|
|
if fuzzy and ampm is not None:
|
|
val_is_ampm = False
|
|
|
|
# If AM/PM is found and hour is not, raise a ValueError
|
|
if hour is None:
|
|
if fuzzy:
|
|
val_is_ampm = False
|
|
else:
|
|
raise ValueError('No hour specified with AM or PM flag.')
|
|
elif not 0 <= hour <= 12:
|
|
# If AM/PM is found, it's a 12 hour clock, so raise
|
|
# an error for invalid range
|
|
if fuzzy:
|
|
val_is_ampm = False
|
|
else:
|
|
raise ValueError('Invalid hour specified for 12-hour clock.')
|
|
|
|
return val_is_ampm
|
|
|
|
def _adjust_ampm(self, hour, ampm):
|
|
if hour < 12 and ampm == 1:
|
|
hour += 12
|
|
elif hour == 12 and ampm == 0:
|
|
hour = 0
|
|
return hour
|
|
|
|
def _parse_min_sec(self, value):
|
|
# TODO: Every usage of this function sets res.second to the return
|
|
# value. Are there any cases where second will be returned as None and
|
|
# we *don't* want to set res.second = None?
|
|
minute = int(value)
|
|
second = None
|
|
|
|
sec_remainder = value % 1
|
|
if sec_remainder:
|
|
second = int(60 * sec_remainder)
|
|
return (minute, second)
|
|
|
|
def _parse_hms(self, idx, tokens, info, hms_idx):
|
|
# TODO: Is this going to admit a lot of false-positives for when we
|
|
# just happen to have digits and "h", "m" or "s" characters in non-date
|
|
# text? I guess hex hashes won't have that problem, but there's plenty
|
|
# of random junk out there.
|
|
if hms_idx is None:
|
|
hms = None
|
|
new_idx = idx
|
|
elif hms_idx > idx:
|
|
hms = info.hms(tokens[hms_idx])
|
|
new_idx = hms_idx
|
|
else:
|
|
# Looking backwards, increment one.
|
|
hms = info.hms(tokens[hms_idx]) + 1
|
|
new_idx = idx
|
|
|
|
return (new_idx, hms)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Handling for individual tokens. These are kept as methods instead
|
|
# of functions for the sake of customizability via subclassing.
|
|
|
|
def _parsems(self, value):
|
|
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
|
|
if "." not in value:
|
|
return int(value), 0
|
|
else:
|
|
i, f = value.split(".")
|
|
return int(i), int(f.ljust(6, "0")[:6])
|
|
|
|
def _to_decimal(self, val):
|
|
try:
|
|
decimal_value = Decimal(val)
|
|
# See GH 662, edge case, infinite value should not be converted
|
|
# via `_to_decimal`
|
|
if not decimal_value.is_finite():
|
|
raise ValueError("Converted decimal value is infinite or NaN")
|
|
except Exception as e:
|
|
msg = "Could not convert %s to decimal" % val
|
|
six.raise_from(ValueError(msg), e)
|
|
else:
|
|
return decimal_value
|
|
|
|
# ------------------------------------------------------------------
|
|
# Post-Parsing construction of datetime output. These are kept as
|
|
# methods instead of functions for the sake of customizability via
|
|
# subclassing.
|
|
|
|
def _build_tzinfo(self, tzinfos, tzname, tzoffset):
|
|
if callable(tzinfos):
|
|
tzdata = tzinfos(tzname, tzoffset)
|
|
else:
|
|
tzdata = tzinfos.get(tzname)
|
|
# handle case where tzinfo is paased an options that returns None
|
|
# eg tzinfos = {'BRST' : None}
|
|
if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
|
|
tzinfo = tzdata
|
|
elif isinstance(tzdata, text_type):
|
|
tzinfo = tz.tzstr(tzdata)
|
|
elif isinstance(tzdata, integer_types):
|
|
tzinfo = tz.tzoffset(tzname, tzdata)
|
|
else:
|
|
raise TypeError("Offset must be tzinfo subclass, tz string, "
|
|
"or int offset.")
|
|
return tzinfo
|
|
|
|
def _build_tzaware(self, naive, res, tzinfos):
|
|
if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
|
|
tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
|
|
aware = naive.replace(tzinfo=tzinfo)
|
|
aware = self._assign_tzname(aware, res.tzname)
|
|
|
|
elif res.tzname and res.tzname in time.tzname:
|
|
aware = naive.replace(tzinfo=tz.tzlocal())
|
|
|
|
# Handle ambiguous local datetime
|
|
aware = self._assign_tzname(aware, res.tzname)
|
|
|
|
# This is mostly relevant for winter GMT zones parsed in the UK
|
|
if (aware.tzname() != res.tzname and
|
|
res.tzname in self.info.UTCZONE):
|
|
aware = aware.replace(tzinfo=tz.UTC)
|
|
|
|
elif res.tzoffset == 0:
|
|
aware = naive.replace(tzinfo=tz.UTC)
|
|
|
|
elif res.tzoffset:
|
|
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
|
|
|
|
elif not res.tzname and not res.tzoffset:
|
|
# i.e. no timezone information was found.
|
|
aware = naive
|
|
|
|
elif res.tzname:
|
|
# tz-like string was parsed but we don't know what to do
|
|
# with it
|
|
warnings.warn("tzname {tzname} identified but not understood. "
|
|
"Pass `tzinfos` argument in order to correctly "
|
|
"return a timezone-aware datetime. In a future "
|
|
"version, this will raise an "
|
|
"exception.".format(tzname=res.tzname),
|
|
category=UnknownTimezoneWarning)
|
|
aware = naive
|
|
|
|
return aware
|
|
|
|
def _build_naive(self, res, default):
|
|
repl = {}
|
|
for attr in ("year", "month", "day", "hour",
|
|
"minute", "second", "microsecond"):
|
|
value = getattr(res, attr)
|
|
if value is not None:
|
|
repl[attr] = value
|
|
|
|
if 'day' not in repl:
|
|
# If the default day exceeds the last day of the month, fall back
|
|
# to the end of the month.
|
|
cyear = default.year if res.year is None else res.year
|
|
cmonth = default.month if res.month is None else res.month
|
|
cday = default.day if res.day is None else res.day
|
|
|
|
if cday > monthrange(cyear, cmonth)[1]:
|
|
repl['day'] = monthrange(cyear, cmonth)[1]
|
|
|
|
naive = default.replace(**repl)
|
|
|
|
if res.weekday is not None and not res.day:
|
|
naive = naive + relativedelta.relativedelta(weekday=res.weekday)
|
|
|
|
return naive
|
|
|
|
def _assign_tzname(self, dt, tzname):
|
|
if dt.tzname() != tzname:
|
|
new_dt = tz.enfold(dt, fold=1)
|
|
if new_dt.tzname() == tzname:
|
|
return new_dt
|
|
|
|
return dt
|
|
|
|
def _recombine_skipped(self, tokens, skipped_idxs):
|
|
"""
|
|
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
|
|
>>> skipped_idxs = [0, 1, 2, 5]
|
|
>>> _recombine_skipped(tokens, skipped_idxs)
|
|
["foo bar", "baz"]
|
|
"""
|
|
skipped_tokens = []
|
|
for i, idx in enumerate(sorted(skipped_idxs)):
|
|
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
|
|
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
|
|
else:
|
|
skipped_tokens.append(tokens[idx])
|
|
|
|
return skipped_tokens
|
|
|
|
|
|
DEFAULTPARSER = parser()
|
|
|
|
|
|
def parse(timestr, parserinfo=None, **kwargs):
|
|
"""
|
|
|
|
Parse a string in one of the supported formats, using the
|
|
``parserinfo`` parameters.
|
|
|
|
:param timestr:
|
|
A string containing a date/time stamp.
|
|
|
|
:param parserinfo:
|
|
A :class:`parserinfo` object containing parameters for the parser.
|
|
If ``None``, the default arguments to the :class:`parserinfo`
|
|
constructor are used.
|
|
|
|
The ``**kwargs`` parameter takes the following keyword arguments:
|
|
|
|
:param default:
|
|
The default datetime object, if this is a datetime object and not
|
|
``None``, elements specified in ``timestr`` replace elements in the
|
|
default object.
|
|
|
|
:param ignoretz:
|
|
If set ``True``, time zones in parsed strings are ignored and a naive
|
|
:class:`datetime` object is returned.
|
|
|
|
:param tzinfos:
|
|
Additional time zone names / aliases which may be present in the
|
|
string. This argument maps time zone names (and optionally offsets
|
|
from those time zones) to time zones. This parameter can be a
|
|
dictionary with timezone aliases mapping time zone names to time
|
|
zones or a function taking two parameters (``tzname`` and
|
|
``tzoffset``) and returning a time zone.
|
|
|
|
The timezones to which the names are mapped can be an integer
|
|
offset from UTC in seconds or a :class:`tzinfo` object.
|
|
|
|
.. doctest::
|
|
:options: +NORMALIZE_WHITESPACE
|
|
|
|
>>> from dateutil.parser import parse
|
|
>>> from dateutil.tz import gettz
|
|
>>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
|
|
>>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
|
|
datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
|
|
>>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
|
|
datetime.datetime(2012, 1, 19, 17, 21,
|
|
tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
|
|
|
|
This parameter is ignored if ``ignoretz`` is set.
|
|
|
|
:param dayfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the day (``True``) or month (``False``). If
|
|
``yearfirst`` is set to ``True``, this distinguishes between YDM and
|
|
YMD. If set to ``None``, this value is retrieved from the current
|
|
:class:`parserinfo` object (which itself defaults to ``False``).
|
|
|
|
:param yearfirst:
|
|
Whether to interpret the first value in an ambiguous 3-integer date
|
|
(e.g. 01/05/09) as the year. If ``True``, the first number is taken to
|
|
be the year, otherwise the last number is taken to be the year. If
|
|
this is set to ``None``, the value is retrieved from the current
|
|
:class:`parserinfo` object (which itself defaults to ``False``).
|
|
|
|
:param fuzzy:
|
|
Whether to allow fuzzy parsing, allowing for string like "Today is
|
|
January 1, 2047 at 8:21:00AM".
|
|
|
|
:param fuzzy_with_tokens:
|
|
If ``True``, ``fuzzy`` is automatically set to True, and the parser
|
|
will return a tuple where the first element is the parsed
|
|
:class:`datetime.datetime` datetimestamp and the second element is
|
|
a tuple containing the portions of the string which were ignored:
|
|
|
|
.. doctest::
|
|
|
|
>>> from dateutil.parser import parse
|
|
>>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
|
|
(datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
|
|
|
|
:return:
|
|
Returns a :class:`datetime.datetime` object or, if the
|
|
``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
|
|
first element being a :class:`datetime.datetime` object, the second
|
|
a tuple containing the fuzzy tokens.
|
|
|
|
:raises ParserError:
|
|
Raised for invalid or unknown string formats, if the provided
|
|
:class:`tzinfo` is not in a valid format, or if an invalid date would
|
|
be created.
|
|
|
|
:raises OverflowError:
|
|
Raised if the parsed date exceeds the largest valid C integer on
|
|
your system.
|
|
"""
|
|
if parserinfo:
|
|
return parser(parserinfo).parse(timestr, **kwargs)
|
|
else:
|
|
return DEFAULTPARSER.parse(timestr, **kwargs)
|
|
|
|
|
|
class _tzparser(object):
|
|
|
|
class _result(_resultbase):
|
|
|
|
__slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
|
|
"start", "end"]
|
|
|
|
class _attr(_resultbase):
|
|
__slots__ = ["month", "week", "weekday",
|
|
"yday", "jyday", "day", "time"]
|
|
|
|
def __repr__(self):
|
|
return self._repr("")
|
|
|
|
def __init__(self):
|
|
_resultbase.__init__(self)
|
|
self.start = self._attr()
|
|
self.end = self._attr()
|
|
|
|
def parse(self, tzstr):
|
|
res = self._result()
|
|
l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
|
|
used_idxs = list()
|
|
try:
|
|
|
|
len_l = len(l)
|
|
|
|
i = 0
|
|
while i < len_l:
|
|
# BRST+3[BRDT[+2]]
|
|
j = i
|
|
while j < len_l and not [x for x in l[j]
|
|
if x in "0123456789:,-+"]:
|
|
j += 1
|
|
if j != i:
|
|
if not res.stdabbr:
|
|
offattr = "stdoffset"
|
|
res.stdabbr = "".join(l[i:j])
|
|
else:
|
|
offattr = "dstoffset"
|
|
res.dstabbr = "".join(l[i:j])
|
|
|
|
for ii in range(j):
|
|
used_idxs.append(ii)
|
|
i = j
|
|
if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
|
|
"0123456789")):
|
|
if l[i] in ('+', '-'):
|
|
# Yes, that's right. See the TZ variable
|
|
# documentation.
|
|
signal = (1, -1)[l[i] == '+']
|
|
used_idxs.append(i)
|
|
i += 1
|
|
else:
|
|
signal = -1
|
|
len_li = len(l[i])
|
|
if len_li == 4:
|
|
# -0300
|
|
setattr(res, offattr, (int(l[i][:2]) * 3600 +
|
|
int(l[i][2:]) * 60) * signal)
|
|
elif i + 1 < len_l and l[i + 1] == ':':
|
|
# -03:00
|
|
setattr(res, offattr,
|
|
(int(l[i]) * 3600 +
|
|
int(l[i + 2]) * 60) * signal)
|
|
used_idxs.append(i)
|
|
i += 2
|
|
elif len_li <= 2:
|
|
# -[0]3
|
|
setattr(res, offattr,
|
|
int(l[i][:2]) * 3600 * signal)
|
|
else:
|
|
return None
|
|
used_idxs.append(i)
|
|
i += 1
|
|
if res.dstabbr:
|
|
break
|
|
else:
|
|
break
|
|
|
|
|
|
if i < len_l:
|
|
for j in range(i, len_l):
|
|
if l[j] == ';':
|
|
l[j] = ','
|
|
|
|
assert l[i] == ','
|
|
|
|
i += 1
|
|
|
|
if i >= len_l:
|
|
pass
|
|
elif (8 <= l.count(',') <= 9 and
|
|
not [y for x in l[i:] if x != ','
|
|
for y in x if y not in "0123456789+-"]):
|
|
# GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
|
|
for x in (res.start, res.end):
|
|
x.month = int(l[i])
|
|
used_idxs.append(i)
|
|
i += 2
|
|
if l[i] == '-':
|
|
value = int(l[i + 1]) * -1
|
|
used_idxs.append(i)
|
|
i += 1
|
|
else:
|
|
value = int(l[i])
|
|
used_idxs.append(i)
|
|
i += 2
|
|
if value:
|
|
x.week = value
|
|
x.weekday = (int(l[i]) - 1) % 7
|
|
else:
|
|
x.day = int(l[i])
|
|
used_idxs.append(i)
|
|
i += 2
|
|
x.time = int(l[i])
|
|
used_idxs.append(i)
|
|
i += 2
|
|
if i < len_l:
|
|
if l[i] in ('-', '+'):
|
|
signal = (-1, 1)[l[i] == "+"]
|
|
used_idxs.append(i)
|
|
i += 1
|
|
else:
|
|
signal = 1
|
|
used_idxs.append(i)
|
|
res.dstoffset = (res.stdoffset + int(l[i]) * signal)
|
|
|
|
# This was a made-up format that is not in normal use
|
|
warn(('Parsed time zone "%s"' % tzstr) +
|
|
'is in a non-standard dateutil-specific format, which ' +
|
|
'is now deprecated; support for parsing this format ' +
|
|
'will be removed in future versions. It is recommended ' +
|
|
'that you switch to a standard format like the GNU ' +
|
|
'TZ variable format.', tz.DeprecatedTzFormatWarning)
|
|
elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
|
|
not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
|
|
'.', '-', ':')
|
|
for y in x if y not in "0123456789"]):
|
|
for x in (res.start, res.end):
|
|
if l[i] == 'J':
|
|
# non-leap year day (1 based)
|
|
used_idxs.append(i)
|
|
i += 1
|
|
x.jyday = int(l[i])
|
|
elif l[i] == 'M':
|
|
# month[-.]week[-.]weekday
|
|
used_idxs.append(i)
|
|
i += 1
|
|
x.month = int(l[i])
|
|
used_idxs.append(i)
|
|
i += 1
|
|
assert l[i] in ('-', '.')
|
|
used_idxs.append(i)
|
|
i += 1
|
|
x.week = int(l[i])
|
|
if x.week == 5:
|
|
x.week = -1
|
|
used_idxs.append(i)
|
|
i += 1
|
|
assert l[i] in ('-', '.')
|
|
used_idxs.append(i)
|
|
i += 1
|
|
x.weekday = (int(l[i]) - 1) % 7
|
|
else:
|
|
# year day (zero based)
|
|
x.yday = int(l[i]) + 1
|
|
|
|
used_idxs.append(i)
|
|
i += 1
|
|
|
|
if i < len_l and l[i] == '/':
|
|
used_idxs.append(i)
|
|
i += 1
|
|
# start time
|
|
len_li = len(l[i])
|
|
if len_li == 4:
|
|
# -0300
|
|
x.time = (int(l[i][:2]) * 3600 +
|
|
int(l[i][2:]) * 60)
|
|
elif i + 1 < len_l and l[i + 1] == ':':
|
|
# -03:00
|
|
x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
|
|
used_idxs.append(i)
|
|
i += 2
|
|
if i + 1 < len_l and l[i + 1] == ':':
|
|
used_idxs.append(i)
|
|
i += 2
|
|
x.time += int(l[i])
|
|
elif len_li <= 2:
|
|
# -[0]3
|
|
x.time = (int(l[i][:2]) * 3600)
|
|
else:
|
|
return None
|
|
used_idxs.append(i)
|
|
i += 1
|
|
|
|
assert i == len_l or l[i] == ','
|
|
|
|
i += 1
|
|
|
|
assert i >= len_l
|
|
|
|
except (IndexError, ValueError, AssertionError):
|
|
return None
|
|
|
|
unused_idxs = set(range(len_l)).difference(used_idxs)
|
|
res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
|
|
return res
|
|
|
|
|
|
DEFAULTTZPARSER = _tzparser()
|
|
|
|
|
|
def _parsetz(tzstr):
|
|
return DEFAULTTZPARSER.parse(tzstr)
|
|
|
|
|
|
class ParserError(ValueError):
|
|
"""Exception subclass used for any failure to parse a datetime string.
|
|
|
|
This is a subclass of :py:exc:`ValueError`, and should be raised any time
|
|
earlier versions of ``dateutil`` would have raised ``ValueError``.
|
|
|
|
.. versionadded:: 2.8.1
|
|
"""
|
|
def __str__(self):
|
|
try:
|
|
return self.args[0] % self.args[1:]
|
|
except (TypeError, IndexError):
|
|
return super(ParserError, self).__str__()
|
|
|
|
def __repr__(self):
|
|
args = ", ".join("'%s'" % arg for arg in self.args)
|
|
return "%s(%s)" % (self.__class__.__name__, args)
|
|
|
|
|
|
class UnknownTimezoneWarning(RuntimeWarning):
|
|
"""Raised when the parser finds a timezone it cannot parse into a tzinfo.
|
|
|
|
.. versionadded:: 2.7.0
|
|
"""
|
|
# vim:ts=4:sw=4:et
|