Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/dateutil/parser/_parser.py: 12%
811 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12 specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
20Additional resources about date/time string formats can be found below:
22- `A summary of the international standard date and time notation
23 <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27 <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
33import datetime
34import re
35import string
36import time
37import warnings
39from calendar import monthrange
40from io import StringIO
42import six
43from six import integer_types, text_type
45from decimal import Decimal
47from warnings import warn
49from .. import relativedelta
50from .. import tz
52__all__ = ["parse", "parserinfo", "ParserError"]
55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
58class _timelex(object):
59 # Fractional seconds are sometimes split by a comma
60 _split_decimal = re.compile("([.,])")
62 def __init__(self, instream):
63 if isinstance(instream, (bytes, bytearray)):
64 instream = instream.decode()
66 if isinstance(instream, text_type):
67 instream = StringIO(instream)
68 elif getattr(instream, 'read', None) is None:
69 raise TypeError('Parser must be a string or character stream, not '
70 '{itype}'.format(itype=instream.__class__.__name__))
72 self.instream = instream
73 self.charstack = []
74 self.tokenstack = []
75 self.eof = False
77 def get_token(self):
78 """
79 This function breaks the time string into lexical units (tokens), which
80 can be parsed by the parser. Lexical units are demarcated by changes in
81 the character set, so any continuous string of letters is considered
82 one unit, any continuous string of numbers is considered one unit.
84 The main complication arises from the fact that dots ('.') can be used
85 both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
86 "4:30:21.447"). As such, it is necessary to read the full context of
87 any dot-separated strings before breaking it into tokens; as such, this
88 function maintains a "token stack", for when the ambiguous context
89 demands that multiple tokens be parsed at once.
90 """
91 if self.tokenstack:
92 return self.tokenstack.pop(0)
94 seenletters = False
95 token = None
96 state = None
98 while not self.eof:
99 # We only realize that we've reached the end of a token when we
100 # find a character that's not part of the current token - since
101 # that character may be part of the next token, it's stored in the
102 # charstack.
103 if self.charstack:
104 nextchar = self.charstack.pop(0)
105 else:
106 nextchar = self.instream.read(1)
107 while nextchar == '\x00':
108 nextchar = self.instream.read(1)
110 if not nextchar:
111 self.eof = True
112 break
113 elif not state:
114 # First character of the token - determines if we're starting
115 # to parse a word, a number or something else.
116 token = nextchar
117 if self.isword(nextchar):
118 state = 'a'
119 elif self.isnum(nextchar):
120 state = '0'
121 elif self.isspace(nextchar):
122 token = ' '
123 break # emit token
124 else:
125 break # emit token
126 elif state == 'a':
127 # If we've already started reading a word, we keep reading
128 # letters until we find something that's not part of a word.
129 seenletters = True
130 if self.isword(nextchar):
131 token += nextchar
132 elif nextchar == '.':
133 token += nextchar
134 state = 'a.'
135 else:
136 self.charstack.append(nextchar)
137 break # emit token
138 elif state == '0':
139 # If we've already started reading a number, we keep reading
140 # numbers until we find something that doesn't fit.
141 if self.isnum(nextchar):
142 token += nextchar
143 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
144 token += nextchar
145 state = '0.'
146 else:
147 self.charstack.append(nextchar)
148 break # emit token
149 elif state == 'a.':
150 # If we've seen some letters and a dot separator, continue
151 # parsing, and the tokens will be broken up later.
152 seenletters = True
153 if nextchar == '.' or self.isword(nextchar):
154 token += nextchar
155 elif self.isnum(nextchar) and token[-1] == '.':
156 token += nextchar
157 state = '0.'
158 else:
159 self.charstack.append(nextchar)
160 break # emit token
161 elif state == '0.':
162 # If we've seen at least one dot separator, keep going, we'll
163 # break up the tokens later.
164 if nextchar == '.' or self.isnum(nextchar):
165 token += nextchar
166 elif self.isword(nextchar) and token[-1] == '.':
167 token += nextchar
168 state = 'a.'
169 else:
170 self.charstack.append(nextchar)
171 break # emit token
173 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
174 token[-1] in '.,')):
175 l = self._split_decimal.split(token)
176 token = l[0]
177 for tok in l[1:]:
178 if tok:
179 self.tokenstack.append(tok)
181 if state == '0.' and token.count('.') == 0:
182 token = token.replace(',', '.')
184 return token
186 def __iter__(self):
187 return self
189 def __next__(self):
190 token = self.get_token()
191 if token is None:
192 raise StopIteration
194 return token
196 def next(self):
197 return self.__next__() # Python 2.x support
199 @classmethod
200 def split(cls, s):
201 return list(cls(s))
203 @classmethod
204 def isword(cls, nextchar):
205 """ Whether or not the next character is part of a word """
206 return nextchar.isalpha()
208 @classmethod
209 def isnum(cls, nextchar):
210 """ Whether the next character is part of a number """
211 return nextchar.isdigit()
213 @classmethod
214 def isspace(cls, nextchar):
215 """ Whether the next character is whitespace """
216 return nextchar.isspace()
219class _resultbase(object):
221 def __init__(self):
222 for attr in self.__slots__:
223 setattr(self, attr, None)
225 def _repr(self, classname):
226 l = []
227 for attr in self.__slots__:
228 value = getattr(self, attr)
229 if value is not None:
230 l.append("%s=%s" % (attr, repr(value)))
231 return "%s(%s)" % (classname, ", ".join(l))
233 def __len__(self):
234 return (sum(getattr(self, attr) is not None
235 for attr in self.__slots__))
237 def __repr__(self):
238 return self._repr(self.__class__.__name__)
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year used by convertyear() to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased word: position}`` mapping from ``lst``, whose
        entries are either single strings or tuples of synonyms sharing one
        position.
        """
        dct = {}
        for position, entry in enumerate(lst):
            words = entry if isinstance(entry, tuple) else (entry,)
            for word in words:
                dct[word.lower()] = position
        return dct

    def jump(self, name):
        """ Whether ``name`` is a filler token that may be skipped """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Zero-based weekday index (Monday is 0) or ``None`` """
        try:
            return self._weekdays[name.lower()]
        except KeyError:
            return None

    def month(self, name):
        """ One-based month number or ``None`` """
        try:
            return self._months[name.lower()] + 1
        except KeyError:
            return None

    def hms(self, name):
        """ 0, 1 or 2 for an hour, minute or second label, else ``None`` """
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """ 0 for an AM marker, 1 for a PM marker, else ``None`` """
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """ Whether ``name`` is a "pertain" word such as "of" """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is one of the UTC aliases (case-insensitive) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset in seconds registered for ``name``: ``0`` for
        the UTC aliases, otherwise whatever ``TZOFFSET`` maps it to (``None``
        when unknown).
        """
        # The keys of self._utczone are lower-cased by _convert(), so the
        # lookup has to be lower-cased as well; comparing the raw name meant
        # that "UTC", "GMT" and "Z" were never recognised here.
        lowered = name.lower() if hasattr(name, 'lower') else name
        if lowered in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise a parse result in place: expand a two-digit year and
        reconcile the time zone name with its offset. Always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC alias always wins over a conflicting numeric offset.
            res.tzoffset = 0
        return True
394class _ymd(list):
395 def __init__(self, *args, **kwargs):
396 super(self.__class__, self).__init__(*args, **kwargs)
397 self.century_specified = False
398 self.dstridx = None
399 self.mstridx = None
400 self.ystridx = None
402 @property
403 def has_year(self):
404 return self.ystridx is not None
406 @property
407 def has_month(self):
408 return self.mstridx is not None
410 @property
411 def has_day(self):
412 return self.dstridx is not None
414 def could_be_day(self, value):
415 if self.has_day:
416 return False
417 elif not self.has_month:
418 return 1 <= value <= 31
419 elif not self.has_year:
420 # Be permissive, assume leap year
421 month = self[self.mstridx]
422 return 1 <= value <= monthrange(2000, month)[1]
423 else:
424 month = self[self.mstridx]
425 year = self[self.ystridx]
426 return 1 <= value <= monthrange(year, month)[1]
428 def append(self, val, label=None):
429 if hasattr(val, '__len__'):
430 if val.isdigit() and len(val) > 2:
431 self.century_specified = True
432 if label not in [None, 'Y']: # pragma: no cover
433 raise ValueError(label)
434 label = 'Y'
435 elif val > 100:
436 self.century_specified = True
437 if label not in [None, 'Y']: # pragma: no cover
438 raise ValueError(label)
439 label = 'Y'
441 super(self.__class__, self).append(int(val))
443 if label == 'M':
444 if self.has_month:
445 raise ValueError('Month is already set')
446 self.mstridx = len(self) - 1
447 elif label == 'D':
448 if self.has_day:
449 raise ValueError('Day is already set')
450 self.dstridx = len(self) - 1
451 elif label == 'Y':
452 if self.has_year:
453 raise ValueError('Year is already set')
454 self.ystridx = len(self) - 1
456 def _resolve_from_stridxs(self, strids):
457 """
458 Try to resolve the identities of year/month/day elements using
459 ystridx, mstridx, and dstridx, if enough of these are specified.
460 """
461 if len(self) == 3 and len(strids) == 2:
462 # we can back out the remaining stridx value
463 missing = [x for x in range(3) if x not in strids.values()]
464 key = [x for x in ['y', 'm', 'd'] if x not in strids]
465 assert len(missing) == len(key) == 1
466 key = key[0]
467 val = missing[0]
468 strids[key] = val
470 assert len(self) == len(strids) # otherwise this should not be called
471 out = {key: self[strids[key]] for key in strids}
472 return (out.get('y'), out.get('m'), out.get('d'))
474 def resolve_ymd(self, yearfirst, dayfirst):
475 len_ymd = len(self)
476 year, month, day = (None, None, None)
478 strids = (('y', self.ystridx),
479 ('m', self.mstridx),
480 ('d', self.dstridx))
482 strids = {key: val for key, val in strids if val is not None}
483 if (len(self) == len(strids) > 0 or
484 (len(self) == 3 and len(strids) == 2)):
485 return self._resolve_from_stridxs(strids)
487 mstridx = self.mstridx
489 if len_ymd > 3:
490 raise ValueError("More than three YMD values")
491 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
492 # One member, or two members with a month string
493 if mstridx is not None:
494 month = self[mstridx]
495 # since mstridx is 0 or 1, self[mstridx-1] always
496 # looks up the other element
497 other = self[mstridx - 1]
498 else:
499 other = self[0]
501 if len_ymd > 1 or mstridx is None:
502 if other > 31:
503 year = other
504 else:
505 day = other
507 elif len_ymd == 2:
508 # Two members with numbers
509 if self[0] > 31:
510 # 99-01
511 year, month = self
512 elif self[1] > 31:
513 # 01-99
514 month, year = self
515 elif dayfirst and self[1] <= 12:
516 # 13-01
517 day, month = self
518 else:
519 # 01-13
520 month, day = self
522 elif len_ymd == 3:
523 # Three members
524 if mstridx == 0:
525 if self[1] > 31:
526 # Apr-2003-25
527 month, year, day = self
528 else:
529 month, day, year = self
530 elif mstridx == 1:
531 if self[0] > 31 or (yearfirst and self[2] <= 31):
532 # 99-Jan-01
533 year, month, day = self
534 else:
535 # 01-Jan-01
536 # Give precedence to day-first, since
537 # two-digit years is usually hand-written.
538 day, month, year = self
540 elif mstridx == 2:
541 # WTF!?
542 if self[1] > 31:
543 # 01-99-Jan
544 day, year, month = self
545 else:
546 # 99-01-Jan
547 year, day, month = self
549 else:
550 if (self[0] > 31 or
551 self.ystridx == 0 or
552 (yearfirst and self[1] <= 12 and self[2] <= 31)):
553 # 99-01-01
554 if dayfirst and self[2] <= 12:
555 year, day, month = self
556 else:
557 year, month, day = self
558 elif self[0] > 12 or (dayfirst and self[1] <= 12):
559 # 13-01-01
560 day, month, year = self
561 else:
562 # 01-13-01
563 month, day, year = self
565 return year, month, day
568class parser(object):
569 def __init__(self, info=None):
570 self.info = info or parserinfo()
572 def parse(self, timestr, default=None,
573 ignoretz=False, tzinfos=None, **kwargs):
574 """
575 Parse the date/time string into a :class:`datetime.datetime` object.
577 :param timestr:
578 Any date/time string using the supported formats.
580 :param default:
581 The default datetime object, if this is a datetime object and not
582 ``None``, elements specified in ``timestr`` replace elements in the
583 default object.
585 :param ignoretz:
586 If set ``True``, time zones in parsed strings are ignored and a
587 naive :class:`datetime.datetime` object is returned.
589 :param tzinfos:
590 Additional time zone names / aliases which may be present in the
591 string. This argument maps time zone names (and optionally offsets
592 from those time zones) to time zones. This parameter can be a
593 dictionary with timezone aliases mapping time zone names to time
594 zones or a function taking two parameters (``tzname`` and
595 ``tzoffset``) and returning a time zone.
597 The timezones to which the names are mapped can be an integer
598 offset from UTC in seconds or a :class:`tzinfo` object.
600 .. doctest::
601 :options: +NORMALIZE_WHITESPACE
603 >>> from dateutil.parser import parse
604 >>> from dateutil.tz import gettz
605 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
606 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
607 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
608 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
609 datetime.datetime(2012, 1, 19, 17, 21,
610 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
612 This parameter is ignored if ``ignoretz`` is set.
614 :param \\*\\*kwargs:
615 Keyword arguments as passed to ``_parse()``.
617 :return:
618 Returns a :class:`datetime.datetime` object or, if the
619 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
620 first element being a :class:`datetime.datetime` object, the second
621 a tuple containing the fuzzy tokens.
623 :raises ParserError:
624 Raised for invalid or unknown string format, if the provided
625 :class:`tzinfo` is not in a valid format, or if an invalid date
626 would be created.
628 :raises TypeError:
629 Raised for non-string or character stream input.
631 :raises OverflowError:
632 Raised if the parsed date exceeds the largest valid C integer on
633 your system.
634 """
636 if default is None:
637 default = datetime.datetime.now().replace(hour=0, minute=0,
638 second=0, microsecond=0)
640 res, skipped_tokens = self._parse(timestr, **kwargs)
642 if res is None:
643 raise ParserError("Unknown string format: %s", timestr)
645 if len(res) == 0:
646 raise ParserError("String does not contain a date: %s", timestr)
648 try:
649 ret = self._build_naive(res, default)
650 except ValueError as e:
651 six.raise_from(ParserError(str(e) + ": %s", timestr), e)
653 if not ignoretz:
654 ret = self._build_tzaware(ret, res, tzinfos)
656 if kwargs.get('fuzzy_with_tokens', False):
657 return ret, skipped_tokens
658 else:
659 return ret
    class _result(_resultbase):
        # Record of the fields extracted from one date/time string. The
        # base class initialises every slot listed here to None.
        __slots__ = ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond",
                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
               fuzzy_with_tokens=False):
        """
        Private method which performs the heavy lifting of parsing, called from
        ``parse()``, which passes on its ``kwargs`` to this function.

        :param timestr:
            The string to parse.

        :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. If set to ``None``, this value is retrieved from the
            current :class:`parserinfo` object (which itself defaults to
            ``False``).

        :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            If this is set to ``None``, the value is retrieved from the current
            :class:`parserinfo` object (which itself defaults to ``False``).

        :param fuzzy:
            Whether to allow fuzzy parsing, allowing for string like "Today is
            January 1, 2047 at 8:21:00AM".

        :param fuzzy_with_tokens:
            If ``True``, ``fuzzy`` is automatically set to True, and the parser
            will return a tuple where the first element is the parsed
            :class:`datetime.datetime` datetimestamp and the second element is
            a tuple containing the portions of the string which were ignored:

            .. doctest::

                >>> from dateutil.parser import parse
                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

        :return:
            A ``(res, skipped_tokens)`` pair. ``res`` is the populated
            ``_result``; ``skipped_tokens`` is a tuple of the ignored text
            when ``fuzzy_with_tokens`` is set and ``None`` otherwise. If the
            string cannot be parsed (an ``IndexError`` or ``ValueError`` is
            raised while scanning, or validation fails) ``(None, None)`` is
            returned.
        """
        if fuzzy_with_tokens:
            fuzzy = True

        info = self.info

        if dayfirst is None:
            dayfirst = info.dayfirst

        if yearfirst is None:
            yearfirst = info.yearfirst

        res = self._result()
        l = _timelex.split(timestr)         # Splits the timestr into tokens

        # Indices of the tokens that were ignored
        skipped_idxs = []

        # year/month/day list
        ymd = _ymd()

        len_l = len(l)
        i = 0
        try:
            while i < len_l:

                # Check if it's a number
                value_repr = l[i]
                try:
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Numeric token
                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

                # Check weekday
                elif info.weekday(l[i]) is not None:
                    value = info.weekday(l[i])
                    res.weekday = value

                # Check month name
                elif info.month(l[i]) is not None:
                    value = info.month(l[i])
                    ymd.append(value, 'M')

                    if i + 1 < len_l:
                        if l[i + 1] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i + 1]
                            ymd.append(l[i + 2])

                            if i + 3 < len_l and l[i + 3] == sep:
                                # Jan-01-99
                                ymd.append(l[i + 4])
                                i += 2

                            i += 2

                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                              info.pertain(l[i + 2])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            if l[i + 4].isdigit():
                                # Convert it here to become unambiguous
                                value = int(l[i + 4])
                                year = str(info.convertyear(value))
                                ymd.append(year, 'Y')
                            else:
                                # Wrong guess
                                pass
                                # TODO: not hit in tests
                            i += 4

                # Check am/pm
                elif info.ampm(l[i]) is not None:
                    value = info.ampm(l[i])
                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                    if val_is_ampm:
                        res.hour = self._adjust_ampm(res.hour, value)
                        res.ampm = value

                    elif fuzzy:
                        skipped_idxs.append(i)

                # Check for a timezone name
                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
                        # Flip the sign token in place; the numeric offset
                        # itself is read on a later iteration.
                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                # Check for a numbered timezone
                elif res.hour is not None and l[i] in ('+', '-'):
                    signal = (-1, 1)[l[i] == '+']
                    len_li = len(l[i + 1])

                    # TODO: check that l[i + 1] is integer?
                    if len_li == 4:
                        # -0300
                        hour_offset = int(l[i + 1][:2])
                        min_offset = int(l[i + 1][2:])
                    elif i + 2 < len_l and l[i + 2] == ':':
                        # -03:00
                        hour_offset = int(l[i + 1])
                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        hour_offset = int(l[i + 1][:2])
                        min_offset = 0
                    else:
                        raise ValueError(timestr)

                    # Offset is stored in seconds
                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                    # Look for a timezone name between parenthesis
                    if (i + 5 < len_l and
                            info.jump(l[i + 2]) and l[i + 3] == '(' and
                            l[i + 5] == ')' and
                            3 <= len(l[i + 4]) and
                            self._could_be_tzname(res.hour, res.tzname,
                                                  None, l[i + 4])):
                        # -0300 (BRST)
                        res.tzname = l[i + 4]
                        i += 4

                    i += 1

                # Check jumps
                elif not (info.jump(l[i]) or fuzzy):
                    raise ValueError(timestr)

                else:
                    skipped_idxs.append(i)
                i += 1

            # Process year/month/day
            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

            res.century_specified = ymd.century_specified
            res.year = year
            res.month = month
            res.day = day

        except (IndexError, ValueError):
            # Look-aheads past the end of the token list and rejected tokens
            # both end up here: the string is reported as unparseable.
            return None, None

        if not info.validate(res):
            return None, None

        if fuzzy_with_tokens:
            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
            return res, tuple(skipped_tokens)
        else:
            return res, None
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token at ``tokens[idx]`` using its length and
        the tokens around it, storing what it finds in ``ymd`` and ``res``.

        :param tokens:
            The full token list.

        :param idx:
            Index of the numeric token to interpret.

        :param info:
            Object answering the ``hms``/``jump``/``ampm``/``month`` queries.

        :param ymd:
            Year/month/day accumulator that date components are appended to.

        :param res:
            Result object whose time fields are assigned.

        :param fuzzy:
            When true, a number that fits no pattern is ignored instead of
            raising.

        :return:
            The index of the last token consumed by this call.

        :raises ValueError:
            If the token is not a finite number, or it fits no pattern and
            ``fuzzy`` is false.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the token text (including any decimal point)
        len_li = len(value_repr)

        len_l = len(tokens)

        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                # Trailing digits are read as hhmm[ss]
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                idx += 2

            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

        return idx
1006 def _find_hms_idx(self, idx, tokens, info, allow_jump):
1007 len_l = len(tokens)
1009 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1010 # There is an "h", "m", or "s" label following this token. We take
1011 # assign the upcoming label to the current token.
1012 # e.g. the "12" in 12h"
1013 hms_idx = idx + 1
1015 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1016 info.hms(tokens[idx+2]) is not None):
1017 # There is a space and then an "h", "m", or "s" label.
1018 # e.g. the "12" in "12 h"
1019 hms_idx = idx + 2
1021 elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1022 # There is a "h", "m", or "s" preceding this token. Since neither
1023 # of the previous cases was hit, there is no label following this
1024 # token, so we use the previous label.
1025 # e.g. the "04" in "12h04"
1026 hms_idx = idx-1
1028 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1029 info.hms(tokens[idx-2]) is not None):
1030 # If we are looking at the final token, we allow for a
1031 # backward-looking check to skip over a space.
1032 # TODO: Are we sure this is the right condition here?
1033 hms_idx = idx - 2
1035 else:
1036 hms_idx = None
1038 return hms_idx
1040 def _assign_hms(self, res, value_repr, hms):
1041 # See GH issue #427, fixing float rounding
1042 value = self._to_decimal(value_repr)
1044 if hms == 0:
1045 # Hour
1046 res.hour = int(value)
1047 if value % 1:
1048 res.minute = int(60*(value % 1))
1050 elif hms == 1:
1051 (res.minute, res.second) = self._parse_min_sec(value)
1053 elif hms == 2:
1054 (res.second, res.microsecond) = self._parsems(value_repr)
1056 def _could_be_tzname(self, hour, tzname, tzoffset, token):
1057 return (hour is not None and
1058 tzname is None and
1059 tzoffset is None and
1060 len(token) <= 5 and
1061 (all(x in string.ascii_uppercase for x in token)
1062 or token in self.info.UTCZONE))
1064 def _ampm_valid(self, hour, ampm, fuzzy):
1065 """
1066 For fuzzy parsing, 'a' or 'am' (both valid English words)
1067 may erroneously trigger the AM/PM flag. Deal with that
1068 here.
1069 """
1070 val_is_ampm = True
1072 # If there's already an AM/PM flag, this one isn't one.
1073 if fuzzy and ampm is not None:
1074 val_is_ampm = False
1076 # If AM/PM is found and hour is not, raise a ValueError
1077 if hour is None:
1078 if fuzzy:
1079 val_is_ampm = False
1080 else:
1081 raise ValueError('No hour specified with AM or PM flag.')
1082 elif not 0 <= hour <= 12:
1083 # If AM/PM is found, it's a 12 hour clock, so raise
1084 # an error for invalid range
1085 if fuzzy:
1086 val_is_ampm = False
1087 else:
1088 raise ValueError('Invalid hour specified for 12-hour clock.')
1090 return val_is_ampm
1092 def _adjust_ampm(self, hour, ampm):
1093 if hour < 12 and ampm == 1:
1094 hour += 12
1095 elif hour == 12 and ampm == 0:
1096 hour = 0
1097 return hour
1099 def _parse_min_sec(self, value):
1100 # TODO: Every usage of this function sets res.second to the return
1101 # value. Are there any cases where second will be returned as None and
1102 # we *don't* want to set res.second = None?
1103 minute = int(value)
1104 second = None
1106 sec_remainder = value % 1
1107 if sec_remainder:
1108 second = int(60 * sec_remainder)
1109 return (minute, second)
1111 def _parse_hms(self, idx, tokens, info, hms_idx):
1112 # TODO: Is this going to admit a lot of false-positives for when we
1113 # just happen to have digits and "h", "m" or "s" characters in non-date
1114 # text? I guess hex hashes won't have that problem, but there's plenty
1115 # of random junk out there.
1116 if hms_idx is None:
1117 hms = None
1118 new_idx = idx
1119 elif hms_idx > idx:
1120 hms = info.hms(tokens[hms_idx])
1121 new_idx = hms_idx
1122 else:
1123 # Looking backwards, increment one.
1124 hms = info.hms(tokens[hms_idx]) + 1
1125 new_idx = idx
1127 return (new_idx, hms)
1129 # ------------------------------------------------------------------
1130 # Handling for individual tokens. These are kept as methods instead
1131 # of functions for the sake of customizability via subclassing.
1133 def _parsems(self, value):
1134 """Parse a I[.F] seconds value into (seconds, microseconds)."""
1135 if "." not in value:
1136 return int(value), 0
1137 else:
1138 i, f = value.split(".")
1139 return int(i), int(f.ljust(6, "0")[:6])
1141 def _to_decimal(self, val):
1142 try:
1143 decimal_value = Decimal(val)
1144 # See GH 662, edge case, infinite value should not be converted
1145 # via `_to_decimal`
1146 if not decimal_value.is_finite():
1147 raise ValueError("Converted decimal value is infinite or NaN")
1148 except Exception as e:
1149 msg = "Could not convert %s to decimal" % val
1150 six.raise_from(ValueError(msg), e)
1151 else:
1152 return decimal_value
1154 # ------------------------------------------------------------------
1155 # Post-Parsing construction of datetime output. These are kept as
1156 # methods instead of functions for the sake of customizability via
1157 # subclassing.
1159 def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1160 if callable(tzinfos):
1161 tzdata = tzinfos(tzname, tzoffset)
1162 else:
1163 tzdata = tzinfos.get(tzname)
1164 # handle case where tzinfo is paased an options that returns None
1165 # eg tzinfos = {'BRST' : None}
1166 if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1167 tzinfo = tzdata
1168 elif isinstance(tzdata, text_type):
1169 tzinfo = tz.tzstr(tzdata)
1170 elif isinstance(tzdata, integer_types):
1171 tzinfo = tz.tzoffset(tzname, tzdata)
1172 else:
1173 raise TypeError("Offset must be tzinfo subclass, tz string, "
1174 "or int offset.")
1175 return tzinfo
1177 def _build_tzaware(self, naive, res, tzinfos):
1178 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1179 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1180 aware = naive.replace(tzinfo=tzinfo)
1181 aware = self._assign_tzname(aware, res.tzname)
1183 elif res.tzname and res.tzname in time.tzname:
1184 aware = naive.replace(tzinfo=tz.tzlocal())
1186 # Handle ambiguous local datetime
1187 aware = self._assign_tzname(aware, res.tzname)
1189 # This is mostly relevant for winter GMT zones parsed in the UK
1190 if (aware.tzname() != res.tzname and
1191 res.tzname in self.info.UTCZONE):
1192 aware = aware.replace(tzinfo=tz.UTC)
1194 elif res.tzoffset == 0:
1195 aware = naive.replace(tzinfo=tz.UTC)
1197 elif res.tzoffset:
1198 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1200 elif not res.tzname and not res.tzoffset:
1201 # i.e. no timezone information was found.
1202 aware = naive
1204 elif res.tzname:
1205 # tz-like string was parsed but we don't know what to do
1206 # with it
1207 warnings.warn("tzname {tzname} identified but not understood. "
1208 "Pass `tzinfos` argument in order to correctly "
1209 "return a timezone-aware datetime. In a future "
1210 "version, this will raise an "
1211 "exception.".format(tzname=res.tzname),
1212 category=UnknownTimezoneWarning)
1213 aware = naive
1215 return aware
1217 def _build_naive(self, res, default):
1218 repl = {}
1219 for attr in ("year", "month", "day", "hour",
1220 "minute", "second", "microsecond"):
1221 value = getattr(res, attr)
1222 if value is not None:
1223 repl[attr] = value
1225 if 'day' not in repl:
1226 # If the default day exceeds the last day of the month, fall back
1227 # to the end of the month.
1228 cyear = default.year if res.year is None else res.year
1229 cmonth = default.month if res.month is None else res.month
1230 cday = default.day if res.day is None else res.day
1232 if cday > monthrange(cyear, cmonth)[1]:
1233 repl['day'] = monthrange(cyear, cmonth)[1]
1235 naive = default.replace(**repl)
1237 if res.weekday is not None and not res.day:
1238 naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1240 return naive
1242 def _assign_tzname(self, dt, tzname):
1243 if dt.tzname() != tzname:
1244 new_dt = tz.enfold(dt, fold=1)
1245 if new_dt.tzname() == tzname:
1246 return new_dt
1248 return dt
1250 def _recombine_skipped(self, tokens, skipped_idxs):
1251 """
1252 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1253 >>> skipped_idxs = [0, 1, 2, 5]
1254 >>> _recombine_skipped(tokens, skipped_idxs)
1255 ["foo bar", "baz"]
1256 """
1257 skipped_tokens = []
1258 for i, idx in enumerate(sorted(skipped_idxs)):
1259 if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1260 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1261 else:
1262 skipped_tokens.append(tokens[idx])
1264 return skipped_tokens
# Module-level parser instance, constructed with no arguments; parse() uses
# it whenever no parserinfo is supplied.
DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string in one of the supported formats.

    When ``parserinfo`` is given, a new :class:`parser` is built from it;
    otherwise the module-level ``DEFAULTPARSER`` does the work. All
    keyword arguments are forwarded to the parser's ``parse`` method.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in
        the default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for strings like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` timestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string formats, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
class _tzparser(object):
    """
    Parser for TZ-variable style time zone strings.

    The accepted shapes, as noted next to the code that handles them, are
    an abbreviation/offset prefix such as ``BRST+3[BRDT[+2]]`` optionally
    followed by either the deprecated dateutil-specific comma-separated
    rule list (``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``) or a pair of
    ``J<n>`` / ``M<m>.<w>.<d>`` / ``<n>`` rules with optional ``/time``.
    """

    class _result(_resultbase):
        """Container for the parsed abbreviations, offsets and DST rules."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule (start or end)."""
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result``.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells
            whether any token other than ``","`` / ``":"`` was left
            unconsumed, or ``None`` if the string could not be parsed.
        """
        res = self._result()
        # Split into separators, alphabetic runs and digit runs; anything
        # else (signs, '/', ';') falls out as the text between matches.
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = []
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    used_idxs.extend(range(j))
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                        if res.dstabbr:
                            break
                else:
                    break

            if i < len_l:
                # ';' is accepted as an alternative to ','.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check rather than ``assert`` so that validation
                # still happens when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("expected ',' before the DST rules")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        raise ValueError("unexpected token after DST rule")

                    i += 1

                if i < len_l:
                    raise ValueError("trailing tokens after DST rules")

        except (IndexError, ValueError):
            # Any malformed input is reported as "could not parse".
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",", ":"})
        return res
# Module-level _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level ``DEFAULTTZPARSER``."""
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed
class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values when the exception is converted to a
    string.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # ValueError covers messages that are not valid format strings
            # (e.g. one ending in a bare "%"); without it, str() on the
            # exception would itself raise and hide the real error.
            return super(ParserError, self).__str__()

    def __repr__(self):
        args = ", ".join("'%s'" % arg for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when the parser finds a timezone it cannot parse into
    a tzinfo.

    .. versionadded:: 2.7.0
    """
1613# vim:ts=4:sw=4:et