Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/tools/datetimes.py: 14%
371 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from collections import abc
4from datetime import datetime
5from functools import partial
6from itertools import islice
7from typing import (
8 TYPE_CHECKING,
9 Callable,
10 Hashable,
11 List,
12 Tuple,
13 TypedDict,
14 Union,
15 cast,
16 overload,
17)
18import warnings
20import numpy as np
22from pandas._libs import tslib
23from pandas._libs.tslibs import (
24 OutOfBoundsDatetime,
25 Timedelta,
26 Timestamp,
27 iNaT,
28 nat_strings,
29 parsing,
30 timezones,
31)
32from pandas._libs.tslibs.parsing import (
33 DateParseError,
34 format_is_iso,
35 guess_datetime_format,
36)
37from pandas._libs.tslibs.strptime import array_strptime
38from pandas._typing import (
39 AnyArrayLike,
40 ArrayLike,
41 DateTimeErrorChoices,
42 Timezone,
43 npt,
44)
45from pandas.util._exceptions import find_stack_level
47from pandas.core.dtypes.common import (
48 ensure_object,
49 is_datetime64_dtype,
50 is_datetime64_ns_dtype,
51 is_datetime64tz_dtype,
52 is_float,
53 is_integer,
54 is_integer_dtype,
55 is_list_like,
56 is_numeric_dtype,
57 is_scalar,
58)
59from pandas.core.dtypes.generic import (
60 ABCDataFrame,
61 ABCSeries,
62)
63from pandas.core.dtypes.missing import notna
65from pandas.arrays import (
66 DatetimeArray,
67 IntegerArray,
68)
69from pandas.core import algorithms
70from pandas.core.algorithms import unique
71from pandas.core.arrays.base import ExtensionArray
72from pandas.core.arrays.datetimes import (
73 maybe_convert_dtype,
74 objects_to_datetime64ns,
75 tz_to_dtype,
76)
77from pandas.core.construction import extract_array
78from pandas.core.indexes.base import Index
79from pandas.core.indexes.datetimes import DatetimeIndex
if TYPE_CHECKING:
82 from pandas._libs.tslibs.nattype import NaTType
83 from pandas._libs.tslibs.timedeltas import UnitChoices
85 from pandas import (
86 DataFrame,
87 Series,
88 )
# ---------------------------------------------------------------------
# types used in annotations

# Anything to_datetime can treat as a 1-D collection of convertible values.
ArrayConvertible = Union[List, Tuple, AnyArrayLike]
# Scalar inputs accepted in addition to datetime objects.
Scalar = Union[float, str]
DatetimeScalar = Union[Scalar, datetime]

DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]

# Column values accepted when assembling datetimes from a dict/DataFrame.
DatetimeDictArg = Union[List[Scalar], Tuple[Scalar, ...], AnyArrayLike]
class YearMonthDayDict(TypedDict, total=True):
    """TypedDict with the minimal required keys for dict-based datetime assembly."""

    year: DatetimeDictArg
    month: DatetimeDictArg
    day: DatetimeDictArg
class FulldatetimeDict(YearMonthDayDict, total=False):
    """
    TypedDict extending YearMonthDayDict with the optional time-component
    keys accepted by dict-based datetime assembly (both singular spellings
    and sub-second abbreviations ms/us/ns).
    """

    hour: DatetimeDictArg
    hours: DatetimeDictArg
    minute: DatetimeDictArg
    minutes: DatetimeDictArg
    second: DatetimeDictArg
    seconds: DatetimeDictArg
    ms: DatetimeDictArg
    us: DatetimeDictArg
    ns: DatetimeDictArg
# Dict-like inputs accepted by to_datetime for column-wise assembly.
DictConvertible = Union[FulldatetimeDict, "DataFrame"]
# Minimum input length before should_cache() will even consider caching.
start_caching_at = 50


# ---------------------------------------------------------------------
127def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False):
128 # Try to guess the format based on the first non-NaN element
129 non_nan_elements = notna(arr).nonzero()[0]
130 if len(non_nan_elements):
131 return guess_datetime_format(arr[non_nan_elements[0]], dayfirst=dayfirst)
def should_cache(
    arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None
) -> bool:
    """
    Decide whether conversion results should be cached.

    Caching pays off when a sample of the input contains enough duplicates:
    if the share of unique values among the first `check_count` elements is
    below `unique_share`, we cache.

    Parameters
    ----------
    arg : listlike, tuple, 1-d array, Series
    unique_share : float, default=0.7, optional
        0 < unique_share < 1
    check_count : int, optional
        0 <= check_count <= len(arg)

    Returns
    -------
    do_caching : bool

    Notes
    -----
    Sequences shorter than 50 items are never cached; up to 5000 items we
    sample ten percent of the elements, beyond that only the first 500.
    All constants were chosen empirically.
    """
    n = len(arg)

    if check_count is None:
        # in this case, the gain from caching is negligible
        if n <= start_caching_at:
            return False
        check_count = n // 10 if n <= 5000 else 500
    else:
        assert (
            0 <= check_count <= n
        ), "check_count must be in next bounds: [0; len(arg)]"
        if check_count == 0:
            return False

    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"

    try:
        # We can't cache if the items are not hashable.
        sampled_uniques = set(islice(arg, check_count))
    except TypeError:
        return False
    return len(sampled_uniques) <= check_count * unique_share
194def _maybe_cache(
195 arg: ArrayConvertible,
196 format: str | None,
197 cache: bool,
198 convert_listlike: Callable,
199) -> Series:
200 """
201 Create a cache of unique dates from an array of dates
203 Parameters
204 ----------
205 arg : listlike, tuple, 1-d array, Series
206 format : string
207 Strftime format to parse time
208 cache : bool
209 True attempts to create a cache of converted values
210 convert_listlike : function
211 Conversion function to apply on dates
213 Returns
214 -------
215 cache_array : Series
216 Cache of converted, unique dates. Can be empty
217 """
218 from pandas import Series
220 cache_array = Series(dtype=object)
222 if cache:
223 # Perform a quicker unique check
224 if not should_cache(arg):
225 return cache_array
227 unique_dates = unique(arg)
228 if len(unique_dates) < len(arg):
229 cache_dates = convert_listlike(unique_dates, format)
230 # GH#45319
231 try:
232 cache_array = Series(cache_dates, index=unique_dates)
233 except OutOfBoundsDatetime:
234 return cache_array
235 # GH#39882 and GH#35888 in case of None and NaT we get duplicates
236 if not cache_array.index.is_unique:
237 cache_array = cache_array[~cache_array.index.duplicated()]
238 return cache_array
241def _box_as_indexlike(
242 dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None
243) -> Index:
244 """
245 Properly boxes the ndarray of datetimes to DatetimeIndex
246 if it is possible or to generic Index instead
248 Parameters
249 ----------
250 dt_array: 1-d array
251 Array of datetimes to be wrapped in an Index.
252 tz : object
253 None or 'utc'
254 name : string, default None
255 Name for a resulting index
257 Returns
258 -------
259 result : datetime of converted dates
260 - DatetimeIndex if convertible to sole datetime64 type
261 - general Index otherwise
262 """
264 if is_datetime64_dtype(dt_array):
265 tz = "utc" if utc else None
266 return DatetimeIndex(dt_array, tz=tz, name=name)
267 return Index(dt_array, name=name, dtype=dt_array.dtype)
270def _convert_and_box_cache(
271 arg: DatetimeScalarOrArrayConvertible,
272 cache_array: Series,
273 name: Hashable | None = None,
274) -> Index:
275 """
276 Convert array of dates with a cache and wrap the result in an Index.
278 Parameters
279 ----------
280 arg : integer, float, string, datetime, list, tuple, 1-d array, Series
281 cache_array : Series
282 Cache of converted, unique dates
283 name : string, default None
284 Name for a DatetimeIndex
286 Returns
287 -------
288 result : Index-like of converted dates
289 """
290 from pandas import Series
292 result = Series(arg).map(cache_array)
293 return _box_as_indexlike(result._values, utc=None, name=name)
296def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index:
297 """
298 Return results from array_strptime if a %z or %Z directive was passed.
300 Parameters
301 ----------
302 result : ndarray[int64]
303 int64 date representations of the dates
304 timezones : ndarray
305 pytz timezone objects
306 tz : object
307 None or pytz timezone object
308 name : string, default None
309 Name for a DatetimeIndex
311 Returns
312 -------
313 tz_result : Index-like of parsed dates with timezone
314 """
315 tz_results = np.array(
316 [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
317 )
318 if tz is not None:
319 # Convert to the same tz
320 tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results])
322 return Index(tz_results, name=name)
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : str or None
        strftime format to parse with; None means infer/fall back to parsing
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : str
        None or string of the frequency of the passed data
    errors : str
        error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : bool
        dayfirst parsing behavior from to_datetime
    yearfirst : bool
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable: already-datetime64 inputs need no parsing,
    # only (possibly) tz adjustment and Index boxing
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # strip the existing tz, then relabel the same wall times as UTC
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # fall through to the generic parsing path below
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz))
    except TypeError:
        # unconvertible dtype: honor the 'errors' policy instead of raising
        if errors == "coerce":
            npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(npvalues, name=name)
        elif errors == "ignore":
            idx = Index(arg, name=name)
            return idx
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            # an explicitly-passed ISO format becomes a strict requirement
            require_iso8601 = not infer_datetime_format
            format = None

    if format is not None:
        res = _to_datetime_with_format(
            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
        )
        # res is None signals "inferred format failed, fall back below"
        if res is not None:
            return res

    assert format is None or infer_datetime_format
    utc = tz == "utc"
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=utc,
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
458def _array_strptime_with_fallback(
459 arg,
460 name,
461 tz,
462 fmt: str,
463 exact: bool,
464 errors: str,
465 infer_datetime_format: bool,
466) -> Index | None:
467 """
468 Call array_strptime, with fallback behavior depending on 'errors'.
469 """
470 utc = tz == "utc"
472 try:
473 result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors)
474 except OutOfBoundsDatetime:
475 if errors == "raise":
476 raise
477 elif errors == "coerce":
478 result = np.empty(arg.shape, dtype="M8[ns]")
479 iresult = result.view("i8")
480 iresult.fill(iNaT)
481 else:
482 result = arg
483 except ValueError:
484 # if fmt was inferred, try falling back
485 # to array_to_datetime - terminate here
486 # for specified formats
487 if not infer_datetime_format:
488 if errors == "raise":
489 raise
490 elif errors == "coerce":
491 result = np.empty(arg.shape, dtype="M8[ns]")
492 iresult = result.view("i8")
493 iresult.fill(iNaT)
494 else:
495 result = arg
496 else:
497 # Indicates to the caller to fallback to objects_to_datetime64ns
498 return None
499 else:
500 if "%Z" in fmt or "%z" in fmt:
501 return _return_parsed_timezone_results(result, timezones, tz, name)
503 return _box_as_indexlike(result, utc=utc, name=name)
506def _to_datetime_with_format(
507 arg,
508 orig_arg,
509 name,
510 tz,
511 fmt: str,
512 exact: bool,
513 errors: str,
514 infer_datetime_format: bool,
515) -> Index | None:
516 """
517 Try parsing with the given format, returning None on failure.
518 """
519 result = None
521 # shortcut formatting here
522 if fmt == "%Y%m%d":
523 # pass orig_arg as float-dtype may have been converted to
524 # datetime64[ns]
525 orig_arg = ensure_object(orig_arg)
526 try:
527 # may return None without raising
528 result = _attempt_YYYYMMDD(orig_arg, errors=errors)
529 except (ValueError, TypeError, OutOfBoundsDatetime) as err:
530 raise ValueError(
531 "cannot convert the input to '%Y%m%d' date format"
532 ) from err
533 if result is not None:
534 utc = tz == "utc"
535 return _box_as_indexlike(result, utc=utc, name=name)
537 # fallback
538 res = _array_strptime_with_fallback(
539 arg, name, tz, fmt, exact, errors, infer_datetime_format
540 )
541 return res
544def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
545 """
546 to_datetime specalized to the case where a 'unit' is passed.
547 """
548 arg = extract_array(arg, extract_numpy=True)
550 # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
551 # because it expects an ndarray argument
552 if isinstance(arg, IntegerArray):
553 arr = arg.astype(f"datetime64[{unit}]")
554 tz_parsed = None
555 else:
556 arg = np.asarray(arg)
557 arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
559 if errors == "ignore":
560 # Index constructor _may_ infer to DatetimeIndex
561 result = Index._with_infer(arr, name=name)
562 else:
563 result = DatetimeIndex(arr, name=name)
565 if not isinstance(result, DatetimeIndex):
566 return result
568 # GH#23758: We may still need to localize the result with tz
569 # GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
570 # result will be naive but in UTC
571 result = result.tz_localize("UTC").tz_convert(tz_parsed)
573 if tz is not None:
574 if result.tz is None:
575 result = result.tz_localize(tz)
576 else:
577 result = result.tz_convert(tz)
578 return result
581def _adjust_to_origin(arg, origin, unit):
582 """
583 Helper function for to_datetime.
584 Adjust input argument to the specified origin
586 Parameters
587 ----------
588 arg : list, tuple, ndarray, Series, Index
589 date to be adjusted
590 origin : 'julian' or Timestamp
591 origin offset for the arg
592 unit : str
593 passed unit from to_datetime, must be 'D'
595 Returns
596 -------
597 ndarray or scalar of adjusted date(s)
598 """
599 if origin == "julian":
600 original = arg
601 j0 = Timestamp(0).to_julian_date()
602 if unit != "D":
603 raise ValueError("unit must be 'D' for origin='julian'")
604 try:
605 arg = arg - j0
606 except TypeError as err:
607 raise ValueError(
608 "incompatible 'arg' type for given 'origin'='julian'"
609 ) from err
611 # preemptively check this for a nice range
612 j_max = Timestamp.max.to_julian_date() - j0
613 j_min = Timestamp.min.to_julian_date() - j0
614 if np.any(arg > j_max) or np.any(arg < j_min):
615 raise OutOfBoundsDatetime(
616 f"{original} is Out of Bounds for origin='julian'"
617 )
618 else:
619 # arg must be numeric
620 if not (
621 (is_scalar(arg) and (is_integer(arg) or is_float(arg)))
622 or is_numeric_dtype(np.asarray(arg))
623 ):
624 raise ValueError(
625 f"'{arg}' is not compatible with origin='{origin}'; "
626 "it must be numeric with a unit specified"
627 )
629 # we are going to offset back to unix / epoch time
630 try:
631 offset = Timestamp(origin)
632 except OutOfBoundsDatetime as err:
633 raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err
634 except ValueError as err:
635 raise ValueError(
636 f"origin {origin} cannot be converted to a Timestamp"
637 ) from err
639 if offset.tz is not None:
640 raise ValueError(f"origin offset {offset} must be tz-naive")
641 td_offset = offset - Timestamp(0)
643 # convert the offset to the unit of the arg
644 # this should be lossless in terms of precision
645 ioffset = td_offset // Timedelta(1, unit=unit)
647 # scalars & ndarray-like can handle the addition
648 if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)):
649 arg = np.asarray(arg)
650 arg = arg + ioffset
651 return arg
# to_datetime overload stubs: the return type follows the input —
# scalar -> Timestamp, Series/dict-like -> Series, listlike -> DatetimeIndex.


@overload
def to_datetime(
    arg: DatetimeScalar,
    errors: DateTimeErrorChoices = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> Timestamp:
    ...


@overload
def to_datetime(
    arg: Series | DictConvertible,
    errors: DateTimeErrorChoices = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> Series:
    ...


@overload
def to_datetime(
    arg: list | tuple | Index | ArrayLike,
    errors: DateTimeErrorChoices = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> DatetimeIndex:
    ...
705def to_datetime(
706 arg: DatetimeScalarOrArrayConvertible | DictConvertible,
707 errors: DateTimeErrorChoices = "raise",
708 dayfirst: bool = False,
709 yearfirst: bool = False,
710 utc: bool | None = None,
711 format: str | None = None,
712 exact: bool = True,
713 unit: str | None = None,
714 infer_datetime_format: bool = False,
715 origin="unix",
716 cache: bool = True,
717) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
718 """
719 Convert argument to datetime.
721 This function converts a scalar, array-like, :class:`Series` or
722 :class:`DataFrame`/dict-like to a pandas datetime object.
724 Parameters
725 ----------
726 arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
727 The object to convert to a datetime. If a :class:`DataFrame` is provided, the
728 method expects minimally the following columns: :const:`"year"`,
729 :const:`"month"`, :const:`"day"`.
730 errors : {'ignore', 'raise', 'coerce'}, default 'raise'
731 - If :const:`'raise'`, then invalid parsing will raise an exception.
732 - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`.
733 - If :const:`'ignore'`, then invalid parsing will return the input.
734 dayfirst : bool, default False
735 Specify a date parse order if `arg` is str or is list-like.
736 If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"`
737 is parsed as :const:`2012-11-10`.
739 .. warning::
741 ``dayfirst=True`` is not strict, but will prefer to parse
742 with day first. If a delimited date string cannot be parsed in
743 accordance with the given `dayfirst` option, e.g.
744 ``to_datetime(['31-12-2021'])``, then a warning will be shown.
746 yearfirst : bool, default False
747 Specify a date parse order if `arg` is str or is list-like.
749 - If :const:`True` parses dates with the year first, e.g.
750 :const:`"10/11/12"` is parsed as :const:`2010-11-12`.
751 - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is
752 preceded (same as :mod:`dateutil`).
754 .. warning::
756 ``yearfirst=True`` is not strict, but will prefer to parse
757 with year first.
759 utc : bool, default None
760 Control timezone-related parsing, localization and conversion.
762 - If :const:`True`, the function *always* returns a timezone-aware
763 UTC-localized :class:`Timestamp`, :class:`Series` or
764 :class:`DatetimeIndex`. To do this, timezone-naive inputs are
765 *localized* as UTC, while timezone-aware inputs are *converted* to UTC.
767 - If :const:`False` (default), inputs will not be coerced to UTC.
768 Timezone-naive inputs will remain naive, while timezone-aware ones
769 will keep their time offsets. Limitations exist for mixed
770 offsets (typically, daylight savings), see :ref:`Examples
771 <to_datetime_tz_examples>` section for details.
773 See also: pandas general documentation about `timezone conversion and
774 localization
775 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
776 #time-zone-handling>`_.
778 format : str, default None
779 The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that
780 :const:`"%f"` will parse all the way up to nanoseconds. See
781 `strftime documentation
782 <https://docs.python.org/3/library/datetime.html
783 #strftime-and-strptime-behavior>`_ for more information on choices.
784 exact : bool, default True
785 Control how `format` is used:
787 - If :const:`True`, require an exact `format` match.
788 - If :const:`False`, allow the `format` to match anywhere in the target
789 string.
791 unit : str, default 'ns'
792 The unit of the arg (D,s,ms,us,ns) denote the unit, which is an
793 integer or float number. This will be based off the origin.
794 Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate
795 the number of milliseconds to the unix epoch start.
796 infer_datetime_format : bool, default False
797 If :const:`True` and no `format` is given, attempt to infer the format
798 of the datetime strings based on the first non-NaN element,
799 and if it can be inferred, switch to a faster method of parsing them.
800 In some cases this can increase the parsing speed by ~5-10x.
801 origin : scalar, default 'unix'
802 Define the reference date. The numeric values would be parsed as number
803 of units (defined by `unit`) since this reference date.
805 - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01.
806 - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to
807 beginning of Julian Calendar. Julian day number :const:`0` is assigned
808 to the day starting at noon on January 1, 4713 BC.
809 - If Timestamp convertible, origin is set to Timestamp identified by
810 origin.
811 cache : bool, default True
812 If :const:`True`, use a cache of unique, converted dates to apply the
813 datetime conversion. May produce significant speed-up when parsing
814 duplicate date strings, especially ones with timezone offsets. The cache
815 is only used when there are at least 50 values. The presence of
816 out-of-bounds values will render the cache unusable and may slow down
817 parsing.
819 .. versionchanged:: 0.25.0
820 changed default value from :const:`False` to :const:`True`.
822 Returns
823 -------
824 datetime
825 If parsing succeeded.
826 Return type depends on input (types in parenthesis correspond to
827 fallback in case of unsuccessful timezone or out-of-range timestamp
828 parsing):
830 - scalar: :class:`Timestamp` (or :class:`datetime.datetime`)
831 - array-like: :class:`DatetimeIndex` (or :class:`Series` with
832 :class:`object` dtype containing :class:`datetime.datetime`)
833 - Series: :class:`Series` of :class:`datetime64` dtype (or
834 :class:`Series` of :class:`object` dtype containing
835 :class:`datetime.datetime`)
836 - DataFrame: :class:`Series` of :class:`datetime64` dtype (or
837 :class:`Series` of :class:`object` dtype containing
838 :class:`datetime.datetime`)
840 Raises
841 ------
842 ParserError
843 When parsing a date from string fails.
844 ValueError
845 When another datetime conversion error happens. For example when one
846 of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or
847 when a Timezone-aware :class:`datetime.datetime` is found in an array-like
848 of mixed time offsets, and ``utc=False``.
850 See Also
851 --------
852 DataFrame.astype : Cast argument to a specified dtype.
853 to_timedelta : Convert argument to timedelta.
854 convert_dtypes : Convert dtypes.
856 Notes
857 -----
859 Many input types are supported, and lead to different output types:
861 - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime`
862 module or :mod:`numpy`). They are converted to :class:`Timestamp` when
863 possible, otherwise they are converted to :class:`datetime.datetime`.
864 None/NaN/null scalars are converted to :const:`NaT`.
866 - **array-like** can contain int, float, str, datetime objects. They are
867 converted to :class:`DatetimeIndex` when possible, otherwise they are
868 converted to :class:`Index` with :class:`object` dtype, containing
869 :class:`datetime.datetime`. None/NaN/null entries are converted to
870 :const:`NaT` in both cases.
872 - **Series** are converted to :class:`Series` with :class:`datetime64`
873 dtype when possible, otherwise they are converted to :class:`Series` with
874 :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null
875 entries are converted to :const:`NaT` in both cases.
877 - **DataFrame/dict-like** are converted to :class:`Series` with
878 :class:`datetime64` dtype. For each row a datetime is created from assembling
879 the various dataframe columns. Column keys can be common abbreviations
880 like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or
881 plurals of the same.
883 The following causes are responsible for :class:`datetime.datetime` objects
884 being returned (possibly inside an :class:`Index` or a :class:`Series` with
885 :class:`object` dtype) instead of a proper pandas designated type
886 (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series`
887 with :class:`datetime64` dtype):
889 - when any input element is before :const:`Timestamp.min` or after
890 :const:`Timestamp.max`, see `timestamp limitations
891 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
892 #timeseries-timestamp-limits>`_.
894 - when ``utc=False`` (default) and the input is an array-like or
895 :class:`Series` containing mixed naive/aware datetime, or aware with mixed
896 time offsets. Note that this happens in the (quite frequent) situation when
897 the timezone has a daylight savings policy. In that case you may wish to
898 use ``utc=True``.
900 Examples
901 --------
903 **Handling various input formats**
905 Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys
906 can be common abbreviations like ['year', 'month', 'day', 'minute', 'second',
907 'ms', 'us', 'ns']) or plurals of the same
909 >>> df = pd.DataFrame({'year': [2015, 2016],
910 ... 'month': [2, 3],
911 ... 'day': [4, 5]})
912 >>> pd.to_datetime(df)
913 0 2015-02-04
914 1 2016-03-05
915 dtype: datetime64[ns]
917 Passing ``infer_datetime_format=True`` can often-times speedup a parsing
918 if its not an ISO8601 format exactly, but in a regular format.
920 >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
921 >>> s.head()
922 0 3/11/2000
923 1 3/12/2000
924 2 3/13/2000
925 3 3/11/2000
926 4 3/12/2000
927 dtype: object
929 >>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP
930 100 loops, best of 3: 10.4 ms per loop
932 >>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP
933 1 loop, best of 3: 471 ms per loop
935 Using a unix epoch time
937 >>> pd.to_datetime(1490195805, unit='s')
938 Timestamp('2017-03-22 15:16:45')
939 >>> pd.to_datetime(1490195805433502912, unit='ns')
940 Timestamp('2017-03-22 15:16:45.433502912')
942 .. warning:: For float arg, precision rounding might happen. To prevent
943 unexpected behavior use a fixed-width exact type.
945 Using a non-unix epoch origin
947 >>> pd.to_datetime([1, 2, 3], unit='D',
948 ... origin=pd.Timestamp('1960-01-01'))
949 DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'],
950 dtype='datetime64[ns]', freq=None)
952 **Non-convertible date/times**
954 If a date does not meet the `timestamp limitations
955 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
956 #timeseries-timestamp-limits>`_, passing ``errors='ignore'``
957 will return the original input instead of raising any exception.
959 Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`,
960 in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`.
962 >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
963 datetime.datetime(1300, 1, 1, 0, 0)
964 >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
965 NaT
967 .. _to_datetime_tz_examples:
969 **Timezones and time offsets**
971 The default behaviour (``utc=False``) is as follows:
973 - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`:
975 >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15'])
976 DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'],
977 dtype='datetime64[ns]', freq=None)
979 - Timezone-aware inputs *with constant time offset* are converted to
980 timezone-aware :class:`DatetimeIndex`:
982 >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500'])
983 DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
984 dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)
986 - However, timezone-aware inputs *with mixed time offsets* (for example
987 issued from a timezone with daylight savings, such as Europe/Paris)
988 are **not successfully converted** to a :class:`DatetimeIndex`. Instead a
989 simple :class:`Index` containing :class:`datetime.datetime` objects is
990 returned:
992 >>> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100'])
993 Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00],
994 dtype='object')
996 - A mix of timezone-aware and timezone-naive inputs is converted to
997 a timezone-aware :class:`DatetimeIndex` if the offsets of the timezone-aware
998 are constant:
1000 >>> from datetime import datetime
1001 >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
1002 DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
1003 dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
1005 |
1007 Setting ``utc=True`` solves most of the above issues:
1009 - Timezone-naive inputs are *localized* as UTC
1011 >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True)
1012 DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'],
1013 dtype='datetime64[ns, UTC]', freq=None)
1015 - Timezone-aware inputs are *converted* to UTC (the output represents the
1016 exact same datetime, but viewed from the UTC time offset `+00:00`).
1018 >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'],
1019 ... utc=True)
1020 DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
1021 dtype='datetime64[ns, UTC]', freq=None)
1023 - Inputs can contain both naive and aware, string or datetime, the above
1024 rules still apply
1026 >>> from datetime import timezone, timedelta
1027 >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530',
1028 ... datetime(2020, 1, 1, 18),
1029 ... datetime(2020, 1, 1, 18,
1030 ... tzinfo=timezone(-timedelta(hours=1)))],
1031 ... utc=True)
1032 DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00',
1033 '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
1034 dtype='datetime64[ns, UTC]', freq=None)
1035 """
1036 if arg is None:
1037 return None
1039 if origin != "unix":
1040 arg = _adjust_to_origin(arg, origin, unit)
1042 tz = "utc" if utc else None
1043 convert_listlike = partial(
1044 _convert_listlike_datetimes,
1045 tz=tz,
1046 unit=unit,
1047 dayfirst=dayfirst,
1048 yearfirst=yearfirst,
1049 errors=errors,
1050 exact=exact,
1051 infer_datetime_format=infer_datetime_format,
1052 )
1054 result: Timestamp | NaTType | Series | Index
1056 if isinstance(arg, Timestamp):
1057 result = arg
1058 if tz is not None:
1059 if arg.tz is not None:
1060 result = arg.tz_convert(tz)
1061 else:
1062 result = arg.tz_localize(tz)
1063 elif isinstance(arg, ABCSeries):
1064 cache_array = _maybe_cache(arg, format, cache, convert_listlike)
1065 if not cache_array.empty:
1066 result = arg.map(cache_array)
1067 else:
1068 values = convert_listlike(arg._values, format)
1069 result = arg._constructor(values, index=arg.index, name=arg.name)
1070 elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
1071 result = _assemble_from_unit_mappings(arg, errors, tz)
1072 elif isinstance(arg, Index):
1073 cache_array = _maybe_cache(arg, format, cache, convert_listlike)
1074 if not cache_array.empty:
1075 result = _convert_and_box_cache(arg, cache_array, name=arg.name)
1076 else:
1077 result = convert_listlike(arg, format, name=arg.name)
1078 elif is_list_like(arg):
1079 try:
1080 # error: Argument 1 to "_maybe_cache" has incompatible type
1081 # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
1082 # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
1083 # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
1084 argc = cast(
1085 Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
1086 )
1087 cache_array = _maybe_cache(argc, format, cache, convert_listlike)
1088 except OutOfBoundsDatetime:
1089 # caching attempts to create a DatetimeIndex, which may raise
1090 # an OOB. If that's the desired behavior, then just reraise...
1091 if errors == "raise":
1092 raise
1093 # ... otherwise, continue without the cache.
1094 from pandas import Series
1096 cache_array = Series([], dtype=object) # just an empty array
1097 if not cache_array.empty:
1098 result = _convert_and_box_cache(argc, cache_array)
1099 else:
1100 result = convert_listlike(argc, format)
1101 else:
1102 result = convert_listlike(np.array([arg]), format)[0]
1103 if isinstance(arg, bool) and isinstance(result, np.bool_):
1104 result = bool(result) # TODO: avoid this kludge.
1106 # error: Incompatible return value type (got "Union[Timestamp, NaTType,
1107 # Series, Index]", expected "Union[DatetimeIndex, Series, float, str,
1108 # NaTType, None]")
1109 return result # type: ignore[return-value]
1112# mappings for assembling units
1113_unit_map = {
1114 "year": "year",
1115 "years": "year",
1116 "month": "month",
1117 "months": "month",
1118 "day": "day",
1119 "days": "day",
1120 "hour": "h",
1121 "hours": "h",
1122 "minute": "m",
1123 "minutes": "m",
1124 "second": "s",
1125 "seconds": "s",
1126 "ms": "ms",
1127 "millisecond": "ms",
1128 "milliseconds": "ms",
1129 "us": "us",
1130 "microsecond": "us",
1131 "microseconds": "us",
1132 "ns": "ns",
1133 "nanosecond": "ns",
1134 "nanoseconds": "ns",
1135}
def _assemble_from_unit_mappings(arg, errors: DateTimeErrorChoices, tz):
    """
    Assemble a datetime Series from unit-named columns of a DataFrame.

    Parameters
    ----------
    arg : DataFrame
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If :const:`'raise'`, then invalid parsing will raise an exception
        - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`
        - If :const:`'ignore'`, then invalid parsing will return the input
    tz : None or 'utc'

    Returns
    -------
    Series
    """
    from pandas import (
        DataFrame,
        to_numeric,
        to_timedelta,
    )

    frame = DataFrame(arg)
    if not frame.columns.is_unique:
        raise ValueError("cannot assemble with duplicate keys")

    def _normalize(label):
        # Map a column label onto its canonical unit via _unit_map; try the
        # exact spelling first ("m" is case significant), then lowercase,
        # and fall through to the label itself when unrecognized.
        if label in _unit_map:
            return _unit_map[label]
        if label.lower() in _unit_map:
            return _unit_map[label.lower()]
        return label

    label_to_unit = {label: _normalize(label) for label in frame.keys()}
    unit_to_label = {u: label for label, u in label_to_unit.items()}

    # year/month/day are the minimum required fields
    missing = sorted({"year", "month", "day"} - set(unit_to_label.keys()))
    if len(missing):
        _required = ",".join(missing)
        raise ValueError(
            "to assemble mappings requires at least that "
            f"[year, month, day] be specified: [{_required}] is missing"
        )

    # reject columns that didn't normalize to a known unit
    unknown = sorted(set(unit_to_label.keys()) - set(_unit_map.values()))
    if len(unknown):
        _excess = ",".join(unknown)
        raise ValueError(
            f"extra keys have been passed to the datetime assemblage: [{_excess}]"
        )

    def _as_number(col):
        # coerce to numeric per the caller's `errors` policy
        out = to_numeric(col, errors=errors)

        # widen small integer dtypes (int8/int16) so the *10000 math
        # below cannot overflow
        if is_integer_dtype(out):
            out = out.astype("int64", copy=False)
        return out

    # pack Y/M/D into a single YYYYMMDD integer and parse it in one shot
    ymd = (
        _as_number(frame[unit_to_label["year"]]) * 10000
        + _as_number(frame[unit_to_label["month"]]) * 100
        + _as_number(frame[unit_to_label["day"]])
    )
    try:
        result = to_datetime(ymd, format="%Y%m%d", errors=errors, utc=tz)
    except (TypeError, ValueError) as err:
        raise ValueError(f"cannot assemble the datetimes: {err}") from err

    # add any sub-daily components as timedeltas
    sub_units: list[UnitChoices] = ["h", "m", "s", "ms", "us", "ns"]
    for u in sub_units:
        label = unit_to_label.get(u)
        if label is None or label not in frame:
            continue
        try:
            result += to_timedelta(_as_number(frame[label]), unit=u, errors=errors)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"cannot assemble the datetimes [{label}]: {err}"
            ) from err
    return result
1231def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
1232 """
1233 try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
1234 arg is a passed in as an object dtype, but could really be ints/strings
1235 with nan-like/or floats (e.g. with nan)
1237 Parameters
1238 ----------
1239 arg : np.ndarray[object]
1240 errors : {'raise','ignore','coerce'}
1241 """
1243 def calc(carg):
1244 # calculate the actual result
1245 carg = carg.astype(object, copy=False)
1246 parsed = parsing.try_parse_year_month_day(
1247 carg / 10000, carg / 100 % 100, carg % 100
1248 )
1249 return tslib.array_to_datetime(parsed, errors=errors)[0]
1251 def calc_with_mask(carg, mask):
1252 result = np.empty(carg.shape, dtype="M8[ns]")
1253 iresult = result.view("i8")
1254 iresult[~mask] = iNaT
1256 masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
1257 result[mask] = masked_result.astype("M8[ns]")
1258 return result
1260 # try intlike / strings that are ints
1261 try:
1262 return calc(arg.astype(np.int64))
1263 except (ValueError, OverflowError, TypeError):
1264 pass
1266 # a float with actual np.nan
1267 try:
1268 carg = arg.astype(np.float64)
1269 return calc_with_mask(carg, notna(carg))
1270 except (ValueError, OverflowError, TypeError):
1271 pass
1273 # string with NaN-like
1274 try:
1275 # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected
1276 # "Union[Union[ExtensionArray, ndarray], Index, Series]"
1277 mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type]
1278 return calc_with_mask(arg, mask)
1279 except (ValueError, OverflowError, TypeError):
1280 pass
1282 return None
def to_time(arg, format=None, infer_time_format=False, errors="raise"):
    # GH#34145: deprecated alias — the real implementation now lives in
    # pandas.core.tools.times; warn, then delegate unchanged.
    warnings.warn(
        "`to_time` has been moved, should be imported from pandas.core.tools.times. "
        "This alias will be removed in a future version.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    from pandas.core.tools.times import to_time as _to_time

    return _to_time(arg, format, infer_time_format, errors)
# Public API of this module; note `should_cache` is exported even though it
# is defined earlier in the file, outside this excerpt.
__all__ = [
    "DateParseError",
    "should_cache",
    "to_datetime",
    "to_time",
]