Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py: 16%
608 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import (
4 datetime,
5 time,
6 timedelta,
7 tzinfo,
8)
9from typing import (
10 TYPE_CHECKING,
11 Literal,
12 cast,
13)
14import warnings
16import numpy as np
18from pandas._libs import (
19 lib,
20 tslib,
21)
22from pandas._libs.tslibs import (
23 BaseOffset,
24 NaT,
25 NaTType,
26 Resolution,
27 Timestamp,
28 astype_overflowsafe,
29 fields,
30 get_resolution,
31 get_unit_from_dtype,
32 ints_to_pydatetime,
33 is_date_array_normalized,
34 is_supported_unit,
35 is_unitless,
36 normalize_i8_timestamps,
37 timezones,
38 to_offset,
39 tz_convert_from_utc,
40 tzconversion,
41)
42from pandas._typing import npt
43from pandas.errors import (
44 OutOfBoundsDatetime,
45 PerformanceWarning,
46)
47from pandas.util._exceptions import find_stack_level
48from pandas.util._validators import validate_inclusive
50from pandas.core.dtypes.astype import astype_dt64_to_dt64tz
51from pandas.core.dtypes.common import (
52 DT64NS_DTYPE,
53 INT64_DTYPE,
54 is_bool_dtype,
55 is_datetime64_any_dtype,
56 is_datetime64_dtype,
57 is_datetime64_ns_dtype,
58 is_datetime64tz_dtype,
59 is_dtype_equal,
60 is_extension_array_dtype,
61 is_float_dtype,
62 is_object_dtype,
63 is_period_dtype,
64 is_sparse,
65 is_string_dtype,
66 is_timedelta64_dtype,
67 pandas_dtype,
68)
69from pandas.core.dtypes.dtypes import DatetimeTZDtype
70from pandas.core.dtypes.missing import isna
72from pandas.core.arrays import datetimelike as dtl
73from pandas.core.arrays._ranges import generate_regular_range
74import pandas.core.common as com
76from pandas.tseries.frequencies import get_period_alias
77from pandas.tseries.offsets import (
78 BDay,
79 Day,
80 Tick,
81)
83if TYPE_CHECKING: 83 ↛ 85line 83 didn't jump to line 85, because the condition on line 83 was never true
85 from pandas import DataFrame
86 from pandas.core.arrays import (
87 PeriodArray,
88 TimedeltaArray,
89 )
# Shared module-level constant: the midnight time value (00:00).
_midnight = time(0, 0)
def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
    """
    Return a datetime64[`unit`] dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None
    unit : str, default "ns"

    Returns
    -------
    np.dtype or DatetimeTZDtype
        A tz-naive numpy ``M8[unit]`` dtype when ``tz`` is None, otherwise
        a ``DatetimeTZDtype`` carrying the timezone.
    """
    if tz is not None:
        return DatetimeTZDtype(tz=tz, unit=unit)
    return np.dtype(f"M8[{unit}]")
113def _field_accessor(name: str, field: str, docstring=None):
114 def f(self):
115 values = self._local_timestamps()
117 if field in self._bool_ops:
118 result: np.ndarray
120 if field.endswith(("start", "end")):
121 freq = self.freq
122 month_kw = 12
123 if freq:
124 kwds = freq.kwds
125 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
127 result = fields.get_start_end_field(
128 values, field, self.freqstr, month_kw, reso=self._reso
129 )
130 else:
131 result = fields.get_date_field(values, field, reso=self._reso)
133 # these return a boolean by-definition
134 return result
136 if field in self._object_ops:
137 result = fields.get_date_name_field(values, field, reso=self._reso)
138 result = self._maybe_mask_results(result, fill_value=None)
140 else:
141 result = fields.get_date_field(values, field, reso=self._reso)
142 result = self._maybe_mask_results(
143 result, fill_value=None, convert="float64"
144 )
146 return result
148 f.__name__ = name
149 f.__doc__ = docstring
150 return property(f)
153class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
154 """
155 Pandas ExtensionArray for tz-naive or tz-aware datetime data.
157 .. warning::
159 DatetimeArray is currently experimental, and its API may change
160 without warning. In particular, :attr:`DatetimeArray.dtype` is
161 expected to change to always be an instance of an ``ExtensionDtype``
162 subclass.
164 Parameters
165 ----------
166 values : Series, Index, DatetimeArray, ndarray
167 The datetime data.
169 For DatetimeArray `values` (or a Series or Index boxing one),
170 `dtype` and `freq` will be extracted from `values`.
172 dtype : numpy.dtype or DatetimeTZDtype
173 Note that the only NumPy dtype allowed is 'datetime64[ns]'.
174 freq : str or Offset, optional
175 The frequency.
176 copy : bool, default False
177 Whether to copy the underlying array of values.
179 Attributes
180 ----------
181 None
183 Methods
184 -------
185 None
186 """
    # ------------------------------------------------------------------
    # Class constants used by the datetime-like delegation machinery.

    _typ = "datetimearray"
    # Scalar stored for missing entries in the backing ndarray.
    _internal_fill_value = np.datetime64("NaT", "ns")
    # Scalar types accepted as datetime-like by this array.
    _recognized_scalars = (datetime, np.datetime64)
    _is_recognized_dtype = is_datetime64_any_dtype
    _infer_matches = ("datetime", "datetime64", "date")

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # The scalar box type for this array's elements.
        return Timestamp

    # define my properties & methods for delegation
    # Boolean-valued field accessors (see _field_accessor: no NaT masking).
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # Object-dtype ops.
    _object_ops: list[str] = ["freq", "tz"]
    # Numeric field accessors (NaT-masked to float64 in _field_accessor).
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "week",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    # Remaining delegated attributes.
    _other_ops: list[str] = ["date", "time", "timetz"]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    _dtype: np.dtype | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__

    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Both the backing ndarray's dtype and the requested dtype must pass
        # datetime64 validation; the validated dtype is returned.
        _validate_dt64_dtype(values.dtype)
        dtype = _validate_dt64_dtype(dtype)
        return dtype
    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype=DT64NS_DTYPE,
    ) -> DatetimeArray:
        """
        Fastpath constructor: no validation or coercion beyond assertions.

        ``values`` must already be an M8 ndarray consistent with ``dtype``.
        """
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            # tz-naive: the dtype must match the ndarray exactly and carry a unit.
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._reso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        """Standard EA constructor: delegates to the non-strict path unchanged."""
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        dtype=None,
        copy: bool = False,
        tz=None,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous="raise",
    ):
        """
        Convert a datetime-like sequence to a DatetimeArray, inferring and
        validating tz and frequency along the way.
        """
        # freq=None means "explicitly no freq" — distinct from "not passed",
        # which is represented by lib.no_default.
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            dtype=dtype,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
        )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        dtype = tz_to_dtype(tz)
        result = cls._simple_new(subarr, freq=freq, dtype=dtype)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result
    @classmethod
    def _generate_range(
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize=False,
        ambiguous="raise",
        nonexistent="raise",
        inclusive="both",
    ):
        """
        Build a DatetimeArray of evenly spaced points from a
        start/end/periods/freq specification.

        Exactly three of the four parameters start, end, periods and freq
        must be specified; with no freq, endpoints are linearly interpolated.
        """

        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )
        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # Fixed-size steps: generate directly in i8 space.
                i8values = generate_regular_range(start, end, periods, freq)
            else:
                # Irregular offsets: materialize each point and collect values.
                xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
                i8values = np.array([x.value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:

                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end.value - start.value, periods, dtype="int64")
                + start.value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints excluded by `inclusive`.
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start).value
            end_i8 = Timestamp(end).value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view("datetime64[ns]")
        dtype = tz_to_dtype(tz)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
446 # -----------------------------------------------------------------
447 # DatetimeLike Interface
449 def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64:
450 if not isinstance(value, self._scalar_type) and value is not NaT:
451 raise ValueError("'value' should be a Timestamp.")
452 self._check_compatible_with(value, setitem=setitem)
453 return value.asm8
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        """Parse a string into a Timestamp carrying this array's tz."""
        return Timestamp(value, tz=self.tz)
    def _check_compatible_with(self, other, setitem: bool = False):
        """
        Verify that `other` may be compared with / assigned into self.

        Raises
        ------
        TypeError
            If tz-awareness differs (via _assert_tzawareness_compat).
        ValueError
            On setitem with mismatched timezones (currently a deprecated
            warn-then-raise path; see GH#37605).
        """
        if other is NaT:
            # NaT is compatible with everything.
            return
        self._assert_tzawareness_compat(other)
        if setitem:
            # Stricter check for setitem vs comparison methods
            if self.tz is not None and not timezones.tz_compare(self.tz, other.tz):
                # TODO(2.0): remove this check. GH#37605
                warnings.warn(
                    "Setitem-like behavior with mismatched timezones is deprecated "
                    "and will change in a future version. Instead of raising "
                    "(or for Index, Series, and DataFrame methods, coercing to "
                    "object dtype), the value being set (or passed as a "
                    "fill_value, or inserted) will be cast to the existing "
                    "DatetimeArray/DatetimeIndex/Series/DataFrame column's "
                    "timezone. To retain the old behavior, explicitly cast to "
                    "object dtype before the operation.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")
480 # -----------------------------------------------------------------
481 # Descriptive Properties
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        """Box one raw datetime64 element as a Timestamp (or NaT), attaching tz/freq."""
        # GH#42228
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz)
        # Non-overlapping identity check (left operand type: "Timestamp",
        # right operand type: "NaTType")
        if ts is not NaT:  # type: ignore[comparison-overlap]
            # GH#41586
            # do this instead of passing to the constructor to avoid FutureWarning
            ts._set_freq(self.freq)
        return ts
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        # Backed by the _dtype attribute declared in the Constructors section.
        return self._dtype
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595
        # The tz lives on the dtype (DatetimeTZDtype); plain np.dtype has none.
        return getattr(self.dtype, "tz", None)

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )

    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz

    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._reso)

    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Resolution of the data as computed by tslibs.get_resolution
        # (evaluated against tz-local values).
        return get_resolution(self.asi8, self.tz, reso=self._reso)
559 # ----------------------------------------------------------------
560 # Array-Like / EA-Interface Methods
562 def __array__(self, dtype=None) -> np.ndarray:
563 if dtype is None and self.tz:
564 # The default for tz-aware is object, to preserve tz info
565 dtype = object
567 return super().__array__(dtype=dtype)
    def __iter__(self):
        """
        Return an iterator over the boxed values

        Yields
        ------
        tstamp : Timestamp
        """
        if self.ndim > 1:
            # 2-D case: yield sub-arrays (rows), not scalars.
            for i in range(len(self)):
                yield self[i]
        else:
            # convert in chunks of 10k for efficiency
            data = self.asi8
            length = len(self)
            chunksize = 10000
            chunks = (length // chunksize) + 1

            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, length)
                # Box each chunk of i8 values into Timestamps in one call.
                converted = ints_to_pydatetime(
                    data[start_i:end_i],
                    tz=self.tz,
                    freq=self.freq,
                    box="timestamp",
                    reso=self._reso,
                )
                yield from converted
    def astype(self, dtype, copy: bool = True):
        # We handle
        # --> datetime
        # --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            # Same dtype: at most a copy is needed.
            if copy:
                return self.copy()
            return self

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif is_datetime64_ns_dtype(dtype):
            # ns-dtype target (naive or tz-aware) handled by the astype helper.
            return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

        elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
            # tzaware unit conversion e.g. datetime64[s, UTC]
            np_dtype = np.dtype(dtype.str)
            res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
            return type(self)._simple_new(res_values, dtype=dtype)
            # TODO: preserve freq?

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # Unit-less datetime64 target: deprecated, returns a raw ndarray.
            # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype
            warnings.warn(
                "Passing unit-less datetime64 dtype to .astype is deprecated "
                "and will raise in a future version. Pass 'datetime64[ns]' instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # unit conversion e.g. datetime64[s]
            return self._ndarray.astype(dtype)

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
652 # -----------------------------------------------------------------
653 # Rendering Methods
    def _format_native_types(
        self, *, na_rep="NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """Render each element as a string, returning an object ndarray."""
        # Local import to avoid a circular dependency with pandas.io.formats.
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._reso
        )
666 # -----------------------------------------------------------------
667 # Comparison Methods
669 def _has_same_tz(self, other) -> bool:
671 # vzone shouldn't be None if value is non-datetime like
672 if isinstance(other, np.datetime64):
673 # convert to Timestamp as np.datetime64 doesn't have tz attr
674 other = Timestamp(other)
676 if not hasattr(other, "tzinfo"):
677 return False
678 other_tz = other.tzinfo
679 return timezones.tz_compare(self.tzinfo, other_tz)
681 def _assert_tzawareness_compat(self, other) -> None:
682 # adapted from _Timestamp._assert_tzawareness_compat
683 other_tz = getattr(other, "tzinfo", None)
684 other_dtype = getattr(other, "dtype", None)
686 if is_datetime64tz_dtype(other_dtype):
687 # Get tzinfo from Series dtype
688 other_tz = other.dtype.tz
689 if other is NaT:
690 # pd.NaT quacks both aware and naive
691 pass
692 elif self.tz is None:
693 if other_tz is not None:
694 raise TypeError(
695 "Cannot compare tz-naive and tz-aware datetime-like objects."
696 )
697 elif other_tz is None:
698 raise TypeError(
699 "Cannot compare tz-naive and tz-aware datetime-like objects"
700 )
702 # -----------------------------------------------------------------
703 # Arithmetic Methods
    def _add_offset(self, offset) -> DatetimeArray:
        """Add a non-Tick DateOffset to each element, preserving tz."""
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # Apply the offset against wall-clock times; re-localize below.
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            # Fallback: apply the offset elementwise via object dtype (slow).
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                # FIXME: tz_localize with non-nano
                result = result.tz_localize(self.tz)

        return result
736 # -----------------------------------------------------------------
737 # Timezone Conversion and Localization Methods
739 def _local_timestamps(self) -> npt.NDArray[np.int64]:
740 """
741 Convert to an i8 (unix-like nanosecond timestamp) representation
742 while keeping the local timezone and not using UTC.
743 This is used to calculate time-of-day information as if the timestamps
744 were timezone-naive.
745 """
746 if self.tz is None or timezones.is_utc(self.tz):
747 # Avoid the copy that would be made in tzconversion
748 return self.asi8
749 return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso)
751 def tz_convert(self, tz) -> DatetimeArray:
752 """
753 Convert tz-aware Datetime Array/Index from one time zone to another.
755 Parameters
756 ----------
757 tz : str, pytz.timezone, dateutil.tz.tzfile or None
758 Time zone for time. Corresponding timestamps would be converted
759 to this time zone of the Datetime Array/Index. A `tz` of None will
760 convert to UTC and remove the timezone information.
762 Returns
763 -------
764 Array or Index
766 Raises
767 ------
768 TypeError
769 If Datetime Array/Index is tz-naive.
771 See Also
772 --------
773 DatetimeIndex.tz : A timezone that has a variable offset from UTC.
774 DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
775 given time zone, or remove timezone from a tz-aware DatetimeIndex.
777 Examples
778 --------
779 With the `tz` parameter, we can change the DatetimeIndex
780 to other time zones:
782 >>> dti = pd.date_range(start='2014-08-01 09:00',
783 ... freq='H', periods=3, tz='Europe/Berlin')
785 >>> dti
786 DatetimeIndex(['2014-08-01 09:00:00+02:00',
787 '2014-08-01 10:00:00+02:00',
788 '2014-08-01 11:00:00+02:00'],
789 dtype='datetime64[ns, Europe/Berlin]', freq='H')
791 >>> dti.tz_convert('US/Central')
792 DatetimeIndex(['2014-08-01 02:00:00-05:00',
793 '2014-08-01 03:00:00-05:00',
794 '2014-08-01 04:00:00-05:00'],
795 dtype='datetime64[ns, US/Central]', freq='H')
797 With the ``tz=None``, we can remove the timezone (after converting
798 to UTC if necessary):
800 >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
801 ... periods=3, tz='Europe/Berlin')
803 >>> dti
804 DatetimeIndex(['2014-08-01 09:00:00+02:00',
805 '2014-08-01 10:00:00+02:00',
806 '2014-08-01 11:00:00+02:00'],
807 dtype='datetime64[ns, Europe/Berlin]', freq='H')
809 >>> dti.tz_convert(None)
810 DatetimeIndex(['2014-08-01 07:00:00',
811 '2014-08-01 08:00:00',
812 '2014-08-01 09:00:00'],
813 dtype='datetime64[ns]', freq='H')
814 """
815 tz = timezones.maybe_get_tz(tz)
817 if self.tz is None:
818 # tz naive, use tz_localize
819 raise TypeError(
820 "Cannot convert tz-naive timestamps, use tz_localize to localize"
821 )
823 # No conversion since timestamps are all UTC to begin with
824 dtype = tz_to_dtype(tz, unit=self._unit)
825 return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
827 @dtl.ravel_compat
828 def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray:
829 """
830 Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
832 This method takes a time zone (tz) naive Datetime Array/Index object
833 and makes this time zone aware. It does not move the time to another
834 time zone.
836 This method can also be used to do the inverse -- to create a time
837 zone unaware object from an aware object. To that end, pass `tz=None`.
839 Parameters
840 ----------
841 tz : str, pytz.timezone, dateutil.tz.tzfile or None
842 Time zone to convert timestamps to. Passing ``None`` will
843 remove the time zone information preserving local time.
844 ambiguous : 'infer', 'NaT', bool array, default 'raise'
845 When clocks moved backward due to DST, ambiguous times may arise.
846 For example in Central European Time (UTC+01), when going from
847 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
848 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
849 `ambiguous` parameter dictates how ambiguous times should be
850 handled.
852 - 'infer' will attempt to infer fall dst-transition hours based on
853 order
854 - bool-ndarray where True signifies a DST time, False signifies a
855 non-DST time (note that this flag is only applicable for
856 ambiguous times)
857 - 'NaT' will return NaT where there are ambiguous times
858 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
859 times.
861 nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
862default 'raise'
863 A nonexistent time does not exist in a particular timezone
864 where clocks moved forward due to DST.
866 - 'shift_forward' will shift the nonexistent time forward to the
867 closest existing time
868 - 'shift_backward' will shift the nonexistent time backward to the
869 closest existing time
870 - 'NaT' will return NaT where there are nonexistent times
871 - timedelta objects will shift nonexistent times by the timedelta
872 - 'raise' will raise an NonExistentTimeError if there are
873 nonexistent times.
875 Returns
876 -------
877 Same type as self
878 Array/Index converted to the specified time zone.
880 Raises
881 ------
882 TypeError
883 If the Datetime Array/Index is tz-aware and tz is not None.
885 See Also
886 --------
887 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
888 one time zone to another.
890 Examples
891 --------
892 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
893 >>> tz_naive
894 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
895 '2018-03-03 09:00:00'],
896 dtype='datetime64[ns]', freq='D')
898 Localize DatetimeIndex in US/Eastern time zone:
900 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
901 >>> tz_aware
902 DatetimeIndex(['2018-03-01 09:00:00-05:00',
903 '2018-03-02 09:00:00-05:00',
904 '2018-03-03 09:00:00-05:00'],
905 dtype='datetime64[ns, US/Eastern]', freq=None)
907 With the ``tz=None``, we can remove the time zone information
908 while keeping the local time (not converted to UTC):
910 >>> tz_aware.tz_localize(None)
911 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
912 '2018-03-03 09:00:00'],
913 dtype='datetime64[ns]', freq=None)
915 Be careful with DST changes. When there is sequential data, pandas can
916 infer the DST time:
918 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
919 ... '2018-10-28 02:00:00',
920 ... '2018-10-28 02:30:00',
921 ... '2018-10-28 02:00:00',
922 ... '2018-10-28 02:30:00',
923 ... '2018-10-28 03:00:00',
924 ... '2018-10-28 03:30:00']))
925 >>> s.dt.tz_localize('CET', ambiguous='infer')
926 0 2018-10-28 01:30:00+02:00
927 1 2018-10-28 02:00:00+02:00
928 2 2018-10-28 02:30:00+02:00
929 3 2018-10-28 02:00:00+01:00
930 4 2018-10-28 02:30:00+01:00
931 5 2018-10-28 03:00:00+01:00
932 6 2018-10-28 03:30:00+01:00
933 dtype: datetime64[ns, CET]
935 In some cases, inferring the DST is impossible. In such cases, you can
936 pass an ndarray to the ambiguous parameter to set the DST explicitly
938 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
939 ... '2018-10-28 02:36:00',
940 ... '2018-10-28 03:46:00']))
941 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
942 0 2018-10-28 01:20:00+02:00
943 1 2018-10-28 02:36:00+02:00
944 2 2018-10-28 03:46:00+01:00
945 dtype: datetime64[ns, CET]
947 If the DST transition causes nonexistent times, you can shift these
948 dates forward or backwards with a timedelta object or `'shift_forward'`
949 or `'shift_backwards'`.
951 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
952 ... '2015-03-29 03:30:00']))
953 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
954 0 2015-03-29 03:00:00+02:00
955 1 2015-03-29 03:30:00+02:00
956 dtype: datetime64[ns, Europe/Warsaw]
958 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
959 0 2015-03-29 01:59:59.999999999+01:00
960 1 2015-03-29 03:30:00+02:00
961 dtype: datetime64[ns, Europe/Warsaw]
963 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
964 0 2015-03-29 03:30:00+02:00
965 1 2015-03-29 03:30:00+02:00
966 dtype: datetime64[ns, Europe/Warsaw]
967 """
968 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
969 if nonexistent not in nonexistent_options and not isinstance(
970 nonexistent, timedelta
971 ):
972 raise ValueError(
973 "The nonexistent argument must be one of 'raise', "
974 "'NaT', 'shift_forward', 'shift_backward' or "
975 "a timedelta object"
976 )
978 if self.tz is not None:
979 if tz is None:
980 new_dates = tz_convert_from_utc(self.asi8, self.tz)
981 else:
982 raise TypeError("Already tz-aware, use tz_convert to convert.")
983 else:
984 tz = timezones.maybe_get_tz(tz)
985 # Convert to UTC
987 new_dates = tzconversion.tz_localize_to_utc(
988 self.asi8,
989 tz,
990 ambiguous=ambiguous,
991 nonexistent=nonexistent,
992 reso=self._reso,
993 )
994 new_dates = new_dates.view(f"M8[{self._unit}]")
995 dtype = tz_to_dtype(tz, unit=self._unit)
997 freq = None
998 if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
999 # we can preserve freq
1000 # TODO: Also for fixed-offsets
1001 freq = self.freq
1002 elif tz is None and self.tz is None:
1003 # no-op
1004 freq = self.freq
1005 return self._simple_new(new_dates, dtype=dtype, freq=freq)
1007 # ----------------------------------------------------------------
1008 # Conversion Methods - Vectorized analogues of Timestamp methods
    def to_pydatetime(self) -> npt.NDArray[np.object_]:
        """
        Return an ndarray of datetime.datetime objects.

        Returns
        -------
        datetimes : ndarray[object]
        """
        # Boxes each i8 value; elements are tz-aware when self.tz is set.
        return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._reso)
1020 def normalize(self) -> DatetimeArray:
1021 """
1022 Convert times to midnight.
1024 The time component of the date-time is converted to midnight i.e.
1025 00:00:00. This is useful in cases, when the time does not matter.
1026 Length is unaltered. The timezones are unaffected.
1028 This method is available on Series with datetime values under
1029 the ``.dt`` accessor, and directly on Datetime Array/Index.
1031 Returns
1032 -------
1033 DatetimeArray, DatetimeIndex or Series
1034 The same type as the original data. Series will have the same
1035 name and index. DatetimeIndex will have the same name.
1037 See Also
1038 --------
1039 floor : Floor the datetimes to the specified freq.
1040 ceil : Ceil the datetimes to the specified freq.
1041 round : Round the datetimes to the specified freq.
1043 Examples
1044 --------
1045 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
1046 ... periods=3, tz='Asia/Calcutta')
1047 >>> idx
1048 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1049 '2014-08-01 11:00:00+05:30',
1050 '2014-08-01 12:00:00+05:30'],
1051 dtype='datetime64[ns, Asia/Calcutta]', freq='H')
1052 >>> idx.normalize()
1053 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1054 '2014-08-01 00:00:00+05:30',
1055 '2014-08-01 00:00:00+05:30'],
1056 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1057 """
1058 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso)
1059 dt64_values = new_values.view(self._ndarray.dtype)
1061 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1062 dta = dta._with_freq("infer")
1063 if self.tz is not None:
1064 dta = dta.tz_localize(self.tz)
1065 return dta
1067 def to_period(self, freq=None) -> PeriodArray:
1068 """
1069 Cast to PeriodArray/Index at a particular frequency.
1071 Converts DatetimeArray/Index to PeriodArray/Index.
1073 Parameters
1074 ----------
1075 freq : str or Offset, optional
1076 One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
1077 or an Offset object. Will be inferred by default.
1079 Returns
1080 -------
1081 PeriodArray/Index
1083 Raises
1084 ------
1085 ValueError
1086 When converting a DatetimeArray/Index with non-regular values,
1087 so that a frequency cannot be inferred.
1089 See Also
1090 --------
1091 PeriodIndex: Immutable ndarray holding ordinal values.
1092 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
1094 Examples
1095 --------
1096 >>> df = pd.DataFrame({"y": [1, 2, 3]},
1097 ... index=pd.to_datetime(["2000-03-31 00:00:00",
1098 ... "2000-05-31 00:00:00",
1099 ... "2000-08-31 00:00:00"]))
1100 >>> df.index.to_period("M")
1101 PeriodIndex(['2000-03', '2000-05', '2000-08'],
1102 dtype='period[M]')
1104 Infer the daily frequency
1106 >>> idx = pd.date_range("2017-01-01", periods=2)
1107 >>> idx.to_period()
1108 PeriodIndex(['2017-01-01', '2017-01-02'],
1109 dtype='period[D]')
1110 """
1111 from pandas.core.arrays import PeriodArray
1113 if self.tz is not None:
1114 warnings.warn(
1115 "Converting to PeriodArray/Index representation "
1116 "will drop timezone information.",
1117 UserWarning,
1118 stacklevel=find_stack_level(),
1119 )
1121 if freq is None:
1122 freq = self.freqstr or self.inferred_freq
1124 if freq is None:
1125 raise ValueError(
1126 "You must pass a freq argument as current index has none."
1127 )
1129 res = get_period_alias(freq)
1131 # https://github.com/pandas-dev/pandas/issues/33358
1132 if res is None:
1133 res = freq
1135 freq = res
1137 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1139 def to_perioddelta(self, freq) -> TimedeltaArray:
1140 """
1141 Calculate deltas between self values and self converted to Periods at a freq.
1143 Used for vectorized offsets.
1145 Parameters
1146 ----------
1147 freq : Period frequency
1149 Returns
1150 -------
1151 TimedeltaArray/Index
1152 """
1153 # Deprecaation GH#34853
1154 warnings.warn(
1155 "to_perioddelta is deprecated and will be removed in a "
1156 "future version. "
1157 "Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead.",
1158 FutureWarning,
1159 # stacklevel chosen to be correct for when called from DatetimeIndex
1160 stacklevel=find_stack_level(),
1161 )
1162 from pandas.core.arrays.timedeltas import TimedeltaArray
1164 if self._ndarray.dtype != "M8[ns]":
1165 raise NotImplementedError("Only supported for nanosecond resolution.")
1167 i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8
1168 m8delta = i8delta.view("m8[ns]")
1169 return TimedeltaArray(m8delta)
1171 # -----------------------------------------------------------------
1172 # Properties - Vectorized Timestamp Properties/Methods
1174 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1175 """
1176 Return the month names with specified locale.
1178 Parameters
1179 ----------
1180 locale : str, optional
1181 Locale determining the language in which to return the month name.
1182 Default is English locale.
1184 Returns
1185 -------
1186 Series or Index
1187 Series or Index of month names.
1189 Examples
1190 --------
1191 >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
1192 >>> s
1193 0 2018-01-31
1194 1 2018-02-28
1195 2 2018-03-31
1196 dtype: datetime64[ns]
1197 >>> s.dt.month_name()
1198 0 January
1199 1 February
1200 2 March
1201 dtype: object
1203 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1204 >>> idx
1205 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1206 dtype='datetime64[ns]', freq='M')
1207 >>> idx.month_name()
1208 Index(['January', 'February', 'March'], dtype='object')
1209 """
1210 values = self._local_timestamps()
1212 result = fields.get_date_name_field(
1213 values, "month_name", locale=locale, reso=self._reso
1214 )
1215 result = self._maybe_mask_results(result, fill_value=None)
1216 return result
1218 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1219 """
1220 Return the day names with specified locale.
1222 Parameters
1223 ----------
1224 locale : str, optional
1225 Locale determining the language in which to return the day name.
1226 Default is English locale.
1228 Returns
1229 -------
1230 Series or Index
1231 Series or Index of day names.
1233 Examples
1234 --------
1235 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1236 >>> s
1237 0 2018-01-01
1238 1 2018-01-02
1239 2 2018-01-03
1240 dtype: datetime64[ns]
1241 >>> s.dt.day_name()
1242 0 Monday
1243 1 Tuesday
1244 2 Wednesday
1245 dtype: object
1247 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1248 >>> idx
1249 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1250 dtype='datetime64[ns]', freq='D')
1251 >>> idx.day_name()
1252 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1253 """
1254 values = self._local_timestamps()
1256 result = fields.get_date_name_field(
1257 values, "day_name", locale=locale, reso=self._reso
1258 )
1259 result = self._maybe_mask_results(result, fill_value=None)
1260 return result
1262 @property
1263 def time(self) -> npt.NDArray[np.object_]:
1264 """
1265 Returns numpy array of :class:`datetime.time` objects.
1267 The time part of the Timestamps.
1268 """
1269 # If the Timestamps have a timezone that is not UTC,
1270 # convert them into their i8 representation while
1271 # keeping their timezone and not using UTC
1272 timestamps = self._local_timestamps()
1274 return ints_to_pydatetime(timestamps, box="time", reso=self._reso)
1276 @property
1277 def timetz(self) -> npt.NDArray[np.object_]:
1278 """
1279 Returns numpy array of :class:`datetime.time` objects with timezones.
1281 The time part of the Timestamps.
1282 """
1283 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._reso)
1285 @property
1286 def date(self) -> npt.NDArray[np.object_]:
1287 """
1288 Returns numpy array of python :class:`datetime.date` objects.
1290 Namely, the date part of Timestamps without time and
1291 timezone information.
1292 """
1293 # If the Timestamps have a timezone that is not UTC,
1294 # convert them into their i8 representation while
1295 # keeping their timezone and not using UTC
1296 timestamps = self._local_timestamps()
1298 return ints_to_pydatetime(timestamps, box="date", reso=self._reso)
1300 def isocalendar(self) -> DataFrame:
1301 """
1302 Calculate year, week, and day according to the ISO 8601 standard.
1304 .. versionadded:: 1.1.0
1306 Returns
1307 -------
1308 DataFrame
1309 With columns year, week and day.
1311 See Also
1312 --------
1313 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1314 week number, and weekday for the given Timestamp object.
1315 datetime.date.isocalendar : Return a named tuple object with
1316 three components: year, week and weekday.
1318 Examples
1319 --------
1320 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1321 >>> idx.isocalendar()
1322 year week day
1323 2019-12-29 2019 52 7
1324 2019-12-30 2020 1 1
1325 2019-12-31 2020 1 2
1326 2020-01-01 2020 1 3
1327 >>> idx.isocalendar().week
1328 2019-12-29 52
1329 2019-12-30 1
1330 2019-12-31 1
1331 2020-01-01 1
1332 Freq: D, Name: week, dtype: UInt32
1333 """
1334 from pandas import DataFrame
1336 values = self._local_timestamps()
1337 sarray = fields.build_isocalendar_sarray(values, reso=self._reso)
1338 iso_calendar_df = DataFrame(
1339 sarray, columns=["year", "week", "day"], dtype="UInt32"
1340 )
1341 if self._hasna:
1342 iso_calendar_df.iloc[self._isnan] = None
1343 return iso_calendar_df
1345 @property
1346 def weekofyear(self):
1347 """
1348 The week ordinal of the year.
1350 .. deprecated:: 1.1.0
1352 weekofyear and week have been deprecated.
1353 Please use DatetimeIndex.isocalendar().week instead.
1354 """
1355 warnings.warn(
1356 "weekofyear and week have been deprecated, please use "
1357 "DatetimeIndex.isocalendar().week instead, which returns "
1358 "a Series. To exactly reproduce the behavior of week and "
1359 "weekofyear and return an Index, you may call "
1360 "pd.Int64Index(idx.isocalendar().week)",
1361 FutureWarning,
1362 stacklevel=find_stack_level(),
1363 )
1364 week_series = self.isocalendar().week
1365 if week_series.hasnans:
1366 return week_series.to_numpy(dtype="float64", na_value=np.nan)
1367 return week_series.to_numpy(dtype="int64")
1369 week = weekofyear
    # Integer component accessors. Each is generated by `_field_accessor`,
    # which extracts the named field from the underlying datetime64 values;
    # the string passed here becomes the accessor's docstring at runtime.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="Y")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int64
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="M")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int64
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int64
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="T")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    # Calendar-position accessors (day-of-week/year, quarter, month length,
    # and boolean period-boundary indicators), all built via `_field_accessor`.
    # The module-level doc strings below are runtime values used as docstrings.
    _dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int64
    """
    day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
    dayofweek = day_of_week
    weekday = day_of_week

    day_of_year = _field_accessor(
        "dayofyear",
        "doy",
        """
        The ordinal day of the year.
        """,
    )
    dayofyear = day_of_year
    quarter = _field_accessor(
        "quarter",
        "q",
        """
        The quarter of the date.
        """,
    )
    days_in_month = _field_accessor(
        "days_in_month",
        "dim",
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month
    _is_month_doc = """
    Indicates whether the date is the {first_or_last} day of the month.

    Returns
    -------
    Series or array
        For Series, returns a Series with boolean values.
        For DatetimeIndex, returns a boolean array.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
    >>> s
    0   2018-02-27
    1   2018-02-28
    2   2018-03-01
    dtype: datetime64[ns]
    >>> s.dt.is_month_start
    0    False
    1    False
    2    True
    dtype: bool
    >>> s.dt.is_month_end
    0    False
    1    True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2018-02-27", periods=3)
    >>> idx.is_month_start
    array([False, False, True])
    >>> idx.is_month_end
    array([False, True, False])
    """
    is_month_start = _field_accessor(
        "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
    )

    is_month_end = _field_accessor(
        "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
    )

    is_quarter_start = _field_accessor(
        "is_quarter_start",
        "is_quarter_start",
        """
        Indicator for whether the date is the first day of a quarter.

        Returns
        -------
        is_quarter_start : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_end : Similar property for indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                   periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_start=df.dates.dt.is_quarter_start)
               dates  quarter  is_quarter_start
        0 2017-03-30        1             False
        1 2017-03-31        1             False
        2 2017-04-01        2              True
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_start
        array([False, False,  True, False])
        """,
    )
    is_quarter_end = _field_accessor(
        "is_quarter_end",
        "is_quarter_end",
        """
        Indicator for whether the date is the last day of a quarter.

        Returns
        -------
        is_quarter_end : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_start : Similar property indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_end=df.dates.dt.is_quarter_end)
               dates  quarter    is_quarter_end
        0 2017-03-30        1             False
        1 2017-03-31        1              True
        2 2017-04-01        2             False
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_end
        array([False,  True, False, False])
        """,
    )
    is_year_start = _field_accessor(
        "is_year_start",
        "is_year_start",
        """
        Indicate whether the date is the first day of a year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_end : Similar property indicating the last day of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_start
        0    False
        1    False
        2    True
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_start
        array([False, False,  True])
        """,
    )
    is_year_end = _field_accessor(
        "is_year_end",
        "is_year_end",
        """
        Indicate whether the date is the last day of the year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_start : Similar property indicating the start of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_end
        array([False,  True, False])
        """,
    )
    is_leap_year = _field_accessor(
        "is_leap_year",
        "is_leap_year",
        """
        Boolean indicator if the date belongs to a leap year.

        A leap year is a year, which has 366 days (instead of 365) including
        29th of February as an intercalary day.
        Leap years are years which are multiples of four with the exception
        of years divisible by 100 but not by 400.

        Returns
        -------
        Series or ndarray
             Booleans indicating if dates belong to a leap year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
        >>> idx
        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                      dtype='datetime64[ns]', freq='A-DEC')
        >>> idx.is_leap_year
        array([ True, False, False])

        >>> dates_series = pd.Series(idx)
        >>> dates_series
        0   2012-12-31
        1   2013-12-31
        2   2014-12-31
        dtype: datetime64[ns]
        >>> dates_series.dt.is_leap_year
        0     True
        1    False
        2    False
        dtype: bool
        """,
    )
    def to_julian_date(self) -> npt.NDArray[np.float64]:
        """
        Convert Datetime Array to float64 ndarray of Julian Dates.
        0 Julian date is noon January 1, 4713 BC.
        https://en.wikipedia.org/wiki/Julian_day
        """

        # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
        year = np.asarray(self.year)
        month = np.asarray(self.month)
        day = np.asarray(self.day)
        # Treat Jan/Feb as months 13/14 of the previous year so the
        # day-count polynomial below is continuous across the Feb/Mar
        # boundary (standard Julian-day trick).
        testarr = month < 3
        year[testarr] -= 1
        month[testarr] += 12
        # Integer day count for the date portion, plus 1_721_118.5 to shift
        # the epoch to noon Jan 1, 4713 BC, plus the time-of-day fraction.
        return (
            day
            + np.fix((153 * month - 457) / 5)
            + 365 * year
            + np.floor(year / 4)
            - np.floor(year / 100)
            + np.floor(year / 400)
            + 1_721_118.5
            + (
                self.hour
                + self.minute / 60
                + self.second / 3600
                + self.microsecond / 3600 / 10**6
                + self.nanosecond / 3600 / 10**9
            )
            / 24
        )
1914 # -----------------------------------------------------------------
1915 # Reductions
1917 def std(
1918 self,
1919 axis=None,
1920 dtype=None,
1921 out=None,
1922 ddof: int = 1,
1923 keepdims: bool = False,
1924 skipna: bool = True,
1925 ):
1926 """
1927 Return sample standard deviation over requested axis.
1929 Normalized by N-1 by default. This can be changed using the ddof argument
1931 Parameters
1932 ----------
1933 axis : int optional, default None
1934 Axis for the function to be applied on.
1935 For `Series` this parameter is unused and defaults to `None`.
1936 ddof : int, default 1
1937 Degrees of Freedom. The divisor used in calculations is N - ddof,
1938 where N represents the number of elements.
1939 skipna : bool, default True
1940 Exclude NA/null values. If an entire row/column is NA, the result will be
1941 NA.
1943 Returns
1944 -------
1945 Timedelta
1946 """
1947 # Because std is translation-invariant, we can get self.std
1948 # by calculating (self - Timestamp(0)).std, and we can do it
1949 # without creating a copy by using a view on self._ndarray
1950 from pandas.core.arrays import TimedeltaArray
1952 # Find the td64 dtype with the same resolution as our dt64 dtype
1953 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
1954 dtype = np.dtype(dtype_str)
1956 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
1958 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
1961# -------------------------------------------------------------------
1962# Constructor Helpers
def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray:
    """
    Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
    """
    # Delegate the heavy lifting; mixed integer/datetime input is allowed here.
    values, tz, freq = _sequence_to_dt64ns(
        data,
        allow_mixed=True,
        require_iso8601=require_iso8601,
    )

    return DatetimeArray._simple_new(values, freq=freq, dtype=tz_to_dtype(tz))
def _sequence_to_dt64ns(
    data,
    dtype=None,
    copy: bool = False,
    tz=None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous="raise",
    *,
    allow_mixed: bool = False,
    require_iso8601: bool = False,
):
    """
    Convert a sequence to an M8[ns] ndarray plus a (possibly inferred) tz.

    Parameters
    ----------
    data : list-like
    dtype : dtype, str, or None, default None
    copy : bool, default False
    tz : tzinfo, str, or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    allow_mixed : bool, default False
        Interpret integers as timestamps when datetime objects are also present.
    require_iso8601 : bool, default False
        Only consider ISO-8601 formats when parsing strings.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    inferred_freq = None

    dtype = _validate_dt64_dtype(dtype)
    tz = timezones.maybe_get_tz(tz)

    # if dtype has an embedded tz, capture it
    tz = validate_tz_from_dtype(dtype, tz)

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )

    if isinstance(data, DatetimeArray):
        # Preserve the input's freq; conversion below won't change spacing.
        inferred_freq = data.freq

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    if (
        is_object_dtype(data_dtype)
        or is_string_dtype(data_dtype)
        or is_sparse(data_dtype)
    ):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # All-integer object array: treat as epoch values below.
            data = data.astype(np.int64)
        else:
            # data comes back here as either i8 to denote UTC timestamps
            #  or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
                allow_mixed=allow_mixed,
                require_iso8601=require_iso8601,
            )
            if tz and inferred_tz:
                #  two timezones: convert to intended from base UTC repr
                if data.dtype == "i8":
                    # GH#42505
                    # by convention, these are _already_ UTC, e.g
                    return data.view(DT64NS_DTYPE), tz, None

                if timezones.is_utc(tz):
                    # Fastpath, avoid copy made in tzconversion
                    utc_vals = data.view("i8")
                else:
                    utc_vals = tz_convert_from_utc(data.view("i8"), tz)
                data = utc_vals.view(DT64NS_DTYPE)
            elif inferred_tz:
                tz = inferred_tz

        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if is_datetime64tz_dtype(data_dtype):
        # DatetimeArray -> ndarray
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif is_datetime64_dtype(data_dtype):
        # tz-naive DatetimeArray or ndarray[datetime64]
        data = getattr(data, "_ndarray", data)
        if data.dtype != DT64NS_DTYPE:
            data = astype_overflowsafe(data, dtype=DT64NS_DTYPE)
            # astype_overflowsafe already produced a new array; no copy needed.
            copy = False

        if tz is not None:
            # Convert tz-naive to UTC
            tz = timezones.maybe_get_tz(tz)
            # TODO: if tz is UTC, are there situations where we *don't* want a
            #  copy?  tz_localize_to_utc always makes one.
            data = tzconversion.tz_localize_to_utc(
                data.view("i8"), tz, ambiguous=ambiguous
            )
            data = data.view(DT64NS_DTYPE)

        assert data.dtype == DT64NS_DTYPE, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if tz:
            tz = timezones.maybe_get_tz(tz)

        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        # i8 epoch values reinterpret directly as UTC datetime64[ns].
        result = data.view(DT64NS_DTYPE)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype == "M8[ns]", result.dtype

    # We have to call this again after possibly inferring a tz above
    validate_tz_from_dtype(dtype, tz)

    return result, tz, inferred_freq
def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc=False,
    errors="raise",
    require_iso8601: bool = False,
    allow_object: bool = False,
    allow_mixed: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert timezone-aware timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    require_iso8601 : bool, default False
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.
    allow_mixed : bool, default False
        Interpret integers as timestamps when datetime objects are also present.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # if str-dtype, convert
    data = np.array(data, copy=False, dtype=np.object_)

    # Remember the input's memory order so the reshape after parsing can
    # restore it; ravel("K") flattens in memory-layout order.
    flags = data.flags
    order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
    try:
        result, tz_parsed = tslib.array_to_datetime(
            data.ravel("K"),
            errors=errors,
            utc=utc,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            require_iso8601=require_iso8601,
            allow_mixed=allow_mixed,
        )
        result = result.reshape(data.shape, order=order)
    except OverflowError as err:
        # Exception is raised when a part of date is greater than 32 bit signed int
        raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        #  is in UTC
        # Return i8 values to denote unix timestamps
        return result.view("i8"), tz_parsed
    elif is_datetime64_dtype(result):
        # returning M8[ns] denotes wall-times; since tz is None
        #  the distinction is a thin one
        return result, tz_parsed
    elif is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        #  array is allowed.  When called via `pd.DatetimeIndex`, we can
        #  only accept datetime64 dtype, so raise TypeError if object-dtype
        #  is returned, as that indicates the values can be recognized as
        #  datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)
    else:  # pragma: no cover
        # GH#23675 this TypeError should never be hit, whereas the TypeError
        #  in the object-dtype branch above is reachable.
        raise TypeError(result)
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Apply dtype-based conversion conventions to raw input data.

    Float data is reinterpreted as wall-time datetime64[ns] (with a
    deprecation warning when a non-UTC tz is supplied), disallowed dtypes
    raise, and non-datetime extension arrays are coerced to object ndarray.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque
        return data, copy

    if is_float_dtype(data.dtype):
        # Note: we must cast to datetime64[ns] here in order to treat these
        # as wall-times instead of UTC timestamps.
        data = data.astype(DT64NS_DTYPE)
        copy = False
        should_warn = (
            tz is not None
            and len(data) > 0
            and not timezones.is_utc(timezones.maybe_get_tz(tz))
        )
        if should_warn:
            # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
            warnings.warn(
                "The behavior of DatetimeArray._from_sequence with a timezone-aware "
                "dtype and floating-dtype data is deprecated. In a future version, "
                "this data will be interpreted as nanosecond UTC timestamps "
                "instead of wall-times, matching the behavior with integer dtypes. "
                "To retain the old behavior, explicitly cast to 'datetime64[ns]' "
                "before passing the data to pandas. To get the future behavior, "
                "first cast to 'int64'.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return data, copy

    if is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")

    if is_period_dtype(data.dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )

    if is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype):
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy
2280# -------------------------------------------------------------------
2281# Validation and Inference
2284def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2285 """
2286 If a timezone is inferred from data, check that it is compatible with
2287 the user-provided timezone, if any.
2289 Parameters
2290 ----------
2291 tz : tzinfo or None
2292 inferred_tz : tzinfo or None
2294 Returns
2295 -------
2296 tz : tzinfo or None
2298 Raises
2299 ------
2300 TypeError : if both timezones are present but do not match
2301 """
2302 if tz is None:
2303 tz = inferred_tz
2304 elif inferred_tz is None:
2305 pass
2306 elif not timezones.tz_compare(tz, inferred_tz):
2307 raise TypeError(
2308 f"data is already tz-aware {inferred_tz}, unable to "
2309 f"set specified tz: {tz}"
2310 )
2311 return tz
2314def _validate_dt64_dtype(dtype):
2315 """
2316 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2317 dtype or a pandas DatetimeTZDtype.
2319 Parameters
2320 ----------
2321 dtype : object
2323 Returns
2324 -------
2325 dtype : None, numpy.dtype, or DatetimeTZDtype
2327 Raises
2328 ------
2329 ValueError : invalid dtype
2331 Notes
2332 -----
2333 Unlike validate_tz_from_dtype, this does _not_ allow non-existent
2334 tz errors to go through
2335 """
2336 if dtype is not None:
2337 dtype = pandas_dtype(dtype)
2338 if is_dtype_equal(dtype, np.dtype("M8")):
2339 # no precision, disallowed GH#24806
2340 msg = (
2341 "Passing in 'datetime64' dtype with no precision is not allowed. "
2342 "Please pass in 'datetime64[ns]' instead."
2343 )
2344 raise ValueError(msg)
2346 if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance(
2347 dtype, (np.dtype, DatetimeTZDtype)
2348 ):
2349 raise ValueError(
2350 f"Unexpected value for 'dtype': '{dtype}'. "
2351 "Must be 'datetime64[ns]' or DatetimeTZDtype'."
2352 )
2354 if getattr(dtype, "tz", None):
2355 # https://github.com/pandas-dev/pandas/issues/18595
2356 # Ensure that we have a standard timezone for pytz objects.
2357 # Without this, things like adding an array of timedeltas and
2358 # a tz-aware Timestamp (with a tz specific to its datetime) will
2359 # be incorrect(ish?) for the array as a whole
2360 dtype = cast(DatetimeTZDtype, dtype)
2361 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
2363 return dtype
def validate_tz_from_dtype(dtype, tz: tzinfo | None) -> tzinfo | None:
    """
    If the given dtype is a DatetimeTZDtype, extract the implied
    tzinfo object from it and check that it does not conflict with the given
    tz.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is None:
        return tz

    if isinstance(dtype, str):
        try:
            dtype = DatetimeTZDtype.construct_from_string(dtype)
        except TypeError:
            # Things like `datetime64[ns]`, which is OK for the
            # constructors, but also nonsense, which should be validated
            # but not by us. We *do* allow non-existent tz errors to
            # go through
            pass

    dtz = getattr(dtype, "tz", None)
    if dtz is not None:
        if tz is not None and not timezones.tz_compare(tz, dtz):
            raise ValueError("cannot supply both a tz and a dtype with a tz")
        tz = dtz

    if tz is not None and is_datetime64_dtype(dtype):
        # The user passed a tz along with a tz-naive dtype (i.e.
        # datetime64[ns]); dtz is None here, so the comparison fails
        # and we reject the combination.
        if not timezones.tz_compare(tz, dtz):
            raise ValueError(
                "cannot supply both a tz and a "
                "timezone-naive dtype (i.e. datetime64[ns])"
            )

    return tz
2413def _infer_tz_from_endpoints(
2414 start: Timestamp, end: Timestamp, tz: tzinfo | None
2415) -> tzinfo | None:
2416 """
2417 If a timezone is not explicitly given via `tz`, see if one can
2418 be inferred from the `start` and `end` endpoints. If more than one
2419 of these inputs provides a timezone, require that they all agree.
2421 Parameters
2422 ----------
2423 start : Timestamp
2424 end : Timestamp
2425 tz : tzinfo or None
2427 Returns
2428 -------
2429 tz : tzinfo or None
2431 Raises
2432 ------
2433 TypeError : if start and end timezones do not agree
2434 """
2435 try:
2436 inferred_tz = timezones.infer_tzinfo(start, end)
2437 except AssertionError as err:
2438 # infer_tzinfo raises AssertionError if passed mismatched timezones
2439 raise TypeError(
2440 "Start and end cannot both be tz-aware with different timezones"
2441 ) from err
2443 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2444 tz = timezones.maybe_get_tz(tz)
2446 if tz is not None and inferred_tz is not None:
2447 if not timezones.tz_compare(inferred_tz, tz):
2448 raise AssertionError("Inferred time zone not equal to passed time zone")
2450 elif inferred_tz is not None:
2451 tz = inferred_tz
2453 return tz
2456def _maybe_normalize_endpoints(
2457 start: Timestamp | None, end: Timestamp | None, normalize: bool
2458):
2459 _normalized = True
2461 if start is not None:
2462 if normalize:
2463 start = start.normalize()
2464 _normalized = True
2465 else:
2466 _normalized = _normalized and start.time() == _midnight
2468 if end is not None:
2469 if normalize:
2470 end = end.normalize()
2471 _normalized = True
2472 else:
2473 _normalized = _normalized and end.time() == _midnight
2475 return start, end, _normalized
2478def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
2479 """
2480 Localize a start or end Timestamp to the timezone of the corresponding
2481 start or end Timestamp
2483 Parameters
2484 ----------
2485 ts : start or end Timestamp to potentially localize
2486 is_none : argument that should be None
2487 is_not_none : argument that should not be None
2488 freq : Tick, DateOffset, or None
2489 tz : str, timezone object or None
2490 ambiguous: str, localization behavior for ambiguous times
2491 nonexistent: str, localization behavior for nonexistent times
2493 Returns
2494 -------
2495 ts : Timestamp
2496 """
2497 # Make sure start and end are timezone localized if:
2498 # 1) freq = a Timedelta-like frequency (Tick)
2499 # 2) freq = None i.e. generating a linspaced range
2500 if is_none is None and is_not_none is not None:
2501 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2502 # we have historically defaulted ambiguous=False
2503 ambiguous = ambiguous if ambiguous != "infer" else False
2504 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2505 if isinstance(freq, Tick) or freq is None:
2506 localize_args["tz"] = tz
2507 ts = ts.tz_localize(**localize_args)
2508 return ts
def generate_range(start=None, end=None, periods=None, offset=BDay()):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : datetime, (default None)
    end : datetime, (default None)
    periods : int, (default None)
    offset : DateOffset, (default BDay())

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Timestamp(None) gives NaT; treat NaT endpoints as absent.
    start = Timestamp(start)
    start = None if start is NaT else start
    end = Timestamp(end)
    end = None if end is NaT else end

    if start and not offset.is_on_offset(start):
        start = offset.rollforward(start)
    elif end and not offset.is_on_offset(end):
        end = offset.rollback(end)

    if periods is None and end < start and offset.n >= 0:
        # forward offset with end before start yields an empty range
        end = None
        periods = 0

    # fill in whichever endpoint is missing from periods
    if end is None:
        end = start + (periods - 1) * offset
    if start is None:
        start = end - (periods - 1) * offset

    current = start
    if offset.n >= 0:
        while current <= end:
            yield current
            if current == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break
            # faster than current + offset
            candidate = offset._apply(current)
            if candidate <= current:
                raise ValueError(f"Offset {offset} did not increment date")
            current = candidate
    else:
        while current >= end:
            yield current
            if current == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break
            # faster than current + offset
            candidate = offset._apply(current)
            if candidate >= current:
                raise ValueError(f"Offset {offset} did not decrement date")
            current = candidate