Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py: 17%
908 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import (
4 datetime,
5 timedelta,
6)
7import operator
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 Callable,
12 Literal,
13 Sequence,
14 TypeVar,
15 Union,
16 cast,
17 final,
18 overload,
19)
20import warnings
22import numpy as np
24from pandas._libs import (
25 algos,
26 lib,
27)
28from pandas._libs.arrays import NDArrayBacked
29from pandas._libs.tslibs import (
30 BaseOffset,
31 IncompatibleFrequency,
32 NaT,
33 NaTType,
34 Period,
35 Resolution,
36 Tick,
37 Timestamp,
38 delta_to_nanoseconds,
39 get_unit_from_dtype,
40 iNaT,
41 ints_to_pydatetime,
42 ints_to_pytimedelta,
43 to_offset,
44)
45from pandas._libs.tslibs.fields import (
46 RoundTo,
47 round_nsint64,
48)
49from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
50from pandas._libs.tslibs.timestamps import integer_op_not_supported
51from pandas._typing import (
52 ArrayLike,
53 DatetimeLikeScalar,
54 Dtype,
55 DtypeObj,
56 NpDtype,
57 PositionalIndexer2D,
58 PositionalIndexerTuple,
59 ScalarIndexer,
60 SequenceIndexer,
61 npt,
62)
63from pandas.compat.numpy import function as nv
64from pandas.errors import (
65 AbstractMethodError,
66 NullFrequencyError,
67 PerformanceWarning,
68)
69from pandas.util._decorators import (
70 Appender,
71 Substitution,
72 cache_readonly,
73)
74from pandas.util._exceptions import find_stack_level
76from pandas.core.dtypes.common import (
77 is_all_strings,
78 is_categorical_dtype,
79 is_datetime64_any_dtype,
80 is_datetime64_dtype,
81 is_datetime64tz_dtype,
82 is_datetime_or_timedelta_dtype,
83 is_dtype_equal,
84 is_float_dtype,
85 is_integer_dtype,
86 is_list_like,
87 is_object_dtype,
88 is_period_dtype,
89 is_string_dtype,
90 is_timedelta64_dtype,
91 is_unsigned_integer_dtype,
92 pandas_dtype,
93)
94from pandas.core.dtypes.dtypes import (
95 DatetimeTZDtype,
96 ExtensionDtype,
97)
98from pandas.core.dtypes.generic import (
99 ABCCategorical,
100 ABCMultiIndex,
101)
102from pandas.core.dtypes.missing import (
103 is_valid_na_for_dtype,
104 isna,
105)
107from pandas.core import (
108 nanops,
109 ops,
110)
111from pandas.core.algorithms import (
112 checked_add_with_arr,
113 isin,
114 mode,
115 unique1d,
116)
117from pandas.core.arraylike import OpsMixin
118from pandas.core.arrays._mixins import (
119 NDArrayBackedExtensionArray,
120 ravel_compat,
121)
122from pandas.core.arrays.base import ExtensionArray
123from pandas.core.arrays.integer import IntegerArray
124import pandas.core.common as com
125from pandas.core.construction import (
126 array as pd_array,
127 ensure_wrapped_if_datetimelike,
128 extract_array,
129)
130from pandas.core.indexers import (
131 check_array_indexer,
132 check_setitem_lengths,
133)
134from pandas.core.ops.common import unpack_zerodim_and_defer
135from pandas.core.ops.invalid import (
136 invalid_comparison,
137 make_invalid_op,
138)
140from pandas.tseries import frequencies
142if TYPE_CHECKING: 142 ↛ 144line 142 didn't jump to line 144, because the condition on line 142 was never true
144 from pandas.core.arrays import (
145 DatetimeArray,
146 PeriodArray,
147 TimedeltaArray,
148 )
# Scalar type held by these arrays: Period, Timestamp, or Timedelta — or NaT.
DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]

# TypeVar so methods can declare "returns the same array subclass as self".
DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")
class InvalidComparison(Exception):
    """
    Raised by _validate_comparison_value to indicate to caller it should
    return invalid_comparison.

    Internal control-flow exception; never propagated to users.
    """

    pass
163class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
164 """
165 Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
167 Assumes that __new__/__init__ defines:
168 _data
169 _freq
171 and that the inheriting class has methods:
172 _generate_range
173 """
175 # _infer_matches -> which infer_dtype strings are close enough to our own
176 _infer_matches: tuple[str, ...]
177 _is_recognized_dtype: Callable[[DtypeObj], bool]
178 _recognized_scalars: tuple[type, ...]
179 _ndarray: np.ndarray
    @cache_readonly
    def _can_hold_na(self) -> bool:
        # All datetime-like arrays can represent missing values as NaT.
        return True
    def __init__(self, data, dtype: Dtype | None = None, freq=None, copy=False) -> None:
        # Abstract: concrete subclasses (DatetimeArray, TimedeltaArray,
        # PeriodArray) must define their own constructor.
        raise AbstractMethodError(self)
    @property
    def _scalar_type(self) -> type[DatetimeLikeScalar]:
        """
        The scalar associated with this datelike

        * PeriodArray : Period
        * DatetimeArray : Timestamp
        * TimedeltaArray : Timedelta
        """
        raise AbstractMethodError(self)
    def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
        """
        Construct a scalar type from a string.

        Parameters
        ----------
        value : str

        Returns
        -------
        Period, Timestamp, or Timedelta, or NaT
            Whatever the type of ``self._scalar_type`` is.

        Notes
        -----
        This should call ``self._check_compatible_with`` before
        unboxing the result.
        """
        raise AbstractMethodError(self)
    def _unbox_scalar(
        self, value: DTScalarOrNaT, setitem: bool = False
    ) -> np.int64 | np.datetime64 | np.timedelta64:
        """
        Unbox the integer value of a scalar `value`.

        Parameters
        ----------
        value : Period, Timestamp, Timedelta, or NaT
            Depending on subclass.
        setitem : bool, default False
            Whether to check compatibility with setitem strictness.

        Returns
        -------
        int

        Examples
        --------
        >>> self._unbox_scalar(Timedelta("10s"))  # doctest: +SKIP
        10000000000
        """
        raise AbstractMethodError(self)
    def _check_compatible_with(
        self, other: DTScalarOrNaT, setitem: bool = False
    ) -> None:
        """
        Verify that `self` and `other` are compatible.

        * DatetimeArray verifies that the timezones (if any) match
        * PeriodArray verifies that the freq matches
        * Timedelta has no verification

        In each case, NaT is considered compatible.

        Parameters
        ----------
        other
        setitem : bool, default False
            For __setitem__ we may have stricter compatibility restrictions than
            for comparisons.

        Raises
        ------
        Exception
        """
        raise AbstractMethodError(self)
268 # ------------------------------------------------------------------
269 # NDArrayBackedExtensionArray compat
    @cache_readonly
    def _data(self) -> np.ndarray:
        # Alias for the backing ndarray; kept for NDArrayBackedExtensionArray
        # compatibility.
        return self._ndarray
275 # ------------------------------------------------------------------
    def _box_func(self, x):
        """
        box function to get object from internal representation

        Abstract: subclasses convert a raw value into their scalar type.
        """
        raise AbstractMethodError(self)
283 def _box_values(self, values) -> np.ndarray:
284 """
285 apply box func to passed values
286 """
287 return lib.map_infer(values, self._box_func, convert=False)
289 def __iter__(self):
290 if self.ndim > 1:
291 return (self[n] for n in range(len(self)))
292 else:
293 return (self._box_func(v) for v in self.asi8)
    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.
        """
        # do not cache or you'll create a memory leak
        return self._ndarray.view("i8")
308 # ----------------------------------------------------------------
309 # Rendering Methods
    def _format_native_types(
        self, *, na_rep="NaT", date_format=None
    ) -> npt.NDArray[np.object_]:
        """
        Helper method for astype when converting to strings.

        Returns
        -------
        ndarray[str]
        """
        raise AbstractMethodError(self)
323 def _formatter(self, boxed: bool = False):
324 # TODO: Remove Datetime & DatetimeTZ formatters.
325 return "'{}'".format
327 # ----------------------------------------------------------------
328 # Array-Like / EA-Interface Methods
    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        # used for Timedelta/DatetimeArray, overwritten by PeriodArray
        if is_object_dtype(dtype):
            # boxing each element via __iter__ / _box_func
            return np.array(list(self), dtype=object)
        return self._ndarray
    @overload
    def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
        ...

    @overload
    def __getitem__(
        self: DatetimeLikeArrayT,
        item: SequenceIndexer | PositionalIndexerTuple,
    ) -> DatetimeLikeArrayT:
        ...

    def __getitem__(
        self: DatetimeLikeArrayT, key: PositionalIndexer2D
    ) -> DatetimeLikeArrayT | DTScalarOrNaT:
        """
        This getitem defers to the underlying array, which by-definition can
        only handle list-likes, slices, and integer scalars
        """
        # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
        # but skip evaluating the Union at runtime for performance
        # (see https://github.com/pandas-dev/pandas/pull/44624)
        result = cast(
            "Union[DatetimeLikeArrayT, DTScalarOrNaT]", super().__getitem__(key)
        )
        if lib.is_scalar(result):
            return result
        else:
            # At this point we know the result is an array.
            result = cast(DatetimeLikeArrayT, result)
            # array results may retain a freq, depending on the key
            result._freq = self._get_getitem_freq(key)
            return result
    def _get_getitem_freq(self, key) -> BaseOffset | None:
        """
        Find the `freq` attribute to assign to the result of a __getitem__ lookup.

        Slices with a step scale the freq; Ellipsis preserves it; boolean
        masks preserve it only when equivalent to a slice.
        """
        is_period = is_period_dtype(self.dtype)
        if is_period:
            # PeriodArray freq is part of the dtype and always survives indexing
            freq = self.freq
        elif self.ndim != 1:
            freq = None
        else:
            key = check_array_indexer(self, key)  # maybe ndarray[bool] -> slice
            freq = None
            if isinstance(key, slice):
                if self.freq is not None and key.step is not None:
                    # taking every key.step-th element scales the freq
                    freq = key.step * self.freq
                else:
                    freq = self.freq
            elif key is Ellipsis:
                # GH#21282 indexing with Ellipsis is similar to a full slice,
                # should preserve `freq` attribute
                freq = self.freq
            elif com.is_bool_indexer(key):
                new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
                if isinstance(new_key, slice):
                    # mask was contiguous/strided enough to act like a slice
                    return self._get_getitem_freq(new_key)
        return freq
    # error: Argument 1 of "__setitem__" is incompatible with supertype
    # "ExtensionArray"; supertype defines the argument type as "Union[int,
    # ndarray]"
    def __setitem__(  # type: ignore[override]
        self,
        key: int | Sequence[int] | Sequence[bool] | slice,
        value: NaTType | Any | Sequence[Any],
    ) -> None:
        # I'm fudging the types a bit here. "Any" above really depends
        # on type(self). For PeriodArray, it's Period (or stuff coercible
        # to a period in from_sequence). For DatetimeArray, it's Timestamp...
        # I don't know if mypy can do that, possibly with Generics.
        # https://mypy.readthedocs.io/en/latest/generics.html
        no_op = check_setitem_lengths(key, value, self)

        # Calling super() before the no_op short-circuit means that we raise
        # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
        super().__setitem__(key, value)

        if no_op:
            return

        # in-place mutation may invalidate the cached freq
        self._maybe_clear_freq()
420 def _maybe_clear_freq(self):
421 # inplace operations like __setitem__ may invalidate the freq of
422 # DatetimeArray and TimedeltaArray
423 pass
    def astype(self, dtype, copy: bool = True):
        """
        Cast to the given dtype.

        Some notes on cases we don't have to handle here in the base class:
        1. PeriodArray.astype handles period -> period
        2. DatetimeArray.astype handles conversion between tz.
        3. DatetimeArray.astype handles datetime -> period
        """
        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            if self.dtype.kind == "M":
                self = cast("DatetimeArray", self)
                # *much* faster than self._box_values
                # for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
                i8data = self.asi8
                converted = ints_to_pydatetime(
                    i8data,
                    tz=self.tz,
                    freq=self.freq,
                    box="timestamp",
                    reso=self._reso,
                )
                return converted

            elif self.dtype.kind == "m":
                return ints_to_pytimedelta(self._ndarray, box=True)

            # fallback: box element-wise (e.g. PeriodArray)
            return self._box_values(self.asi8.ravel()).reshape(self.shape)

        elif isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)
        elif is_string_dtype(dtype):
            return self._format_native_types()
        elif is_integer_dtype(dtype):
            # we deliberately ignore int32 vs. int64 here.
            # See https://github.com/pandas-dev/pandas/issues/24381 for more.
            values = self.asi8

            if is_unsigned_integer_dtype(dtype):
                # Again, we ignore int32 vs. int64
                values = values.view("uint64")
                if dtype != np.uint64:
                    # GH#45034
                    warnings.warn(
                        f"The behavior of .astype from {self.dtype} to {dtype} is "
                        "deprecated. In a future version, this astype will return "
                        "exactly the specified dtype instead of uint64, and will "
                        "raise if that conversion overflows.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                elif (self.asi8 < 0).any():
                    # GH#45034  negative i8 values overflow when viewed as uint64
                    warnings.warn(
                        f"The behavior of .astype from {self.dtype} to {dtype} is "
                        "deprecated. In a future version, this astype will "
                        "raise if the conversion overflows, as it did in this "
                        "case with negative int64 values.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
            elif dtype != np.int64:
                # GH#45034
                warnings.warn(
                    f"The behavior of .astype from {self.dtype} to {dtype} is "
                    "deprecated. In a future version, this astype will return "
                    "exactly the specified dtype instead of int64, and will "
                    "raise if that conversion overflows.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

            if copy:
                values = values.copy()
            return values
        elif (
            is_datetime_or_timedelta_dtype(dtype)
            and not is_dtype_equal(self.dtype, dtype)
        ) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg)
        else:
            return np.asarray(self, dtype=dtype)
    @overload
    def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
        ...

    @overload
    def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
        ...

    @overload
    def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
        ...

    @overload
    def view(self, dtype: Dtype | None = ...) -> ArrayLike:
        ...

    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # we need to explicitly call super() method as long as the `@overload`s
        # are present in this file.
        return super().view(dtype)
530 # ------------------------------------------------------------------
531 # ExtensionArray Interface
    @classmethod
    def _concat_same_type(
        cls: type[DatetimeLikeArrayT],
        to_concat: Sequence[DatetimeLikeArrayT],
        axis: int = 0,
    ) -> DatetimeLikeArrayT:
        """
        Concatenate arrays of this dtype, retaining `freq` when the pieces
        line up end-to-end at that frequency.
        """
        new_obj = super()._concat_same_type(to_concat, axis)

        obj = to_concat[0]
        dtype = obj.dtype

        new_freq = None
        if is_period_dtype(dtype):
            # freq is part of the Period dtype, always retained
            new_freq = obj.freq
        elif axis == 0:
            # GH 3232: If the concat result is evenly spaced, we can retain the
            # original frequency
            to_concat = [x for x in to_concat if len(x)]

            if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
                pairs = zip(to_concat[:-1], to_concat[1:])
                # each piece must start exactly one freq-step after the
                # previous piece ends
                if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                    new_freq = obj.freq

        new_obj._freq = new_freq
        return new_obj
    def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT:
        """
        Return a copy of the array, carrying over the freq attribute
        (which the base-class copy does not propagate).
        """
        # error: Unexpected keyword argument "order" for "copy"
        new_obj = super().copy(order=order)  # type: ignore[call-arg]
        new_obj._freq = self.freq
        return new_obj
566 # ------------------------------------------------------------------
567 # Validation Methods
568 # TODO: try to de-duplicate these, ensure identical behavior
    def _validate_comparison_value(self, other):
        """
        Coerce `other` for comparison against self, raising InvalidComparison
        (caught by _cmp_method) when the comparison is not meaningful.
        """
        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except (ValueError, IncompatibleFrequency):
                # failed to parse as Timestamp/Timedelta/Period
                raise InvalidComparison(other)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            other = self._scalar_type(other)
            try:
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                # e.g. tzawareness mismatch
                raise InvalidComparison(other) from err

        elif not is_list_like(other):
            raise InvalidComparison(other)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            try:
                other = self._validate_listlike(other, allow_object=True)
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                if is_object_dtype(getattr(other, "dtype", None)):
                    # We will have to operate element-wise
                    pass
                else:
                    raise InvalidComparison(other) from err

        return other
    def _validate_shift_value(self, fill_value):
        """
        Coerce a ``shift`` fill_value to an unboxed value compatible with
        this array, warning on deprecated scalar inputs.
        """
        # TODO(2.0): once this deprecation is enforced, use _validate_scalar
        if is_valid_na_for_dtype(fill_value, self.dtype):
            fill_value = NaT
        elif isinstance(fill_value, self._recognized_scalars):
            fill_value = self._scalar_type(fill_value)
        else:
            new_fill: DatetimeLikeScalar

            # only warn if we're not going to raise
            if self._scalar_type is Period and lib.is_integer(fill_value):
                # kludge for #31971 since Period(integer) tries to cast to str
                new_fill = Period._from_ordinal(fill_value, freq=self.freq)
            else:
                new_fill = self._scalar_type(fill_value)

            # stacklevel here is chosen to be correct when called from
            # DataFrame.shift or Series.shift
            warnings.warn(
                f"Passing {type(fill_value)} to shift is deprecated and "
                "will raise in a future version, pass "
                f"{self._scalar_type.__name__} instead.",
                FutureWarning,
                # There is no way to hard-code the level since this might be
                # reached directly or called from the Index or Block method
                stacklevel=find_stack_level(),
            )
            fill_value = new_fill

        return self._unbox(fill_value, setitem=True)
    def _validate_scalar(
        self,
        value,
        *,
        allow_listlike: bool = False,
        setitem: bool = True,
        unbox: bool = True,
    ):
        """
        Validate that the input value can be cast to our scalar_type.

        Parameters
        ----------
        value : object
        allow_listlike: bool, default False
            When raising an exception, whether the message should say
            listlike inputs are allowed.
        setitem : bool, default True
            Whether to check compatibility with setitem strictness.
        unbox : bool, default True
            Whether to unbox the result before returning.  Note: unbox=False
            skips the setitem compatibility check.

        Returns
        -------
        self._scalar_type or NaT
        """
        if isinstance(value, self._scalar_type):
            pass

        elif isinstance(value, str):
            # NB: Careful about tzawareness
            try:
                value = self._scalar_from_string(value)
            except ValueError as err:
                msg = self._validation_error_message(value, allow_listlike)
                raise TypeError(msg) from err

        elif is_valid_na_for_dtype(value, self.dtype):
            # GH#18295
            value = NaT

        elif isna(value):
            # if we are dt64tz and value is dt64("NaT"), dont cast to NaT,
            # or else we'll fail to raise in _unbox_scalar
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        elif isinstance(value, self._recognized_scalars):
            value = self._scalar_type(value)

        else:
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        if not unbox:
            # NB: In general NDArrayBackedExtensionArray will unbox here;
            # this option exists to prevent a performance hit in
            # TimedeltaIndex.get_loc
            return value
        return self._unbox_scalar(value, setitem=setitem)
699 def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
700 """
701 Construct an exception message on validation error.
703 Some methods allow only scalar inputs, while others allow either scalar
704 or listlike.
706 Parameters
707 ----------
708 allow_listlike: bool, default False
710 Returns
711 -------
712 str
713 """
714 if allow_listlike:
715 msg = (
716 f"value should be a '{self._scalar_type.__name__}', 'NaT', "
717 f"or array of those. Got '{type(value).__name__}' instead."
718 )
719 else:
720 msg = (
721 f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
722 f"Got '{type(value).__name__}' instead."
723 )
724 return msg
    def _validate_listlike(self, value, allow_object: bool = False):
        """
        Coerce a listlike input to an array compatible with self, raising
        TypeError when it cannot be interpreted as our dtype.
        """
        if isinstance(value, type(self)):
            return value

        if isinstance(value, list) and len(value) == 0:
            # We treat empty list as our own dtype.
            return type(self)._from_sequence([], dtype=self.dtype)

        if hasattr(value, "dtype") and value.dtype == object:
            # `array` below won't do inference if value is an Index or Series.
            # so do so here. in the Index case, inferred_type may be cached.
            if lib.infer_dtype(value) in self._infer_matches:
                try:
                    value = type(self)._from_sequence(value)
                except (ValueError, TypeError):
                    if allow_object:
                        return value
                    msg = self._validation_error_message(value, True)
                    raise TypeError(msg)

        # Do type inference if necessary up front (after unpacking PandasArray)
        # e.g. we passed PeriodIndex.values and got an ndarray of Periods
        value = extract_array(value, extract_numpy=True)
        value = pd_array(value)
        value = extract_array(value, extract_numpy=True)

        if is_all_strings(value):
            # We got a StringArray
            try:
                # TODO: Could use from_sequence_of_strings if implemented
                # Note: passing dtype is necessary for PeriodArray tests
                value = type(self)._from_sequence(value, dtype=self.dtype)
            except ValueError:
                pass

        if is_categorical_dtype(value.dtype):
            # e.g. we have a Categorical holding self.dtype
            if is_dtype_equal(value.categories.dtype, self.dtype):
                # TODO: do we need equal dtype or just comparable?
                value = value._internal_get_values()
                value = extract_array(value, extract_numpy=True)

        if allow_object and is_object_dtype(value.dtype):
            pass

        elif not type(self)._is_recognized_dtype(value.dtype):
            msg = self._validation_error_message(value, True)
            raise TypeError(msg)

        return value
777 def _validate_searchsorted_value(self, value):
778 if not is_list_like(value):
779 return self._validate_scalar(value, allow_listlike=True, setitem=False)
780 else:
781 value = self._validate_listlike(value)
783 return self._unbox(value)
785 def _validate_setitem_value(self, value):
786 if is_list_like(value):
787 value = self._validate_listlike(value)
788 else:
789 return self._validate_scalar(value, allow_listlike=True)
791 return self._unbox(value, setitem=True)
    def _unbox(
        self, other, setitem: bool = False
    ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
        """
        Unbox either a scalar with _unbox_scalar or an instance of our own type.
        """
        if lib.is_scalar(other):
            other = self._unbox_scalar(other, setitem=setitem)
        else:
            # same type as self
            self._check_compatible_with(other, setitem=setitem)
            other = other._ndarray
        return other
807 # ------------------------------------------------------------------
808 # Additional array methods
809 # These are not part of the EA API, but we implement them because
810 # pandas assumes they're there.
    @ravel_compat
    def map(self, mapper):
        """
        Map values using an input mapping or function, delegating to Index.map.
        """
        # TODO(GH-23179): Add ExtensionArray.map
        # Need to figure out if we want ExtensionArray.map first.
        # If so, then we can refactor IndexOpsMixin._map_values to
        # a standalone function and call from here..
        # Else, just rewrite _map_infer_values to do the right thing.
        from pandas import Index

        return Index(self).map(mapper).array
    def isin(self, values) -> npt.NDArray[np.bool_]:
        """
        Compute boolean array of whether each value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        ndarray[bool]
        """
        if not hasattr(values, "dtype"):
            values = np.asarray(values)

        if values.dtype.kind in ["f", "i", "u", "c"]:
            # TODO: de-duplicate with equals, validate_comparison_value
            # numeric values can never match datetime-likes
            return np.zeros(self.shape, dtype=bool)

        if not isinstance(values, type(self)):
            inferable = [
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            ]
            if values.dtype == object:
                inferred = lib.infer_dtype(values, skipna=False)
                if inferred not in inferable:
                    if inferred == "string":
                        # strings may still parse as our scalar type below
                        pass

                    elif "mixed" in inferred:
                        return isin(self.astype(object), values)
                    else:
                        return np.zeros(self.shape, dtype=bool)

            try:
                values = type(self)._from_sequence(values)
            except ValueError:
                return isin(self.astype(object), values)

        try:
            self._check_compatible_with(values)
        except (TypeError, ValueError):
            # Includes tzawareness mismatch and IncompatibleFrequencyError
            return np.zeros(self.shape, dtype=bool)

        # both sides compatible: compare on the fast i8 representation
        return isin(self.asi8, values.asi8)
876 # ------------------------------------------------------------------
877 # Null Handling
    def isna(self) -> npt.NDArray[np.bool_]:
        # Boolean mask of missing (NaT) positions.
        return self._isnan
    @property  # NB: override with cache_readonly in immutable subclasses
    def _isnan(self) -> npt.NDArray[np.bool_]:
        """
        return if each value is nan

        NaT is stored as iNaT in the i8 representation.
        """
        return self.asi8 == iNaT
    @property  # NB: override with cache_readonly in immutable subclasses
    def _hasna(self) -> bool:
        """
        return if I have any nans; enables various perf speedups
        """
        return bool(self._isnan.any())
    def _maybe_mask_results(
        self, result: np.ndarray, fill_value=iNaT, convert=None
    ) -> np.ndarray:
        """
        Parameters
        ----------
        result : np.ndarray
        fill_value : object, default iNaT
        convert : str, dtype or None

        Returns
        -------
        result : ndarray with values replace by the fill_value

        mask the result if needed, convert to the provided dtype if its not
        None

        This is an internal routine.
        """
        if self._hasna:
            if convert:
                result = result.astype(convert)
            if fill_value is None:
                fill_value = np.nan
            # overwrite positions that were NaT in self; mutates result in place
            np.putmask(result, self._isnan, fill_value)
        return result
923 # ------------------------------------------------------------------
924 # Frequency Properties/Methods
    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
        """
        return self._freq

    @freq.setter
    def freq(self, value) -> None:
        # Normalize to an offset and verify it matches the existing values
        # before storing; freq is only supported on 1-dim arrays.
        if value is not None:
            value = to_offset(value)
            self._validate_frequency(self, value)

            if self.ndim > 1:
                raise ValueError("Cannot set freq with ndim > 1")

        self._freq = value
    @property
    def freqstr(self) -> str | None:
        """
        Return the frequency object as a string if its set, otherwise None.
        """
        if self.freq is None:
            return None
        return self.freq.freqstr
    @property  # NB: override with cache_readonly in immutable subclasses
    def inferred_freq(self) -> str | None:
        """
        Tries to return a string representing a frequency generated by infer_freq.

        Returns None if it can't autodetect the frequency.
        """
        if self.ndim != 1:
            # frequency is only meaningful for 1-dim arrays
            return None
        try:
            return frequencies.infer_freq(self)
        except ValueError:
            return None
    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution | None:
        # Resolution implied by the freq string, or None when freq is unset
        # or the freq string has no mapped resolution.
        freqstr = self.freqstr
        if freqstr is None:
            return None
        try:
            return Resolution.get_reso_from_freqstr(freqstr)
        except KeyError:
            return None
    @property  # NB: override with cache_readonly in immutable subclasses
    def resolution(self) -> str:
        """
        Returns day, hour, minute, second, millisecond or microsecond

        NOTE: raises AttributeError when _resolution_obj is None.
        """
        # error: Item "None" of "Optional[Any]" has no attribute "attrname"
        return self._resolution_obj.attrname  # type: ignore[union-attr]
    @classmethod
    def _validate_frequency(cls, index, freq, **kwargs):
        """
        Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index

        Parameters
        ----------
        index : DatetimeIndex or TimedeltaIndex
            The index on which to determine if the given frequency is valid
        freq : DateOffset
            The frequency to validate
        """
        # TODO: this is not applicable to PeriodArray, move to correct Mixin
        inferred = index.inferred_freq
        if index.size == 0 or inferred == freq.freqstr:
            return None

        try:
            # regenerate the range at `freq` and compare against the actual
            # values; any mismatch means freq is inconsistent with the data
            on_freq = cls._generate_range(
                start=index[0], end=None, periods=len(index), freq=freq, **kwargs
            )
            if not np.array_equal(index.asi8, on_freq.asi8):
                raise ValueError
        except ValueError as e:
            if "non-fixed" in str(e):
                # non-fixed frequencies are not meaningful for timedelta64;
                # we retain that error message
                raise e
            # GH#11587 the main way this is reached is if the `np.array_equal`
            # check above is False. This can also be reached if index[0]
            # is `NaT`, in which case the call to `cls._generate_range` will
            # raise a ValueError, which we re-raise with a more targeted
            # message.
            raise ValueError(
                f"Inferred frequency {inferred} from passed values "
                f"does not conform to passed frequency {freq.freqstr}"
            ) from e
    @classmethod
    def _generate_range(
        cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs
    ) -> DatetimeLikeArrayT:
        # Abstract: subclasses construct a regular range of values.
        raise AbstractMethodError(cls)
1030 # monotonicity/uniqueness properties are called via frequencies.infer_freq,
1031 # see GH#23789
    @property
    def _is_monotonic_increasing(self) -> bool:
        # checked on the i8 view; timelike=True so iNaT is handled as missing
        return algos.is_monotonic(self.asi8, timelike=True)[0]
    @property
    def _is_monotonic_decreasing(self) -> bool:
        # second element of the is_monotonic result is the decreasing flag
        return algos.is_monotonic(self.asi8, timelike=True)[1]
    @property
    def _is_unique(self) -> bool:
        # unique over the flattened i8 values; "K" order avoids a copy when possible
        return len(unique1d(self.asi8.ravel("K"))) == self.size
1045 # ------------------------------------------------------------------
1046 # Arithmetic Methods
    def _cmp_method(self, other, op):
        """
        Comparison dispatch shared by all datetime-like arrays.
        """
        if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
            # TODO: handle 2D-like listlikes
            return op(self.ravel(), other.ravel()).reshape(self.shape)

        try:
            other = self._validate_comparison_value(other)
        except InvalidComparison:
            return invalid_comparison(self, other, op)

        dtype = getattr(other, "dtype", None)
        if is_object_dtype(dtype):
            # We have to use comp_method_OBJECT_ARRAY instead of numpy
            # comparison otherwise it would fail to raise when
            # comparing tz-aware and tz-naive
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(
                    op, np.asarray(self.astype(object)), other
                )
            return result

        if other is NaT:
            # comparisons against NaT are False except for "!=", matching
            # scalar NaT semantics
            if op is operator.ne:
                result = np.ones(self.shape, dtype=bool)
            else:
                result = np.zeros(self.shape, dtype=bool)
            return result

        if not is_period_dtype(self.dtype):
            self = cast(TimelikeOps, self)
            if self._reso != other._reso:
                if not isinstance(other, type(self)):
                    # i.e. Timedelta/Timestamp, cast to ndarray and let
                    # compare_mismatched_resolutions handle broadcasting
                    other_arr = np.array(other.asm8)
                else:
                    other_arr = other._ndarray
                return compare_mismatched_resolutions(self._ndarray, other_arr, op)

        other_vals = self._unbox(other)
        # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
        result = op(self._ndarray.view("i8"), other_vals.view("i8"))

        # positions that were NaT on either side get the NaT comparison result
        o_mask = isna(other)
        mask = self._isnan | o_mask
        if mask.any():
            nat_result = op is operator.ne
            np.putmask(result, mask, nat_result)

        return result
    # pow is invalid for all three subclasses; TimedeltaArray will override
    # the multiplication and division ops
    # (each assigned method raises on use; see pandas.core.ops.invalid)
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
@final
def _add_datetimelike_scalar(self, other) -> DatetimeArray:
    """
    Add a datetime-like scalar (Timestamp/datetime64) to a TimedeltaArray.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    TypeError
        If self is not timedelta64-dtype (td64 + datetime is the only
        defined combination).
    NotImplementedError
        If self and other carry mismatched resolutions.
    """
    if not is_timedelta64_dtype(self.dtype):
        raise TypeError(
            f"cannot add {type(self).__name__} and {type(other).__name__}"
        )

    self = cast("TimedeltaArray", self)

    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import tz_to_dtype

    assert other is not NaT
    other = Timestamp(other)
    # Timestamp(np.datetime64("NaT")) returns NaT, so this branch is
    # reachable despite the assert above.
    if other is NaT:
        # In this case we specifically interpret NaT as a datetime, not
        # the timedelta interpretation we would get by returning self + NaT
        result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self._unit}]")
        # Preserve our resolution
        return DatetimeArray._simple_new(result, dtype=result.dtype)

    if self._reso != other._reso:
        raise NotImplementedError(
            "Addition between TimedeltaArray and Timestamp with mis-matched "
            "resolutions is not yet supported."
        )

    i8 = self.asi8
    # NaT-aware addition on i8 values; masked (NaT) slots stay iNaT.
    result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan)
    dtype = tz_to_dtype(tz=other.tz, unit=self._unit)
    res_values = result.view(f"M8[{self._unit}]")
    return DatetimeArray._simple_new(res_values, dtype=dtype, freq=self.freq)
1147 @final
1148 def _add_datetime_arraylike(self, other) -> DatetimeArray:
1149 if not is_timedelta64_dtype(self.dtype):
1150 raise TypeError(
1151 f"cannot add {type(self).__name__} and {type(other).__name__}"
1152 )
1154 # At this point we have already checked that other.dtype is datetime64
1155 other = ensure_wrapped_if_datetimelike(other)
1156 # defer to DatetimeArray.__add__
1157 return other + self
@final
def _sub_datetimelike_scalar(self, other: datetime | np.datetime64):
    """
    Subtract a datetime scalar from a DatetimeArray.

    Returns
    -------
    ndarray[timedelta64[ns]]
    """
    if self.dtype.kind != "M":
        raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

    self = cast("DatetimeArray", self)
    # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]

    # error: Non-overlapping identity check (left operand type: "Union[datetime,
    # datetime64]", right operand type: "NaTType") [comparison-overlap]
    assert other is not NaT  # type: ignore[comparison-overlap]
    other = Timestamp(other)
    # Timestamp(np.datetime64("NaT")) yields NaT, so this is reachable
    # despite the assert above.
    # error: Non-overlapping identity check (left operand type: "Timestamp",
    # right operand type: "NaTType")
    if other is NaT:  # type: ignore[comparison-overlap]
        return self - NaT

    try:
        self._assert_tzawareness_compat(other)
    except TypeError as err:
        # re-raise with subtraction wording rather than comparison wording
        new_message = str(err).replace("compare", "subtract")
        raise type(err)(new_message) from err

    i8 = self.asi8
    # NaT-aware subtraction on i8 values; masked slots stay iNaT.
    result = checked_add_with_arr(i8, -other.value, arr_mask=self._isnan)
    return result.view("timedelta64[ns]")
1186 @final
1187 def _sub_datetime_arraylike(self, other):
1188 if self.dtype.kind != "M":
1189 raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
1191 if len(self) != len(other):
1192 raise ValueError("cannot add indices of unequal length")
1194 self = cast("DatetimeArray", self)
1195 other = ensure_wrapped_if_datetimelike(other)
1197 try:
1198 self._assert_tzawareness_compat(other)
1199 except TypeError as err:
1200 new_message = str(err).replace("compare", "subtract")
1201 raise type(err)(new_message) from err
1203 self_i8 = self.asi8
1204 other_i8 = other.asi8
1205 new_values = checked_add_with_arr(
1206 self_i8, -other_i8, arr_mask=self._isnan, b_mask=other._isnan
1207 )
1208 return new_values.view("timedelta64[ns]")
@final
def _sub_period(self, other: Period) -> npt.NDArray[np.object_]:
    """
    Subtract a Period scalar from a PeriodArray.

    Returns
    -------
    ndarray[object]
        DateOffset objects (multiples of ``self.freq.base``); NaT where
        self is missing.
    """
    if not is_period_dtype(self.dtype):
        raise TypeError(f"cannot subtract Period from a {type(self).__name__}")

    # If the operation is well-defined, we return an object-dtype ndarray
    # of DateOffsets. Null entries are filled with pd.NaT
    self._check_compatible_with(other)
    new_i8_data = checked_add_with_arr(
        self.asi8, -other.ordinal, arr_mask=self._isnan
    )
    # ordinal differences become DateOffset multiples of our freq's base
    new_data = np.array([self.freq.base * x for x in new_i8_data])

    if self._hasna:
        new_data[self._isnan] = NaT

    return new_data
1228 @final
1229 def _add_period(self, other: Period) -> PeriodArray:
1230 if not is_timedelta64_dtype(self.dtype):
1231 raise TypeError(f"cannot add Period to a {type(self).__name__}")
1233 # We will wrap in a PeriodArray and defer to the reversed operation
1234 from pandas.core.arrays.period import PeriodArray
1236 i8vals = np.broadcast_to(other.ordinal, self.shape)
1237 parr = PeriodArray(i8vals, freq=other.freq)
1238 return parr + self
def _add_offset(self, offset):
    # Adding a non-Tick DateOffset has no generic implementation;
    # subclasses must override.
    raise AbstractMethodError(self)
def _add_timedeltalike_scalar(self, other):
    """
    Add a delta of a timedeltalike

    Returns
    -------
    Same type as self
    """
    if isna(other):
        # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds
        new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
        new_values.fill(iNaT)
        return type(self)._simple_new(new_values, dtype=self.dtype)

    # PeriodArray overrides, so we only get here with DTA/TDA
    # error: "DatetimeLikeArrayMixin" has no attribute "_reso"
    inc = delta_to_nanoseconds(other, reso=self._reso)  # type: ignore[attr-defined]

    # NaT-aware i8 addition with overflow checking; NaT slots stay iNaT.
    new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan)
    new_values = new_values.view(self._ndarray.dtype)

    new_freq = None
    if isinstance(self.freq, Tick) or is_period_dtype(self.dtype):
        # adding a scalar preserves freq
        new_freq = self.freq

    # error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked"
    return type(self)._simple_new(  # type: ignore[call-arg]
        new_values, dtype=self.dtype, freq=new_freq
    )
def _add_timedelta_arraylike(
    self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
):
    """
    Add a delta of a TimedeltaIndex

    Returns
    -------
    Same type as self
    """
    # overridden by PeriodArray
    if len(self) != len(other):
        raise ValueError("cannot add indices of unequal length")

    other = ensure_wrapped_if_datetimelike(other)
    other = cast("TimedeltaArray", other)

    # NaT-aware i8 addition with overflow checking.
    summed = checked_add_with_arr(
        self.asi8,
        other.asi8,
        arr_mask=self._isnan,
        b_mask=other._isnan,
    )
    return type(self)(summed, dtype=self.dtype)
1299 @final
1300 def _add_nat(self):
1301 """
1302 Add pd.NaT to self
1303 """
1304 if is_period_dtype(self.dtype):
1305 raise TypeError(
1306 f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
1307 )
1308 self = cast("TimedeltaArray | DatetimeArray", self)
1310 # GH#19124 pd.NaT is treated like a timedelta for both timedelta
1311 # and datetime dtypes
1312 result = np.empty(self.shape, dtype=np.int64)
1313 result.fill(iNaT)
1314 result = result.view(self._ndarray.dtype) # preserve reso
1315 return type(self)._simple_new(result, dtype=self.dtype, freq=None)
1317 @final
1318 def _sub_nat(self):
1319 """
1320 Subtract pd.NaT from self
1321 """
1322 # GH#19124 Timedelta - datetime is not in general well-defined.
1323 # We make an exception for pd.NaT, which in this case quacks
1324 # like a timedelta.
1325 # For datetime64 dtypes by convention we treat NaT as a datetime, so
1326 # this subtraction returns a timedelta64 dtype.
1327 # For period dtype, timedelta64 is a close-enough return dtype.
1328 result = np.empty(self.shape, dtype=np.int64)
1329 result.fill(iNaT)
1330 return result.view("timedelta64[ns]")
@final
def _sub_period_array(self, other: PeriodArray) -> npt.NDArray[np.object_]:
    """
    Subtract a PeriodArray from self (PeriodArray only), returning an
    object-dtype ndarray of DateOffsets with NaT where either side is missing.
    """
    if not is_period_dtype(self.dtype):
        raise TypeError(
            f"cannot subtract {other.dtype}-dtype from {type(self).__name__}"
        )

    self = cast("PeriodArray", self)
    self._require_matching_freq(other)

    # Ordinal differences, NaT-aware and overflow-checked.
    diffs = checked_add_with_arr(
        self.asi8, -other.asi8, arr_mask=self._isnan, b_mask=other._isnan
    )

    # Each difference becomes a DateOffset multiple of the shared freq base.
    result = np.array([self.freq.base * ordinal for ordinal in diffs])
    if self._hasna or other._hasna:
        result[self._isnan | other._isnan] = NaT
    return result
@final
def _addsub_object_array(self, other: np.ndarray, op):
    """
    Add or subtract array-like of DateOffset objects

    Parameters
    ----------
    other : np.ndarray[object]
    op : {operator.add, operator.sub}

    Returns
    -------
    result : same class as self
    """
    assert op in [operator.add, operator.sub]
    if len(other) == 1 and self.ndim == 1:
        # If both 1D then broadcasting is unambiguous
        return op(self, other[0])

    # Element-wise object arithmetic below is a Python-level loop.
    warnings.warn(
        "Adding/subtracting object-dtype array to "
        f"{type(self).__name__} not vectorized.",
        PerformanceWarning,
        stacklevel=find_stack_level(),
    )

    # Caller is responsible for broadcasting if necessary
    assert self.shape == other.shape, (self.shape, other.shape)

    with warnings.catch_warnings():
        # filter out warnings about Timestamp.freq
        warnings.filterwarnings("ignore", category=FutureWarning)
        res_values = op(self.astype("O"), np.asarray(other))

    # Re-infer an array type from the object result, then restore shape.
    result = pd_array(res_values.ravel())
    result = extract_array(result, extract_numpy=True).reshape(self.shape)
    return result
def _time_shift(
    self: DatetimeLikeArrayT, periods: int, freq=None
) -> DatetimeLikeArrayT:
    """
    Shift each value by `periods`.

    Note this is different from ExtensionArray.shift, which
    shifts the *position* of each element, padding the end with
    missing values.

    Parameters
    ----------
    periods : int
        Number of periods to shift by.
    freq : pandas.DateOffset, pandas.Timedelta, or str
        Frequency increment to shift by.

    Raises
    ------
    NullFrequencyError
        If no freq is given and self has no freq to shift by.
    """
    if freq is not None and freq != self.freq:
        # Shifting by an explicit freq reduces to scalar offset addition.
        if isinstance(freq, str):
            freq = to_offset(freq)
        offset = periods * freq
        return self + offset

    if periods == 0 or len(self) == 0:
        # GH#14811 empty case
        return self.copy()

    if self.freq is None:
        raise NullFrequencyError("Cannot shift with no freq")

    # With a regular freq, the shifted array is a fresh regular range.
    start = self[0] + periods * self.freq
    end = self[-1] + periods * self.freq

    # Note: in the DatetimeTZ case, _generate_range will infer the
    # appropriate timezone from `start` and `end`, so tz does not need
    # to be passed explicitly.
    return self._generate_range(start=start, end=end, periods=None, freq=self.freq)
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
    """
    Addition dispatch for datetime-like arrays.

    Branch order matters: e.g. np.timedelta64 must be matched before the
    lib.is_integer check, which would also accept it.
    """
    other_dtype = getattr(other, "dtype", None)

    # scalar others
    if other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_timedeltalike_scalar(other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datetimelike_scalar(other)
    elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype):
        result = self._add_period(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        result = cast("PeriodArray", self)._addsub_int_array_or_scalar(
            other * self.freq.n, operator.add
        )

    # array-like others
    elif is_timedelta64_dtype(other_dtype):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.add)
    elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other_dtype):
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        result = cast("PeriodArray", self)._addsub_int_array_or_scalar(
            other * self.freq.n, operator.add
        )
    else:
        # Includes Categorical, other ExtensionArrays
        # For PeriodDtype, if self is a TimedeltaArray and other is a
        # PeriodArray with a timedelta-like (i.e. Tick) freq, this
        # operation is valid. Defer to the PeriodArray implementation.
        # In remaining cases, this will end up raising TypeError.
        return NotImplemented

    if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
        # Raw td64 ndarray results get re-wrapped in a TimedeltaArray.
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray(result)
    return result
1483 def __radd__(self, other):
1484 # alias for __add__
1485 return self.__add__(other)
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
    """
    Subtraction dispatch for datetime-like arrays.

    Branch order matters: e.g. np.timedelta64 must be matched before the
    lib.is_integer check, which would also accept it.
    """
    other_dtype = getattr(other, "dtype", None)

    # scalar others
    if other is NaT:
        result = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        # subtraction implemented as addition of the negated scalar
        result = self._add_timedeltalike_scalar(-other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        result = cast("PeriodArray", self)._addsub_int_array_or_scalar(
            other * self.freq.n, operator.sub
        )

    elif isinstance(other, Period):
        result = self._sub_period(other)

    # array-like others
    elif is_timedelta64_dtype(other_dtype):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(-other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.sub)
    elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
        # DatetimeIndex, ndarray[datetime64]
        result = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other_dtype):
        # PeriodIndex
        result = self._sub_period_array(other)
    elif is_integer_dtype(other_dtype):
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        result = cast("PeriodArray", self)._addsub_int_array_or_scalar(
            other * self.freq.n, operator.sub
        )
    else:
        # Includes ExtensionArrays, float_dtype
        return NotImplemented

    if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
        # Raw td64 ndarray results get re-wrapped in a TimedeltaArray.
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray(result)
    return result
def __rsub__(self, other):
    """
    Reflected subtraction: other - self.
    """
    other_dtype = getattr(other, "dtype", None)

    if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype):
        # ndarray[datetime64] cannot be subtracted from self, so
        # we need to wrap in DatetimeArray/Index and flip the operation
        if lib.is_scalar(other):
            # i.e. np.datetime64 object
            return Timestamp(other) - self
        if not isinstance(other, DatetimeLikeArrayMixin):
            # Avoid down-casting DatetimeIndex
            from pandas.core.arrays import DatetimeArray

            other = DatetimeArray(other)
        return other - self
    elif (
        is_datetime64_any_dtype(self.dtype)
        and hasattr(other, "dtype")
        and not is_datetime64_any_dtype(other.dtype)
    ):
        # GH#19959 datetime - datetime is well-defined as timedelta,
        # but any other type - datetime is not well-defined.
        raise TypeError(
            f"cannot subtract {type(self).__name__} from {type(other).__name__}"
        )
    elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype):
        # TODO: Can we simplify/generalize these cases at all?
        raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
    elif is_timedelta64_dtype(self.dtype):
        # td64 negation is well-defined, so flip: other - self == (-self) + other
        self = cast("TimedeltaArray", self)
        return (-self) + other

    # We get here with e.g. datetime objects
    return -(self - other)
def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
    """
    In-place addition: compute via __add__, then write the values back.
    """
    updated = self + other
    self[:] = updated[:]

    if not is_period_dtype(self.dtype):
        # setitem wiped the cached freq; restore it from the computed result
        self._freq = updated.freq
    return self
def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
    """
    In-place subtraction: compute via __sub__, then write the values back.
    """
    updated = self - other
    self[:] = updated[:]

    if not is_period_dtype(self.dtype):
        # setitem wiped the cached freq; restore it from the computed result
        self._freq = updated.freq
    return self
1596 # --------------------------------------------------------------
1597 # Reductions
def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    if is_period_dtype(self.dtype):
        # pass datetime64 values to nanops to get correct NaT semantics
        result = nanops.nanmin(
            self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
        )
        if result is NaT:
            # all-NaT (or skipna=False with any NaT): no boxing needed
            return NaT
        result = result.view("i8")
        if axis is None or self.ndim == 1:
            # scalar reduction -> box back to Period
            return self._box_func(result)
        return self._from_backing_data(result)

    result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)
def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
    """
    Return the maximum value of the Array or maximum along
    an axis.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    nv.validate_max((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    if is_period_dtype(self.dtype):
        # pass datetime64 values to nanops to get correct NaT semantics
        result = nanops.nanmax(
            self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
        )
        if result is NaT:
            # all-NaT (or skipna=False with any NaT): return NaT directly,
            # mirroring the equivalent branch in `min` (was `return result`,
            # which is the same object but read inconsistently).
            return NaT
        result = result.view("i8")
        if axis is None or self.ndim == 1:
            # scalar reduction -> box back to Period
            return self._box_func(result)
        return self._from_backing_data(result)

    result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)
def mean(self, *, skipna: bool = True, axis: int | None = 0):
    """
    Return the mean value of the Array.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    skipna : bool, default True
        Whether to ignore any NaT elements.
    axis : int, optional, default 0

    Returns
    -------
    scalar
        Timestamp or Timedelta.

    See Also
    --------
    numpy.ndarray.mean : Returns the average of array elements along a given axis.
    Series.mean : Return the mean value in a Series.

    Notes
    -----
    mean is only defined for Datetime and Timedelta dtypes, not for Period.
    """
    if is_period_dtype(self.dtype):
        # See discussion in GH#24757
        raise TypeError(
            f"mean is not implemented for {type(self).__name__} since the "
            "meaning is ambiguous. An alternative is "
            "obj.to_timestamp(how='start').mean()"
        )

    nat_mask = self.isna()
    averaged = nanops.nanmean(
        self._ndarray, axis=axis, skipna=skipna, mask=nat_mask
    )
    return self._wrap_reduction_result(axis, averaged)
def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
    """
    Return the median value of the Array or median along an axis.
    """
    nv.validate_median((), kwargs)

    if axis is not None and abs(axis) >= self.ndim:
        raise ValueError("abs(axis) must be less than ndim")

    if is_period_dtype(self.dtype):
        # pass datetime64 values to nanops to get correct NaT semantics
        med = nanops.nanmedian(
            self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
        )
        med = med.view("i8")
        if axis is None or self.ndim == 1:
            # scalar reduction -> box back to Period
            return self._box_func(med)
        return self._from_backing_data(med)

    med = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, med)
def _mode(self, dropna: bool = True):
    """
    Compute the modal value(s), optionally excluding NaT.
    """
    nat_mask = self.isna() if dropna else None

    # Mode on the i8 view, then re-view as our dtype and re-wrap.
    i8modes = mode(self.view("i8"), mask=nat_mask)
    npmodes = cast(np.ndarray, i8modes.view(self._ndarray.dtype))
    return self._from_backing_data(npmodes)
class DatelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
    """

    # NOTE: the %% escapes below are required because @Substitution runs
    # %-formatting over the docstring (substituting %(URL)s).
    @Substitution(
        URL="https://docs.python.org/3/library/datetime.html"
        "#strftime-and-strptime-behavior"
    )
    def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
        """
        Convert to Index using specified date_format.

        Return an Index of formatted strings specified by date_format, which
        supports the same string format as the python standard library. Details
        of the string format can be found in `python string format
        doc <%(URL)s>`__.

        Formats supported by the C `strftime` API but not by the python string format
        doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
        preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
        `"%%I:%%M:%%S %%p"`).

        Note that `PeriodIndex` support additional directives, detailed in
        `Period.strftime`.

        Parameters
        ----------
        date_format : str
            Date format string (e.g. "%%Y-%%m-%%d").

        Returns
        -------
        ndarray[object]
            NumPy ndarray of formatted strings.

        See Also
        --------
        to_datetime : Convert the given argument to datetime.
        DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
        DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
        DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
        Timestamp.strftime : Format a single Timestamp.
        Period.strftime : Format a single Period.

        Examples
        --------
        >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
        ...                     periods=3, freq='s')
        >>> rng.strftime('%%B %%d, %%Y, %%r')
        Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
               'March 10, 2018, 09:00:02 AM'],
              dtype='object')
        """
        # Delegate to the subclass formatter (NaT renders as np.nan), then
        # cast to object dtype without copying when possible.
        result = self._format_native_types(date_format=date_format, na_rep=np.nan)
        return result.astype(object, copy=False)
# Shared docstring template for round/floor/ceil: `{op}` is filled via
# str.format and a per-op example block is appended before being attached
# with @Appender on TimelikeOps.round/floor/ceil below.
_round_doc = """
    Perform {op} operation on the data to the specified `freq`.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to {op} the index to. Must be a fixed
        frequency like 'S' (second) not 'ME' (month end). See
        :ref:`frequency aliases <timeseries.offset_aliases>` for
        a list of possible `freq` values.
    ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
        Only relevant for DatetimeIndex:

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False designates
          a non-DST time (note that this flag is only applicable for
          ambiguous times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous
          times.

    nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
        A nonexistent time does not exist in a particular timezone
        where clocks moved forward due to DST.

        - 'shift_forward' will shift the nonexistent time forward to the
          closest existing time
        - 'shift_backward' will shift the nonexistent time backward to the
          closest existing time
        - 'NaT' will return NaT where there are nonexistent times
        - timedelta objects will shift nonexistent times by the timedelta
        - 'raise' will raise an NonExistentTimeError if there are
          nonexistent times.

    Returns
    -------
    DatetimeIndex, TimedeltaIndex, or Series
        Index of the same type for a DatetimeIndex or TimedeltaIndex,
        or a Series with the same index for a Series.

    Raises
    ------
    ValueError if the `freq` cannot be converted.

    Notes
    -----
    If the timestamps have a timezone, {op}ing will take place relative to the
    local ("wall") time and re-localized to the same timezone. When {op}ing
    near daylight savings time, use ``nonexistent`` and ``ambiguous`` to
    control the re-localization behavior.

    Examples
    --------
    **DatetimeIndex**

    >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng
    DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:01:00'],
                  dtype='datetime64[ns]', freq='T')
    """

# Example section appended to _round_doc for TimelikeOps.round.
_round_example = """>>> rng.round('H')
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.round("H")
    0   2018-01-01 12:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 12:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.floor("2H", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.floor("2H", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """

# Example section appended to _round_doc for TimelikeOps.floor.
_floor_example = """>>> rng.floor('H')
    DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.floor("H")
    0   2018-01-01 11:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 12:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.floor("2H", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.floor("2H", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """

# Example section appended to _round_doc for TimelikeOps.ceil.
_ceil_example = """>>> rng.ceil('H')
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 13:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Series**

    >>> pd.Series(rng).dt.ceil("H")
    0   2018-01-01 12:00:00
    1   2018-01-01 12:00:00
    2   2018-01-01 13:00:00
    dtype: datetime64[ns]

    When rounding near a daylight savings time transition, use ``ambiguous`` or
    ``nonexistent`` to control how the timestamp should be re-localized.

    >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")

    >>> rng_tz.ceil("H", ambiguous=False)
    DatetimeIndex(['2021-10-31 02:00:00+01:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)

    >>> rng_tz.ceil("H", ambiguous=True)
    DatetimeIndex(['2021-10-31 02:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
    """

# TypeVar used for self-returning TimelikeOps methods.
TimelikeOpsT = TypeVar("TimelikeOpsT", bound="TimelikeOps")
class TimelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
    """

    # Set by subclasses; used in __init__ to re-view raw i8 input
    # (presumably M8[ns] for DatetimeArray, m8[ns] for TimedeltaArray —
    # the subclass definitions are outside this chunk).
    _default_dtype: np.dtype
def __init__(self, values, dtype=None, freq=lib.no_default, copy: bool = False):
    """
    Construct from an ndarray, an instance of the same type, or an
    IntegerArray/Series/Index wrapping one of those.
    """
    values = extract_array(values, extract_numpy=True)
    if isinstance(values, IntegerArray):
        # masked ints: NA slots become iNaT sentinels
        values = values.to_numpy("int64", na_value=iNaT)

    inferred_freq = getattr(values, "_freq", None)
    # distinguish "freq=None passed explicitly" from "freq not passed"
    explicit_none = freq is None
    freq = freq if freq is not lib.no_default else None

    if isinstance(values, type(self)):
        if explicit_none:
            # don't inherit from values
            pass
        elif freq is None:
            freq = values.freq
        elif freq and values.freq:
            # both specified: they must be reconcilable
            freq = to_offset(freq)
            freq, _ = validate_inferred_freq(freq, values.freq, False)

        if dtype is not None:
            dtype = pandas_dtype(dtype)
            if not is_dtype_equal(dtype, values.dtype):
                # TODO: we only have tests for this for DTA, not TDA (2022-07-01)
                raise TypeError(
                    f"dtype={dtype} does not match data dtype {values.dtype}"
                )

        dtype = values.dtype
        values = values._ndarray

    elif dtype is None:
        dtype = self._default_dtype

    if not isinstance(values, np.ndarray):
        raise ValueError(
            f"Unexpected type '{type(values).__name__}'. 'values' must be a "
            f"{type(self).__name__}, ndarray, or Series or Index "
            "containing one of those."
        )
    if values.ndim not in [1, 2]:
        raise ValueError("Only 1-dimensional input arrays are supported.")

    if values.dtype == "i8":
        # for compat with datetime/timedelta/period shared methods,
        # we can sometimes get here with int64 values. These represent
        # nanosecond UTC (or tz-naive) unix timestamps
        values = values.view(self._default_dtype)

    dtype = self._validate_dtype(values, dtype)

    if freq == "infer":
        raise ValueError(
            f"Frequency inference not allowed in {type(self).__name__}.__init__. "
            "Use 'pd.array()' instead."
        )

    if copy:
        values = values.copy()
    if freq:
        freq = to_offset(freq)

    NDArrayBacked.__init__(self, values=values, dtype=dtype)
    self._freq = freq

    if inferred_freq is None and freq is not None:
        # only re-validate a freq the caller supplied, not one we inherited
        type(self)._validate_frequency(self, freq)
@classmethod
def _validate_dtype(cls, values, dtype):
    # Subclass responsibility: reconcile and validate `dtype` against
    # the candidate backing `values`.
    raise AbstractMethodError(cls)
2010 # --------------------------------------------------------------
@cache_readonly
def _reso(self) -> int:
    # Integer resolution code of the backing numpy dtype (e.g. ns vs us),
    # as returned by tslibs.get_unit_from_dtype.
    return get_unit_from_dtype(self._ndarray.dtype)
@cache_readonly
def _unit(self) -> str:
    # e.g. "ns", "us", "ms"
    # String form of the dtype's time unit, used to build M8/m8 dtype strings.
    # error: Argument 1 to "dtype_to_unit" has incompatible type
    # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
    return dtype_to_unit(self.dtype)  # type: ignore[arg-type]
2023 # --------------------------------------------------------------
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    # Fast-path the unary nan/inf checks straight onto the backing ndarray.
    if (
        ufunc in [np.isnan, np.isinf, np.isfinite]
        and len(inputs) == 1
        and inputs[0] is self
    ):
        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        return getattr(ufunc, method)(self._ndarray, **kwargs)

    # Everything else goes through the generic mixin/ExtensionArray path.
    return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
def _round(self, freq, mode, ambiguous, nonexistent):
    """
    Shared implementation of round/floor/ceil.

    Parameters
    ----------
    freq : str or Offset
        Must resolve to a fixed (Tick) frequency.
    mode : RoundTo
        Rounding mode passed to round_nsint64.
    ambiguous, nonexistent
        tz re-localization options (DatetimeIndex only).

    Raises
    ------
    ValueError
        If freq is not a fixed frequency.
    """
    # round the local times
    if is_datetime64tz_dtype(self.dtype):
        # operate on naive timestamps, then convert back to aware
        self = cast("DatetimeArray", self)
        naive = self.tz_localize(None)
        result = naive._round(freq, mode, ambiguous, nonexistent)
        return result.tz_localize(
            self.tz, ambiguous=ambiguous, nonexistent=nonexistent
        )

    values = self.view("i8")
    values = cast(np.ndarray, values)
    # Parse the frequency once (the original parsed it twice and discarded
    # the first `nanos` value); the attribute access below is kept for its
    # validating side effect.
    offset = to_offset(freq)
    offset.nanos  # raises on non-fixed frequencies
    nanos = delta_to_nanoseconds(offset, self._reso)
    result_i8 = round_nsint64(values, mode, nanos)
    # re-insert iNaT where the original had NaT
    result = self._maybe_mask_results(result_i8, fill_value=iNaT)
    result = result.view(self._ndarray.dtype)
    return self._simple_new(result, dtype=self.dtype)
    # Docstring is supplied by the Appender decorator; ties round half-even.
    @Appender((_round_doc + _round_example).format(op="round"))
    def round(self, freq, ambiguous="raise", nonexistent="raise"):
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)
    # Docstring is supplied by the Appender decorator; rounds toward -inf.
    @Appender((_round_doc + _floor_example).format(op="floor"))
    def floor(self, freq, ambiguous="raise", nonexistent="raise"):
        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
    # Docstring is supplied by the Appender decorator; rounds toward +inf.
    @Appender((_round_doc + _ceil_example).format(op="ceil"))
    def ceil(self, freq, ambiguous="raise", nonexistent="raise"):
        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
2068 # --------------------------------------------------------------
2069 # Reductions
    def any(self, *, axis: int | None = None, skipna: bool = True) -> bool:
        # Reduce via nanops.nanany over the integer-backed ndarray; NaT
        # positions are passed as the mask.
        # GH#34479 discussion of desired behavior long-term
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
    def all(self, *, axis: int | None = None, skipna: bool = True) -> bool:
        # Reduce via nanops.nanall over the integer-backed ndarray; NaT
        # positions are passed as the mask.
        # GH#34479 discussion of desired behavior long-term
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
2079 # --------------------------------------------------------------
2080 # Frequency Methods
    def _maybe_clear_freq(self) -> None:
        # Drop the cached frequency. Presumably called when an operation
        # may invalidate it — callers are outside this view; confirm there.
        self._freq = None
2085 def _with_freq(self, freq):
2086 """
2087 Helper to get a view on the same data, with a new freq.
2089 Parameters
2090 ----------
2091 freq : DateOffset, None, or "infer"
2093 Returns
2094 -------
2095 Same type as self
2096 """
2097 # GH#29843
2098 if freq is None:
2099 # Always valid
2100 pass
2101 elif len(self) == 0 and isinstance(freq, BaseOffset):
2102 # Always valid. In the TimedeltaArray case, we assume this
2103 # is a Tick offset.
2104 pass
2105 else:
2106 # As an internal method, we can ensure this assertion always holds
2107 assert freq == "infer"
2108 freq = to_offset(self.inferred_freq)
2110 arr = self.view()
2111 arr._freq = freq
2112 return arr
2114 # --------------------------------------------------------------
2116 # GH#46910 - Keep old signature to test we don't break things for EA library authors
2117 def factorize( # type:ignore[override]
2118 self,
2119 na_sentinel: int = -1,
2120 sort: bool = False,
2121 ):
2122 if self.freq is not None:
2123 # We must be unique, so can short-circuit (and retain freq)
2124 codes = np.arange(len(self), dtype=np.intp)
2125 uniques = self.copy() # TODO: copy or view?
2126 if sort and self.freq.n < 0:
2127 codes = codes[::-1]
2128 uniques = uniques[::-1]
2129 return codes, uniques
2130 # FIXME: shouldn't get here; we are ignoring sort
2131 return super().factorize(na_sentinel=na_sentinel)
2134# -------------------------------------------------------------------
2135# Shared Constructor Helpers
def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
    """
    Coerce ``data`` to an ndarray or ExtensionArray usable by the
    datetimelike constructors, updating ``copy`` when a copy has already
    been made along the way.

    Parameters
    ----------
    data : list-like, ndarray, or ExtensionArray
    copy : bool
    cls_name : str
        Class name used in the MultiIndex error message.

    Returns
    -------
    tuple of (ndarray or ExtensionArray, bool)

    Raises
    ------
    TypeError
        If ``data`` is a MultiIndex.
    """
    if not hasattr(data, "dtype"):
        # e.g. list, tuple, or a generator (np.ndim reports those as 0-dim)
        if np.ndim(data) == 0:
            data = list(data)
        data = np.asarray(data)
        copy = False
    elif isinstance(data, ABCMultiIndex):
        raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
    else:
        data = extract_array(data, extract_numpy=True)

    if isinstance(data, IntegerArray):
        # masked ints: fill NA with iNaT; to_numpy already made a copy
        data = data.to_numpy("int64", na_value=iNaT)
        copy = False
    elif not isinstance(data, (np.ndarray, ExtensionArray)):
        # GH#24539 e.g. xarray, dask object
        data = np.asarray(data)
    elif isinstance(data, ABCCategorical):
        # GH#18664 preserve tz in going DTI->Categorical->DTI
        # TODO: cases where we need to do another pass through maybe_convert_dtype,
        # e.g. the categories are timedelta64s
        data = data.categories.take(data.codes, fill_value=NaT)._values
        copy = False

    return data, copy
@overload
def validate_periods(periods: None) -> None:
    ...


@overload
def validate_periods(periods: float) -> int:
    ...


def validate_periods(periods: float | None) -> int | None:
    """
    If a `periods` argument is passed to the Datetime/Timedelta Array/Index
    constructor, cast it to an integer.

    Parameters
    ----------
    periods : None, float, int

    Returns
    -------
    periods : None or int

    Raises
    ------
    TypeError
        If periods is not None and is neither a float nor an integer.
    """
    # BUGFIX(docs): the Raises section previously claimed a TypeError for
    # "None, float, or int" — the inverse of the actual condition below.
    if periods is not None:
        if lib.is_float(periods):
            # floats are truncated toward zero, matching int()
            periods = int(periods)
        elif not lib.is_integer(periods):
            raise TypeError(f"periods must be a number, got {periods}")
    # error: Incompatible return value type (got "Optional[float]",
    # expected "Optional[int]")
    return periods  # type: ignore[return-value]
def validate_inferred_freq(
    freq, inferred_freq, freq_infer
) -> tuple[BaseOffset | None, bool]:
    """
    If the user passes a freq and another freq is inferred from passed data,
    require that they match.

    Parameters
    ----------
    freq : DateOffset or None
    inferred_freq : DateOffset or None
    freq_infer : bool

    Returns
    -------
    freq : DateOffset or None
    freq_infer : bool

    Notes
    -----
    We assume at this point that `maybe_infer_freq` has been called, so
    `freq` is either a DateOffset object or None.
    """
    if inferred_freq is None:
        # nothing inferred; pass the inputs through unchanged
        return freq, freq_infer

    if freq is not None and freq != inferred_freq:
        raise ValueError(
            f"Inferred frequency {inferred_freq} from passed "
            "values does not conform to passed frequency "
            f"{freq.freqstr}"
        )
    if freq is None:
        # adopt the inferred freq; no further inference needed
        freq = inferred_freq
        freq_infer = False

    return freq, freq_infer
def maybe_infer_freq(freq):
    """
    Comparing a DateOffset to the string "infer" raises, so we need to
    be careful about comparisons. Make a dummy variable `freq_infer` to
    signify the case where the given freq is "infer" and set freq to None
    to avoid comparison trouble later on.

    Parameters
    ----------
    freq : {DateOffset, None, str}

    Returns
    -------
    freq : {DateOffset, None}
    freq_infer : bool
        Whether we should inherit the freq of passed data.
    """
    if isinstance(freq, BaseOffset):
        # already an offset; never compare it to "infer"
        return freq, False
    if freq == "infer":
        # defer inference to the caller; None avoids later comparisons
        return None, True
    # if a passed freq is None, to_offset passes it through as None
    return to_offset(freq), False
def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype) -> str:
    """
    Return the unit str corresponding to the dtype's resolution.

    Parameters
    ----------
    dtype : DatetimeTZDtype or np.dtype
        If np.dtype, we assume it is a datetime64 dtype.

    Returns
    -------
    str
    """
    if not isinstance(dtype, DatetimeTZDtype):
        # plain numpy datetime64: unit is the first element of datetime_data
        return np.datetime_data(dtype)[0]
    return dtype.unit