Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/datetimelike.py: 21%
344 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Base and utility classes for tseries type pandas objects.
3"""
4from __future__ import annotations
6from datetime import datetime
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 Sequence,
12 TypeVar,
13 cast,
14 final,
15)
16import warnings
18import numpy as np
20from pandas._libs import (
21 NaT,
22 Timedelta,
23 lib,
24)
25from pandas._libs.tslibs import (
26 BaseOffset,
27 Resolution,
28 Tick,
29 parsing,
30 to_offset,
31)
32from pandas.compat.numpy import function as nv
33from pandas.util._decorators import (
34 Appender,
35 cache_readonly,
36 doc,
37)
38from pandas.util._exceptions import find_stack_level
40from pandas.core.dtypes.common import (
41 is_categorical_dtype,
42 is_dtype_equal,
43 is_integer,
44 is_list_like,
45)
46from pandas.core.dtypes.concat import concat_compat
48from pandas.core.arrays import (
49 DatetimeArray,
50 ExtensionArray,
51 PeriodArray,
52 TimedeltaArray,
53)
54from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
55import pandas.core.common as com
56import pandas.core.indexes.base as ibase
57from pandas.core.indexes.base import (
58 Index,
59 _index_shared_docs,
60)
61from pandas.core.indexes.extension import (
62 NDArrayBackedExtensionIndex,
63 inherit_names,
64)
65from pandas.core.indexes.range import RangeIndex
66from pandas.core.tools.timedeltas import to_timedelta
if TYPE_CHECKING:
    from pandas import CategoricalIndex

# Shared docstring substitutions used by Index docs in this module.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)

# TypeVars so methods can declare "returns the same subclass as self".
_T = TypeVar("_T", bound="DatetimeIndexOpsMixin")
_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin")
@inherit_names(
    ["inferred_freq", "_resolution_obj", "resolution"],
    DatetimeLikeArrayMixin,
    cache=True,
)
@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    _is_numeric_dtype = False
    _can_hold_strings = False
    # Backing array; subclasses pin this down further.
    _data: DatetimeArray | TimedeltaArray | PeriodArray
    freq: BaseOffset | None
    freqstr: str | None
    _resolution_obj: Resolution

    # ------------------------------------------------------------------------

    @cache_readonly
    def hasnans(self) -> bool:
        """True if any entry is missing; delegates to the backing array."""
        return self._data._hasna
101 def equals(self, other: Any) -> bool:
102 """
103 Determines if two Index objects contain the same elements.
104 """
105 if self.is_(other):
106 return True
108 if not isinstance(other, Index):
109 return False
110 elif other.dtype.kind in ["f", "i", "u", "c"]:
111 return False
112 elif not isinstance(other, type(self)):
113 should_try = False
114 inferable = self._data._infer_matches
115 if other.dtype == object:
116 should_try = other.inferred_type in inferable
117 elif is_categorical_dtype(other.dtype):
118 other = cast("CategoricalIndex", other)
119 should_try = other.categories.inferred_type in inferable
121 if should_try:
122 try:
123 other = type(self)(other)
124 except (ValueError, TypeError, OverflowError):
125 # e.g.
126 # ValueError -> cannot parse str entry, or OutOfBoundsDatetime
127 # TypeError -> trying to convert IntervalIndex to DatetimeIndex
128 # OverflowError -> Index([very_large_timedeltas])
129 return False
131 if not is_dtype_equal(self.dtype, other.dtype):
132 # have different timezone
133 return False
135 return np.array_equal(self.asi8, other.asi8)
137 @Appender(Index.__contains__.__doc__)
138 def __contains__(self, key: Any) -> bool:
139 hash(key)
140 try:
141 self.get_loc(key)
142 except (KeyError, TypeError, ValueError):
143 return False
144 return True
146 def _convert_tolerance(self, tolerance, target):
147 tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
148 return super()._convert_tolerance(tolerance, target)
150 # --------------------------------------------------------------------
151 # Rendering Methods
153 def format(
154 self,
155 name: bool = False,
156 formatter: Callable | None = None,
157 na_rep: str = "NaT",
158 date_format: str | None = None,
159 ) -> list[str]:
160 """
161 Render a string representation of the Index.
162 """
163 header = []
164 if name:
165 header.append(
166 ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
167 if self.name is not None
168 else ""
169 )
171 if formatter is not None:
172 return header + list(self.map(formatter))
174 return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
176 def _format_with_header(
177 self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
178 ) -> list[str]:
179 # matches base class except for whitespace padding and date_format
180 return header + list(
181 self._format_native_types(na_rep=na_rep, date_format=date_format)
182 )
184 @property
185 def _formatter_func(self):
186 return self._data._formatter()
188 def _format_attrs(self):
189 """
190 Return a list of tuples of the (attr,formatted_value).
191 """
192 attrs = super()._format_attrs()
193 for attrib in self._attributes:
194 # iterating over _attributes prevents us from doing this for PeriodIndex
195 if attrib == "freq":
196 freq = self.freqstr
197 if freq is not None:
198 freq = repr(freq) # e.g. D -> 'D'
199 attrs.append(("freq", freq))
200 return attrs
202 @Appender(Index._summary.__doc__)
203 def _summary(self, name=None) -> str:
204 result = super()._summary(name=name)
205 if self.freq:
206 result += f"\nFreq: {self.freqstr}"
208 return result
210 # --------------------------------------------------------------------
211 # Indexing Methods
213 @final
214 def _can_partial_date_slice(self, reso: Resolution) -> bool:
215 # e.g. test_getitem_setitem_periodindex
216 # History of conversation GH#3452, GH#3931, GH#2369, GH#14826
217 return reso > self._resolution_obj
218 # NB: for DTI/PI, not TDI
220 def _parsed_string_to_bounds(self, reso: Resolution, parsed):
221 raise NotImplementedError
223 def _parse_with_reso(self, label: str):
224 # overridden by TimedeltaIndex
225 try:
226 if self.freq is None or hasattr(self.freq, "rule_code"):
227 freq = self.freq
228 except NotImplementedError:
229 freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
230 parsed, reso_str = parsing.parse_time_string(label, freq)
231 reso = Resolution.from_attrname(reso_str)
232 return parsed, reso
234 def _get_string_slice(self, key: str):
235 # overridden by TimedeltaIndex
236 parsed, reso = self._parse_with_reso(key)
237 try:
238 return self._partial_date_slice(reso, parsed)
239 except KeyError as err:
240 raise KeyError(key) from err
242 @final
243 def _partial_date_slice(
244 self,
245 reso: Resolution,
246 parsed: datetime,
247 ):
248 """
249 Parameters
250 ----------
251 reso : Resolution
252 parsed : datetime
254 Returns
255 -------
256 slice or ndarray[intp]
257 """
258 if not self._can_partial_date_slice(reso):
259 raise ValueError
261 t1, t2 = self._parsed_string_to_bounds(reso, parsed)
262 vals = self._data._ndarray
263 unbox = self._data._unbox
265 if self.is_monotonic_increasing:
267 if len(self) and (
268 (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
269 ):
270 # we are out of range
271 raise KeyError
273 # TODO: does this depend on being monotonic _increasing_?
275 # a monotonic (sorted) series can be sliced
276 left = vals.searchsorted(unbox(t1), side="left")
277 right = vals.searchsorted(unbox(t2), side="right")
278 return slice(left, right)
280 else:
281 lhs_mask = vals >= unbox(t1)
282 rhs_mask = vals <= unbox(t2)
284 # try to find the dates
285 return (lhs_mask & rhs_mask).nonzero()[0]
287 def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
288 """
289 If label is a string, cast it to scalar type according to resolution.
291 Parameters
292 ----------
293 label : object
294 side : {'left', 'right'}
295 kind : {'loc', 'getitem'} or None
297 Returns
298 -------
299 label : object
301 Notes
302 -----
303 Value of `side` parameter should be validated in caller.
304 """
305 assert kind in ["loc", "getitem", None, lib.no_default]
306 self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
308 if isinstance(label, str):
309 try:
310 parsed, reso = self._parse_with_reso(label)
311 except ValueError as err:
312 # DTI -> parsing.DateParseError
313 # TDI -> 'unit abbreviation w/o a number'
314 # PI -> string cannot be parsed as datetime-like
315 raise self._invalid_indexer("slice", label) from err
317 lower, upper = self._parsed_string_to_bounds(reso, parsed)
318 return lower if side == "left" else upper
319 elif not isinstance(label, self._data._recognized_scalars):
320 raise self._invalid_indexer("slice", label)
322 return label
324 # --------------------------------------------------------------------
325 # Arithmetic Methods
327 def shift(self: _T, periods: int = 1, freq=None) -> _T:
328 """
329 Shift index by desired number of time frequency increments.
331 This method is for shifting the values of datetime-like indexes
332 by a specified time increment a given number of times.
334 Parameters
335 ----------
336 periods : int, default 1
337 Number of periods (or increments) to shift by,
338 can be positive or negative.
339 freq : pandas.DateOffset, pandas.Timedelta or string, optional
340 Frequency increment to shift by.
341 If None, the index is shifted by its own `freq` attribute.
342 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
344 Returns
345 -------
346 pandas.DatetimeIndex
347 Shifted index.
349 See Also
350 --------
351 Index.shift : Shift values of Index.
352 PeriodIndex.shift : Shift values of PeriodIndex.
353 """
354 arr = self._data.view()
355 arr._freq = self.freq
356 result = arr._time_shift(periods, freq=freq)
357 return type(self)._simple_new(result, name=self.name)
359 # --------------------------------------------------------------------
361 @doc(Index._maybe_cast_listlike_indexer)
362 def _maybe_cast_listlike_indexer(self, keyarr):
363 try:
364 res = self._data._validate_listlike(keyarr, allow_object=True)
365 except (ValueError, TypeError):
366 if not isinstance(keyarr, ExtensionArray):
367 # e.g. we don't want to cast DTA to ndarray[object]
368 res = com.asarray_tuplesafe(keyarr)
369 # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
370 else:
371 res = keyarr
372 return Index(res, dtype=res.dtype)
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin):
    """
    Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
    but not PeriodIndex
    """

    _data: DatetimeArray | TimedeltaArray
    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]

    # Compat for frequency inference, see GH#23789
    _is_monotonic_increasing = Index.is_monotonic_increasing
    _is_monotonic_decreasing = Index.is_monotonic_decreasing
    _is_unique = Index.is_unique

    _join_precedence = 10
392 def _with_freq(self, freq):
393 arr = self._data._with_freq(freq)
394 return type(self)._simple_new(arr, name=self._name)
396 def is_type_compatible(self, kind: str) -> bool:
397 warnings.warn(
398 f"{type(self).__name__}.is_type_compatible is deprecated and will be "
399 "removed in a future version.",
400 FutureWarning,
401 stacklevel=find_stack_level(),
402 )
403 return kind in self._data._infer_matches
405 @property
406 def values(self) -> np.ndarray:
407 # NB: For Datetime64TZ this is lossy
408 return self._data._ndarray
410 # --------------------------------------------------------------------
411 # Set Operation Methods
413 @cache_readonly
414 def _as_range_index(self) -> RangeIndex:
415 # Convert our i8 representations to RangeIndex
416 # Caller is responsible for checking isinstance(self.freq, Tick)
417 freq = cast(Tick, self.freq)
418 tick = freq.delta.value
419 rng = range(self[0].value, self[-1].value + tick, tick)
420 return RangeIndex(rng)
422 def _can_range_setop(self, other):
423 return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
425 def _wrap_range_setop(self, other, res_i8):
426 new_freq = None
427 if not len(res_i8):
428 # RangeIndex defaults to step=1, which we don't want.
429 new_freq = self.freq
430 elif isinstance(res_i8, RangeIndex):
431 new_freq = to_offset(Timedelta(res_i8.step))
432 res_i8 = res_i8
434 # TODO: we cannot just do
435 # type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
436 # because test_setops_preserve_freq fails with _validate_frequency raising.
437 # This raising is incorrect, as 'on_freq' is incorrect. This will
438 # be fixed by GH#41493
439 res_values = res_i8.values.view(self._data._ndarray.dtype)
440 result = type(self._data)._simple_new(
441 res_values, dtype=self.dtype, freq=new_freq
442 )
443 return self._wrap_setop_result(other, result)
445 def _range_intersect(self, other, sort):
446 # Dispatch to RangeIndex intersection logic.
447 left = self._as_range_index
448 right = other._as_range_index
449 res_i8 = left.intersection(right, sort=sort)
450 return self._wrap_range_setop(other, res_i8)
452 def _range_union(self, other, sort):
453 # Dispatch to RangeIndex union logic.
454 left = self._as_range_index
455 right = other._as_range_index
456 res_i8 = left.union(right, sort=sort)
457 return self._wrap_range_setop(other, res_i8)
459 def _intersection(self, other: Index, sort=False) -> Index:
460 """
461 intersection specialized to the case with matching dtypes and both non-empty.
462 """
463 other = cast("DatetimeTimedeltaMixin", other)
465 if self._can_range_setop(other):
466 return self._range_intersect(other, sort=sort)
468 if not self._can_fast_intersect(other):
469 result = Index._intersection(self, other, sort=sort)
470 # We need to invalidate the freq because Index._intersection
471 # uses _shallow_copy on a view of self._data, which will preserve
472 # self.freq if we're not careful.
473 # At this point we should have result.dtype == self.dtype
474 # and type(result) is type(self._data)
475 result = self._wrap_setop_result(other, result)
476 return result._with_freq(None)._with_freq("infer")
478 else:
479 return self._fast_intersect(other, sort)
481 def _fast_intersect(self, other, sort):
482 # to make our life easier, "sort" the two ranges
483 if self[0] <= other[0]:
484 left, right = self, other
485 else:
486 left, right = other, self
488 # after sorting, the intersection always starts with the right index
489 # and ends with the index of which the last elements is smallest
490 end = min(left[-1], right[-1])
491 start = right[0]
493 if end < start:
494 result = self[:0]
495 else:
496 lslice = slice(*left.slice_locs(start, end))
497 result = left._values[lslice]
499 return result
501 def _can_fast_intersect(self: _T, other: _T) -> bool:
502 # Note: we only get here with len(self) > 0 and len(other) > 0
503 if self.freq is None:
504 return False
506 elif other.freq != self.freq:
507 return False
509 elif not self.is_monotonic_increasing:
510 # Because freq is not None, we must then be monotonic decreasing
511 return False
513 # this along with matching freqs ensure that we "line up",
514 # so intersection will preserve freq
515 # Note we are assuming away Ticks, as those go through _range_intersect
516 # GH#42104
517 return self.freq.n == 1
519 def _can_fast_union(self: _T, other: _T) -> bool:
520 # Assumes that type(self) == type(other), as per the annotation
521 # The ability to fast_union also implies that `freq` should be
522 # retained on union.
523 freq = self.freq
525 if freq is None or freq != other.freq:
526 return False
528 if not self.is_monotonic_increasing:
529 # Because freq is not None, we must then be monotonic decreasing
530 # TODO: do union on the reversed indexes?
531 return False
533 if len(self) == 0 or len(other) == 0:
534 # only reached via union_many
535 return True
537 # to make our life easier, "sort" the two ranges
538 if self[0] <= other[0]:
539 left, right = self, other
540 else:
541 left, right = other, self
543 right_start = right[0]
544 left_end = left[-1]
546 # Only need to "adjoin", not overlap
547 return (right_start == left_end + freq) or right_start in left
549 def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT:
550 # Caller is responsible for ensuring self and other are non-empty
552 # to make our life easier, "sort" the two ranges
553 if self[0] <= other[0]:
554 left, right = self, other
555 elif sort is False:
556 # TDIs are not in the "correct" order and we don't want
557 # to sort but want to remove overlaps
558 left, right = self, other
559 left_start = left[0]
560 loc = right.searchsorted(left_start, side="left")
561 right_chunk = right._values[:loc]
562 dates = concat_compat((left._values, right_chunk))
563 result = type(self)._simple_new(dates, name=self.name)
564 return result
565 else:
566 left, right = other, self
568 left_end = left[-1]
569 right_end = right[-1]
571 # concatenate
572 if left_end < right_end:
573 loc = right.searchsorted(left_end, side="right")
574 right_chunk = right._values[loc:]
575 dates = concat_compat([left._values, right_chunk])
576 # The can_fast_union check ensures that the result.freq
577 # should match self.freq
578 dates = type(self._data)(dates, freq=self.freq)
579 result = type(self)._simple_new(dates)
580 return result
581 else:
582 return left
584 def _union(self, other, sort):
585 # We are called by `union`, which is responsible for this validation
586 assert isinstance(other, type(self))
587 assert self.dtype == other.dtype
589 if self._can_range_setop(other):
590 return self._range_union(other, sort=sort)
592 if self._can_fast_union(other):
593 result = self._fast_union(other, sort=sort)
594 # in the case with sort=None, the _can_fast_union check ensures
595 # that result.freq == self.freq
596 return result
597 else:
598 return super()._union(other, sort)._with_freq("infer")
600 # --------------------------------------------------------------------
601 # Join Methods
603 def _get_join_freq(self, other):
604 """
605 Get the freq to attach to the result of a join operation.
606 """
607 freq = None
608 if self._can_fast_union(other):
609 freq = self.freq
610 return freq
612 def _wrap_joined_index(self, joined, other):
613 assert other.dtype == self.dtype, (other.dtype, self.dtype)
614 result = super()._wrap_joined_index(joined, other)
615 result._data._freq = self._get_join_freq(other)
616 return result
618 def _get_engine_target(self) -> np.ndarray:
619 # engine methods and libjoin methods need dt64/td64 values cast to i8
620 return self._data._ndarray.view("i8")
622 def _from_join_target(self, result: np.ndarray):
623 # view e.g. i8 back to M8[ns]
624 result = result.view(self._data._ndarray.dtype)
625 return self._data._from_backing_data(result)
627 # --------------------------------------------------------------------
628 # List-like Methods
630 def _get_delete_freq(self, loc: int | slice | Sequence[int]):
631 """
632 Find the `freq` for self.delete(loc).
633 """
634 freq = None
635 if self.freq is not None:
636 if is_integer(loc):
637 if loc in (0, -len(self), -1, len(self) - 1):
638 freq = self.freq
639 else:
640 if is_list_like(loc):
641 # error: Incompatible types in assignment (expression has
642 # type "Union[slice, ndarray]", variable has type
643 # "Union[int, slice, Sequence[int]]")
644 loc = lib.maybe_indices_to_slice( # type: ignore[assignment]
645 np.asarray(loc, dtype=np.intp), len(self)
646 )
647 if isinstance(loc, slice) and loc.step in (1, None):
648 if loc.start in (0, None) or loc.stop in (len(self), None):
649 freq = self.freq
650 return freq
652 def _get_insert_freq(self, loc: int, item):
653 """
654 Find the `freq` for self.insert(loc, item).
655 """
656 value = self._data._validate_scalar(item)
657 item = self._data._box_func(value)
659 freq = None
660 if self.freq is not None:
661 # freq can be preserved on edge cases
662 if self.size:
663 if item is NaT:
664 pass
665 elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
666 freq = self.freq
667 elif (loc == len(self)) and item - self.freq == self[-1]:
668 freq = self.freq
669 else:
670 # Adding a single item to an empty index may preserve freq
671 if isinstance(self.freq, Tick):
672 # all TimedeltaIndex cases go through here; is_on_offset
673 # would raise TypeError
674 freq = self.freq
675 elif self.freq.is_on_offset(item):
676 freq = self.freq
677 return freq
679 @doc(NDArrayBackedExtensionIndex.delete)
680 def delete(self, loc) -> DatetimeTimedeltaMixin:
681 result = super().delete(loc)
682 result._data._freq = self._get_delete_freq(loc)
683 return result
685 @doc(NDArrayBackedExtensionIndex.insert)
686 def insert(self, loc: int, item):
687 result = super().insert(loc, item)
688 if isinstance(result, type(self)):
689 # i.e. parent class method did not cast
690 result._data._freq = self._get_insert_freq(loc, item)
691 return result
693 # --------------------------------------------------------------------
694 # NDArray-Like Methods
696 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
697 def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
698 nv.validate_take((), kwargs)
699 indices = np.asarray(indices, dtype=np.intp)
701 result = NDArrayBackedExtensionIndex.take(
702 self, indices, axis, allow_fill, fill_value, **kwargs
703 )
705 maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
706 if isinstance(maybe_slice, slice):
707 freq = self._data._get_getitem_freq(maybe_slice)
708 result._data._freq = freq
709 return result