Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/datetimelike.py: 21%
344 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Base and utility classes for tseries type pandas objects.
3"""
4from __future__ import annotations
6from datetime import datetime
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 Sequence,
12 TypeVar,
13 cast,
14 final,
15)
16import warnings
18import numpy as np
20from pandas._libs import (
21 NaT,
22 Timedelta,
23 lib,
24)
25from pandas._libs.tslibs import (
26 BaseOffset,
27 Resolution,
28 Tick,
29 parsing,
30 to_offset,
31)
32from pandas.compat.numpy import function as nv
33from pandas.util._decorators import (
34 Appender,
35 cache_readonly,
36 doc,
37)
38from pandas.util._exceptions import find_stack_level
40from pandas.core.dtypes.common import (
41 is_categorical_dtype,
42 is_dtype_equal,
43 is_integer,
44 is_list_like,
45)
46from pandas.core.dtypes.concat import concat_compat
48from pandas.core.arrays import (
49 DatetimeArray,
50 ExtensionArray,
51 PeriodArray,
52 TimedeltaArray,
53)
54from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
55import pandas.core.common as com
56import pandas.core.indexes.base as ibase
57from pandas.core.indexes.base import (
58 Index,
59 _index_shared_docs,
60)
61from pandas.core.indexes.extension import (
62 NDArrayBackedExtensionIndex,
63 inherit_names,
64)
65from pandas.core.indexes.range import RangeIndex
66from pandas.core.tools.timedeltas import to_timedelta
if TYPE_CHECKING:
    from pandas import CategoricalIndex

# Shared docstring substitutions used by Index docs in this module.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)

# TypeVars so methods can declare "returns the same subclass as self".
_T = TypeVar("_T", bound="DatetimeIndexOpsMixin")
_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin")
@inherit_names(
    ["inferred_freq", "_resolution_obj", "resolution"],
    DatetimeLikeArrayMixin,
    cache=True,
)
@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    _is_numeric_dtype = False
    _can_hold_strings = False
    # Backing array; subclasses pin this down further.
    _data: DatetimeArray | TimedeltaArray | PeriodArray
    freq: BaseOffset | None
    freqstr: str | None
    _resolution_obj: Resolution

    # ------------------------------------------------------------------------

    @cache_readonly
    def hasnans(self) -> bool:
        """True if any entry is missing; delegates to the backing array."""
        return self._data._hasna
101 def equals(self, other: Any) -> bool:
102 """
103 Determines if two Index objects contain the same elements.
104 """
105 if self.is_(other):
106 return True
108 if not isinstance(other, Index):
109 return False
110 elif other.dtype.kind in ["f", "i", "u", "c"]:
111 return False
112 elif not isinstance(other, type(self)):
113 should_try = False
114 inferable = self._data._infer_matches
115 if other.dtype == object:
116 should_try = other.inferred_type in inferable
117 elif is_categorical_dtype(other.dtype):
118 other = cast("CategoricalIndex", other)
119 should_try = other.categories.inferred_type in inferable
121 if should_try:
122 try:
123 other = type(self)(other)
124 except (ValueError, TypeError, OverflowError):
125 # e.g.
126 # ValueError -> cannot parse str entry, or OutOfBoundsDatetime
127 # TypeError -> trying to convert IntervalIndex to DatetimeIndex
128 # OverflowError -> Index([very_large_timedeltas])
129 return False
131 if not is_dtype_equal(self.dtype, other.dtype):
132 # have different timezone
133 return False
135 return np.array_equal(self.asi8, other.asi8)
137 @Appender(Index.__contains__.__doc__)
138 def __contains__(self, key: Any) -> bool:
139 hash(key)
140 try:
141 self.get_loc(key)
142 except (KeyError, TypeError, ValueError):
143 return False
144 return True
146 def _convert_tolerance(self, tolerance, target):
147 tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
148 return super()._convert_tolerance(tolerance, target)
150 # --------------------------------------------------------------------
151 # Rendering Methods
153 def format(
154 self,
155 name: bool = False,
156 formatter: Callable | None = None,
157 na_rep: str = "NaT",
158 date_format: str | None = None,
159 ) -> list[str]:
160 """
161 Render a string representation of the Index.
162 """
163 header = []
164 if name:
165 header.append(
166 ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
167 if self.name is not None
168 else ""
169 )
171 if formatter is not None:
172 return header + list(self.map(formatter))
174 return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
176 def _format_with_header(
177 self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
178 ) -> list[str]:
179 # matches base class except for whitespace padding and date_format
180 return header + list(
181 self._format_native_types(na_rep=na_rep, date_format=date_format)
182 )
184 @property
185 def _formatter_func(self):
186 return self._data._formatter()
188 def _format_attrs(self):
189 """
190 Return a list of tuples of the (attr,formatted_value).
191 """
192 attrs = super()._format_attrs()
193 for attrib in self._attributes:
194 # iterating over _attributes prevents us from doing this for PeriodIndex
195 if attrib == "freq":
196 freq = self.freqstr
197 if freq is not None:
198 freq = repr(freq) # e.g. D -> 'D'
199 attrs.append(("freq", freq))
200 return attrs
202 @Appender(Index._summary.__doc__)
203 def _summary(self, name=None) -> str:
204 result = super()._summary(name=name)
205 if self.freq:
206 result += f"\nFreq: {self.freqstr}"
208 return result
210 # --------------------------------------------------------------------
211 # Indexing Methods
213 @final
214 def _can_partial_date_slice(self, reso: Resolution) -> bool:
215 # e.g. test_getitem_setitem_periodindex
216 # History of conversation GH#3452, GH#3931, GH#2369, GH#14826
217 return reso > self._resolution_obj
218 # NB: for DTI/PI, not TDI
220 def _parsed_string_to_bounds(self, reso: Resolution, parsed):
221 raise NotImplementedError
223 def _parse_with_reso(self, label: str):
224 # overridden by TimedeltaIndex
225 try:
226 if self.freq is None or hasattr(self.freq, "rule_code"):
227 freq = self.freq
228 except NotImplementedError:
229 freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
230 parsed, reso_str = parsing.parse_time_string(label, freq)
231 reso = Resolution.from_attrname(reso_str)
232 return parsed, reso
234 def _get_string_slice(self, key: str):
235 # overridden by TimedeltaIndex
236 parsed, reso = self._parse_with_reso(key)
237 try:
238 return self._partial_date_slice(reso, parsed)
239 except KeyError as err:
240 raise KeyError(key) from err
242 @final
243 def _partial_date_slice(
244 self,
245 reso: Resolution,
246 parsed: datetime,
247 ):
248 """
249 Parameters
250 ----------
251 reso : Resolution
252 parsed : datetime
254 Returns
255 -------
256 slice or ndarray[intp]
257 """
258 if not self._can_partial_date_slice(reso):
259 raise ValueError
261 t1, t2 = self._parsed_string_to_bounds(reso, parsed)
262 vals = self._data._ndarray
263 unbox = self._data._unbox
265 if self.is_monotonic_increasing:
267 if len(self) and (
268 (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
269 ):
270 # we are out of range
271 raise KeyError
273 # TODO: does this depend on being monotonic _increasing_?
275 # a monotonic (sorted) series can be sliced
276 left = vals.searchsorted(unbox(t1), side="left")
277 right = vals.searchsorted(unbox(t2), side="right")
278 return slice(left, right)
280 else:
281 lhs_mask = vals >= unbox(t1)
282 rhs_mask = vals <= unbox(t2)
284 # try to find the dates
285 return (lhs_mask & rhs_mask).nonzero()[0]
287 def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
288 """
289 If label is a string, cast it to scalar type according to resolution.
291 Parameters
292 ----------
293 label : object
294 side : {'left', 'right'}
295 kind : {'loc', 'getitem'} or None
297 Returns
298 -------
299 label : object
301 Notes
302 -----
303 Value of `side` parameter should be validated in caller.
304 """
305 assert kind in ["loc", "getitem", None, lib.no_default]
306 self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
308 if isinstance(label, str):
309 try:
310 parsed, reso = self._parse_with_reso(label)
311 except ValueError as err:
312 # DTI -> parsing.DateParseError
313 # TDI -> 'unit abbreviation w/o a number'
314 # PI -> string cannot be parsed as datetime-like
315 raise self._invalid_indexer("slice", label) from err
317 lower, upper = self._parsed_string_to_bounds(reso, parsed)
318 return lower if side == "left" else upper
319 elif not isinstance(label, self._data._recognized_scalars):
320 raise self._invalid_indexer("slice", label)
322 return label
324 # --------------------------------------------------------------------
325 # Arithmetic Methods
327 def shift(self: _T, periods: int = 1, freq=None) -> _T:
328 """
329 Shift index by desired number of time frequency increments.
331 This method is for shifting the values of datetime-like indexes
332 by a specified time increment a given number of times.
334 Parameters
335 ----------
336 periods : int, default 1
337 Number of periods (or increments) to shift by,
338 can be positive or negative.
339 freq : pandas.DateOffset, pandas.Timedelta or string, optional
340 Frequency increment to shift by.
341 If None, the index is shifted by its own `freq` attribute.
342 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
344 Returns
345 -------
346 pandas.DatetimeIndex
347 Shifted index.
349 See Also
350 --------
351 Index.shift : Shift values of Index.
352 PeriodIndex.shift : Shift values of PeriodIndex.
353 """
354 arr = self._data.view()
355 arr._freq = self.freq
356 result = arr._time_shift(periods, freq=freq)
357 return type(self)._simple_new(result, name=self.name)
359 # --------------------------------------------------------------------
361 @doc(Index._maybe_cast_listlike_indexer)
362 def _maybe_cast_listlike_indexer(self, keyarr):
363 try:
364 res = self._data._validate_listlike(keyarr, allow_object=True)
365 except (ValueError, TypeError):
366 if not isinstance(keyarr, ExtensionArray):
367 # e.g. we don't want to cast DTA to ndarray[object]
368 res = com.asarray_tuplesafe(keyarr)
369 # TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
370 else:
371 res = keyarr
372 return Index(res, dtype=res.dtype)
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin):
    """
    Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
    but not PeriodIndex
    """

    _data: DatetimeArray | TimedeltaArray
    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]

    # Compat for frequency inference, see GH#23789
    _is_monotonic_increasing = Index.is_monotonic_increasing
    _is_monotonic_decreasing = Index.is_monotonic_decreasing
    _is_unique = Index.is_unique

    _join_precedence = 10
392 def _with_freq(self, freq):
393 arr = self._data._with_freq(freq)
394 return type(self)._simple_new(arr, name=self._name)
396 def is_type_compatible(self, kind: str) -> bool:
397 warnings.warn(
398 f"{type(self).__name__}.is_type_compatible is deprecated and will be "
399 "removed in a future version.",
400 FutureWarning,
401 stacklevel=find_stack_level(),
402 )
403 return kind in self._data._infer_matches
405 @property
406 def values(self) -> np.ndarray:
407 # NB: For Datetime64TZ this is lossy
408 return self._data._ndarray
410 # --------------------------------------------------------------------
411 # Set Operation Methods
413 @cache_readonly
414 def _as_range_index(self) -> RangeIndex:
415 # Convert our i8 representations to RangeIndex
416 # Caller is responsible for checking isinstance(self.freq, Tick)
417 freq = cast(Tick, self.freq)
418 tick = freq.delta.value
419 rng = range(self[0].value, self[-1].value + tick, tick)
420 return RangeIndex(rng)
422 def _can_range_setop(self, other):
423 return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
425 def _wrap_range_setop(self, other, res_i8):
426 new_freq = None
427 if not len(res_i8):
428 # RangeIndex defaults to step=1, which we don't want.
429 new_freq = self.freq
430 elif isinstance(res_i8, RangeIndex):
431 new_freq = to_offset(Timedelta(res_i8.step))
432 res_i8 = res_i8
434 # TODO: we cannot just do
435 # type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
436 # because test_setops_preserve_freq fails with _validate_frequency raising.
437 # This raising is incorrect, as 'on_freq' is incorrect. This will
438 # be fixed by GH#41493
439 res_values = res_i8.values.view(self._data._ndarray.dtype)
440 result = type(self._data)._simple_new(
441 res_values, dtype=self.dtype, freq=new_freq
442 )
443 return self._wrap_setop_result(other, result)
445 def _range_intersect(self, other, sort):
446 # Dispatch to RangeIndex intersection logic.
447 left = self._as_range_index
448 right = other._as_range_index
449 res_i8 = left.intersection(right, sort=sort)
450 return self._wrap_range_setop(other, res_i8)
452 def _range_union(self, other, sort):
453 # Dispatch to RangeIndex union logic.
454 left = self._as_range_index
455 right = other._as_range_index
456 res_i8 = left.union(right, sort=sort)
457 return self._wrap_range_setop(other, res_i8)
459 def _intersection(self, other: Index, sort=False) -> Index:
460 """
461 intersection specialized to the case with matching dtypes and both non-empty.
462 """
463 other = cast("DatetimeTimedeltaMixin", other)
465 if self._can_range_setop(other):
466 return self._range_intersect(other, sort=sort)
468 if not self._can_fast_intersect(other):
469 result = Index._intersection(self, other, sort=sort)
470 # We need to invalidate the freq because Index._intersection
471 # uses _shallow_copy on a view of self._data, which will preserve
472 # self.freq if we're not careful.
473 # At this point we should have result.dtype == self.dtype
474 # and type(result) is type(self._data)
475 result = self._wrap_setop_result(other, result)
476 return result._with_freq(None)._with_freq("infer")
478 else:
479 return self._fast_intersect(other, sort)
481 def _fast_intersect(self, other, sort):
482 # to make our life easier, "sort" the two ranges
483 if self[0] <= other[0]:
484 left, right = self, other
485 else:
486 left, right = other, self
488 # after sorting, the intersection always starts with the right index
489 # and ends with the index of which the last elements is smallest
490 end = min(left[-1], right[-1])
491 start = right[0]
493 if end < start:
494 result = self[:0]
495 else:
496 lslice = slice(*left.slice_locs(start, end))
497 result = left._values[lslice]
499 return result
501 def _can_fast_intersect(self: _T, other: _T) -> bool:
502 # Note: we only get here with len(self) > 0 and len(other) > 0
503 if self.freq is None:
504 return False
506 elif other.freq != self.freq:
507 return False
509 elif not self.is_monotonic_increasing:
510 # Because freq is not None, we must then be monotonic decreasing
511 return False
513 # this along with matching freqs ensure that we "line up",
514 # so intersection will preserve freq
515 # Note we are assuming away Ticks, as those go through _range_intersect
516 # GH#42104
517 return self.freq.n == 1
519 def _can_fast_union(self: _T, other: _T) -> bool:
520 # Assumes that type(self) == type(other), as per the annotation
521 # The ability to fast_union also implies that `freq` should be
522 # retained on union.
523 freq = self.freq
525 if freq is None or freq != other.freq:
526 return False
528 if not self.is_monotonic_increasing:
529 # Because freq is not None, we must then be monotonic decreasing
530 # TODO: do union on the reversed indexes?
531 return False
533 if len(self) == 0 or len(other) == 0:
534 # only reached via union_many
535 return True
537 # to make our life easier, "sort" the two ranges
538 if self[0] <= other[0]:
539 left, right = self, other
540 else:
541 left, right = other, self
543 right_start = right[0]
544 left_end = left[-1]
546 # Only need to "adjoin", not overlap
547 return (right_start == left_end + freq) or right_start in left
549 def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT:
550 # Caller is responsible for ensuring self and other are non-empty
552 # to make our life easier, "sort" the two ranges
553 if self[0] <= other[0]:
554 left, right = self, other
555 elif sort is False:
556 # TDIs are not in the "correct" order and we don't want
557 # to sort but want to remove overlaps
558 left, right = self, other
559 left_start = left[0]
560 loc = right.searchsorted(left_start, side="left")
561 right_chunk = right._values[:loc]
562 dates = concat_compat((left._values, right_chunk))
563 result = type(self)._simple_new(dates, name=self.name)
564 return result
565 else:
566 left, right = other, self
568 left_end = left[-1]
569 right_end = right[-1]
571 # concatenate
572 if left_end < right_end:
573 loc = right.searchsorted(left_end, side="right")
574 right_chunk = right._values[loc:]
575 dates = concat_compat([left._values, right_chunk])
576 # The can_fast_union check ensures that the result.freq
577 # should match self.freq
578 dates = type(self._data)(dates, freq=self.freq)
579 result = type(self)._simple_new(dates)
580 return result
581 else:
582 return left
584 def _union(self, other, sort):
585 # We are called by `union`, which is responsible for this validation
586 assert isinstance(other, type(self))
587 assert self.dtype == other.dtype
589 if self._can_range_setop(other):
590 return self._range_union(other, sort=sort)
592 if self._can_fast_union(other):
593 result = self._fast_union(other, sort=sort)
594 # in the case with sort=None, the _can_fast_union check ensures
595 # that result.freq == self.freq
596 return result
597 else:
598 return super()._union(other, sort)._with_freq("infer")
600 # --------------------------------------------------------------------
601 # Join Methods
603 def _get_join_freq(self, other):
604 """
605 Get the freq to attach to the result of a join operation.
606 """
607 freq = None
608 if self._can_fast_union(other):
609 freq = self.freq
610 return freq
612 def _wrap_joined_index(self, joined, other):
613 assert other.dtype == self.dtype, (other.dtype, self.dtype)
614 result = super()._wrap_joined_index(joined, other)
615 result._data._freq = self._get_join_freq(other)
616 return result
618 def _get_engine_target(self) -> np.ndarray:
619 # engine methods and libjoin methods need dt64/td64 values cast to i8
620 return self._data._ndarray.view("i8")
622 def _from_join_target(self, result: np.ndarray):
623 # view e.g. i8 back to M8[ns]
624 result = result.view(self._data._ndarray.dtype)
625 return self._data._from_backing_data(result)
627 # --------------------------------------------------------------------
628 # List-like Methods
630 def _get_delete_freq(self, loc: int | slice | Sequence[int]):
631 """
632 Find the `freq` for self.delete(loc).
633 """
634 freq = None
635 if self.freq is not None:
636 if is_integer(loc):
637 if loc in (0, -len(self), -1, len(self) - 1):
638 freq = self.freq
639 else:
640 if is_list_like(loc):
641 # error: Incompatible types in assignment (expression has
642 # type "Union[slice, ndarray]", variable has type
643 # "Union[int, slice, Sequence[int]]")
644 loc = lib.maybe_indices_to_slice( # type: ignore[assignment]
645 np.asarray(loc, dtype=np.intp), len(self)
646 )
647 if isinstance(loc, slice) and loc.step in (1, None):
648 if loc.start in (0, None) or loc.stop in (len(self), None):
649 freq = self.freq
650 return freq
652 def _get_insert_freq(self, loc: int, item):
653 """
654 Find the `freq` for self.insert(loc, item).
655 """
656 value = self._data._validate_scalar(item)
657 item = self._data._box_func(value)
659 freq = None
660 if self.freq is not None:
661 # freq can be preserved on edge cases
662 if self.size:
663 if item is NaT:
664 pass
665 elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
666 freq = self.freq
667 elif (loc == len(self)) and item - self.freq == self[-1]:
668 freq = self.freq
669 else:
670 # Adding a single item to an empty index may preserve freq
671 if isinstance(self.freq, Tick):
672 # all TimedeltaIndex cases go through here; is_on_offset
673 # would raise TypeError
674 freq = self.freq
675 elif self.freq.is_on_offset(item):
676 freq = self.freq
677 return freq
679 @doc(NDArrayBackedExtensionIndex.delete)
680 def delete(self, loc) -> DatetimeTimedeltaMixin:
681 result = super().delete(loc)
682 result._data._freq = self._get_delete_freq(loc)
683 return result
685 @doc(NDArrayBackedExtensionIndex.insert)
686 def insert(self, loc: int, item):
687 result = super().insert(loc, item)
688 if isinstance(result, type(self)):
689 # i.e. parent class method did not cast
690 result._data._freq = self._get_insert_freq(loc, item)
691 return result
693 # --------------------------------------------------------------------
694 # NDArray-Like Methods
696 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
697 def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
698 nv.validate_take((), kwargs)
699 indices = np.asarray(indices, dtype=np.intp)
701 result = NDArrayBackedExtensionIndex.take(
702 self, indices, axis, allow_fill, fill_value, **kwargs
703 )
705 maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
706 if isinstance(maybe_slice, slice):
707 freq = self._data._get_getitem_freq(maybe_slice)
708 result._data._freq = freq
709 return result