Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/interval.py: 21%
376 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1""" define the IntervalIndex """
2from __future__ import annotations
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 Any,
11 Hashable,
12 Literal,
13)
15import numpy as np
17from pandas._libs import lib
18from pandas._libs.interval import (
19 Interval,
20 IntervalMixin,
21 IntervalTree,
22)
23from pandas._libs.tslibs import (
24 BaseOffset,
25 Timedelta,
26 Timestamp,
27 to_offset,
28)
29from pandas._typing import (
30 Dtype,
31 DtypeObj,
32 IntervalClosedType,
33 npt,
34)
35from pandas.errors import InvalidIndexError
36from pandas.util._decorators import (
37 Appender,
38 cache_readonly,
39)
40from pandas.util._exceptions import rewrite_exception
42from pandas.core.dtypes.cast import (
43 find_common_type,
44 infer_dtype_from_scalar,
45 maybe_box_datetimelike,
46 maybe_downcast_numeric,
47)
48from pandas.core.dtypes.common import (
49 ensure_platform_int,
50 is_datetime64tz_dtype,
51 is_datetime_or_timedelta_dtype,
52 is_dtype_equal,
53 is_float,
54 is_float_dtype,
55 is_integer,
56 is_integer_dtype,
57 is_interval_dtype,
58 is_list_like,
59 is_number,
60 is_object_dtype,
61 is_scalar,
62)
63from pandas.core.dtypes.dtypes import IntervalDtype
64from pandas.core.dtypes.missing import is_valid_na_for_dtype
66from pandas.core.algorithms import unique
67from pandas.core.arrays.interval import (
68 IntervalArray,
69 _interval_shared_docs,
70)
71import pandas.core.common as com
72from pandas.core.indexers import is_valid_positional_slice
73import pandas.core.indexes.base as ibase
74from pandas.core.indexes.base import (
75 Index,
76 _index_shared_docs,
77 ensure_index,
78 maybe_extract_name,
79)
80from pandas.core.indexes.datetimes import (
81 DatetimeIndex,
82 date_range,
83)
84from pandas.core.indexes.extension import (
85 ExtensionIndex,
86 inherit_names,
87)
88from pandas.core.indexes.multi import MultiIndex
89from pandas.core.indexes.timedeltas import (
90 TimedeltaIndex,
91 timedelta_range,
92)
# Substitution values for the shared docstring templates used throughout this
# module (e.g. via @Appender(_index_shared_docs[...] % _index_doc_kwargs)).
_index_doc_kwargs = dict(ibase._index_doc_kwargs)

_index_doc_kwargs.update(
    {
        "klass": "IntervalIndex",
        "qualname": "IntervalIndex",
        "target_klass": "IntervalIndex or list of Intervals",
        "name": textwrap.dedent(
            """\
         name : object, optional
              Name to be stored in the index.
         """
        ),
    }
)
111def _get_next_label(label):
112 dtype = getattr(label, "dtype", type(label))
113 if isinstance(label, (Timestamp, Timedelta)):
114 dtype = "datetime64"
115 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
116 return label + np.timedelta64(1, "ns")
117 elif is_integer_dtype(dtype):
118 return label + 1
119 elif is_float_dtype(dtype):
120 return np.nextafter(label, np.infty)
121 else:
122 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
125def _get_prev_label(label):
126 dtype = getattr(label, "dtype", type(label))
127 if isinstance(label, (Timestamp, Timedelta)):
128 dtype = "datetime64"
129 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
130 return label - np.timedelta64(1, "ns")
131 elif is_integer_dtype(dtype):
132 return label - 1
133 elif is_float_dtype(dtype):
134 return np.nextafter(label, -np.infty)
135 else:
136 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
def _new_IntervalIndex(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't have
    arguments and breaks __new__.
    """
    # `d` is the dict produced by IntervalIndex.__reduce__:
    # {"left": ..., "right": ..., "closed": ..., "name": ...}
    return cls.from_arrays(**d)
@Appender(
    _interval_shared_docs["class"]
    % {
        "klass": "IntervalIndex",
        "summary": "Immutable index of intervals that are closed on the same side.",
        "name": _index_doc_kwargs["name"],
        "versionadded": "0.20.0",
        "extra_attributes": "is_overlapping\nvalues\n",
        "extra_methods": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        A new ``IntervalIndex`` is typically constructed using
        :func:`interval_range`:

        >>> pd.interval_range(start=0, end=5)
        IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                      dtype='interval[int64, right]')

        It may also be constructed using one of the constructor
        methods: :meth:`IntervalIndex.from_arrays`,
        :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

        See further examples in the doc strings of ``interval_range`` and the
        mentioned constructor methods.
        """
        ),
    }
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
    [
        "__array__",
        "overlaps",
        "contains",
        "closed_left",
        "closed_right",
        "open_left",
        "open_right",
        "is_empty",
    ],
    IntervalArray,
)
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
class IntervalIndex(ExtensionIndex):
    _typ = "intervalindex"

    # annotate properties pinned via inherit_names
    closed: IntervalClosedType
    is_non_overlapping_monotonic: bool
    closed_left: bool
    closed_right: bool
    open_left: bool
    open_right: bool

    _data: IntervalArray
    _values: IntervalArray
    _can_hold_strings = False
    _data_cls = IntervalArray

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data,
        closed=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        verify_integrity: bool = True,
    ) -> IntervalIndex:
        # All validation/coercion is delegated to the IntervalArray
        # constructor; errors it raises are re-labelled with this class name.
        name = maybe_extract_name(name, data, cls)

        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                closed=closed,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_breaks(
        cls,
        breaks,
        closed: IntervalClosedType | None = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_breaks(
                breaks, closed=closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_arrays(
        cls,
        left,
        right,
        closed: IntervalClosedType = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_arrays(
                left, right, closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
        IntervalIndex([(0, 1], (1, 2]],
                       dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_tuples(
        cls,
        data,
        closed: str = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
        return cls._simple_new(arr, name=name)

    # --------------------------------------------------------------------
    # error: Return type "IntervalTree" of "_engine" incompatible with return type
    # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
    @cache_readonly
    def _engine(self) -> IntervalTree:  # type: ignore[override]
        # IntervalTree expects numeric bounds, so datetimelike sides are
        # converted to their i8 representation first.
        left = self._maybe_convert_i8(self.left)
        right = self._maybe_convert_i8(self.right)
        return IntervalTree(left, right, closed=self.closed)

    def __contains__(self, key: Any) -> bool:
        """
        return a boolean if this key is IN the index
        We *only* accept an Interval

        Parameters
        ----------
        key : Interval

        Returns
        -------
        bool
        """
        # hash(key) raises TypeError for unhashable keys, matching the
        # base Index.__contains__ contract.
        hash(key)
        if not isinstance(key, Interval):
            if is_valid_na_for_dtype(key, self.dtype):
                return self.hasnans
            return False

        try:
            self.get_loc(key)
            return True
        except KeyError:
            return False

    @cache_readonly
    def _multiindex(self) -> MultiIndex:
        # MultiIndex view of the two endpoint arrays.
        return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

    def __reduce__(self):
        # Pickle support: round-trip through _new_IntervalIndex/from_arrays.
        d = {
            "left": self.left,
            "right": self.right,
            "closed": self.closed,
            "name": self.name,
        }
        return _new_IntervalIndex, (type(self), d), None

    @property
    def inferred_type(self) -> str:
        """Return a string of the type inferred from the values"""
        return "interval"

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we don't use an explicit engine
        # so return the bytes here
        return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)

    # IntervalTree doesn't have a is_monotonic_decreasing, so have to override
    # the Index implementation
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic decreasing (only equal or
        decreasing values), else False
        """
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return True if the IntervalIndex contains unique elements, else False.
        """
        left = self.left
        right = self.right

        # more than one NaN interval -> duplicated
        if self.isna().sum() > 1:
            return False

        # if either side has no duplicates, every (left, right) pair is unique
        if left.is_unique or right.is_unique:
            return True

        # only need to check positions where the left endpoint is duplicated
        seen_pairs = set()
        check_idx = np.where(left.duplicated(keep=False))[0]
        for idx in check_idx:
            pair = (left[idx], right[idx])
            if pair in seen_pairs:
                return False
            seen_pairs.add(pair)

        return True

    @property
    def is_overlapping(self) -> bool:
        """
        Return True if the IntervalIndex has overlapping intervals, else False.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Returns
        -------
        bool
            Boolean indicating if the IntervalIndex has overlapping intervals.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.
        IntervalIndex.overlaps : Check an IntervalIndex elementwise for
            overlaps.

        Examples
        --------
        >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
        >>> index
        IntervalIndex([(0, 2], (1, 3], (4, 5]],
              dtype='interval[int64, right]')
        >>> index.is_overlapping
        True

        Intervals that share closed endpoints overlap:

        >>> index = pd.interval_range(0, 3, closed='both')
        >>> index
        IntervalIndex([[0, 1], [1, 2], [2, 3]],
              dtype='interval[int64, both]')
        >>> index.is_overlapping
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> index = pd.interval_range(0, 3, closed='left')
        >>> index
        IntervalIndex([[0, 1), [1, 2), [2, 3)],
              dtype='interval[int64, left]')
        >>> index.is_overlapping
        False
        """
        # GH 23309
        return self._engine.is_overlapping

    def _needs_i8_conversion(self, key) -> bool:
        """
        Check if a given key needs i8 conversion. Conversion is necessary for
        Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
        Interval-like requires conversion if its endpoints are one of the
        aforementioned types.

        Assumes that any list-like data has already been cast to an Index.

        Parameters
        ----------
        key : scalar or Index-like
            The key that should be checked for i8 conversion

        Returns
        -------
        bool
        """
        if is_interval_dtype(key) or isinstance(key, Interval):
            return self._needs_i8_conversion(key.left)

        i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
        return isinstance(key, i8_types)

    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Int64Index if converted list-like.
        """
        original = key
        if is_list_like(key):
            key = ensure_index(key)

        if not self._needs_i8_conversion(key):
            return original

        scalar = is_scalar(key)
        if is_interval_dtype(key) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # error: "object" not callable
            return constructor(
                left, right, closed=self.closed
            )  # type: ignore[operator]

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
            if lib.is_period(key):
                key_i8 = key.ordinal
            elif isinstance(key_i8, Timestamp):
                key_i8 = key_i8.value
            elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
                key_i8 = key_i8.view("i8")
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        subtype = self.dtype.subtype  # type: ignore[union-attr]

        if not is_dtype_equal(subtype, key_dtype):
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8

    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
        # Slicing only makes sense when intervals don't overlap and the
        # bounds are monotonic; otherwise a label has no single position.
        if not self.is_non_overlapping_monotonic:
            raise KeyError(
                "can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing"
            )

        if isinstance(label, (IntervalMixin, IntervalIndex)):
            raise NotImplementedError("Interval objects are not currently supported")

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if (side == "left" and self.left.is_monotonic_increasing) or (
            side == "right" and not self.left.is_monotonic_increasing
        ):
            sub_idx = self.right
            if self.open_right:
                # open endpoint excluded: search from the next adjacent value
                label = _get_next_label(label)
        else:
            sub_idx = self.left
            if self.open_left:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)

    # --------------------------------------------------------------------
    # Indexing Methods

    def get_loc(
        self, key, method: str | None = None, tolerance=None
    ) -> int | slice | np.ndarray:
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

            .. deprecated:: 1.4

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply a point inside an interval.

        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
        >>> overlapping_index.get_loc(0.5)
        array([ True, False,  True])

        Only exact matches will be returned if an interval is provided.

        >>> index.get_loc(pd.Interval(0, 1))
        0
        """
        self._check_indexing_method(method)
        self._check_indexing_error(key)

        if isinstance(key, Interval):
            # Interval key: exact match on both endpoints and closed-ness
            if self.closed != key.closed:
                raise KeyError(key)
            mask = (self.left == key.left) & (self.right == key.right)
        elif is_valid_na_for_dtype(key, self.dtype):
            mask = self.isna()
        else:
            # assume scalar
            op_left = le if self.closed_left else lt
            op_right = le if self.closed_right else lt
            try:
                mask = op_left(self.left, key) & op_right(key, self.right)
            except TypeError as err:
                # scalar is not comparable to II subtype --> invalid label
                raise KeyError(key) from err

        matches = mask.sum()
        if matches == 0:
            raise KeyError(key)
        elif matches == 1:
            return mask.argmax()

        res = lib.maybe_booleans_to_slice(mask.view("u1"))
        if isinstance(res, slice) and res.stop is None:
            # TODO: DO this in maybe_booleans_to_slice?
            res = slice(res.start, len(self), res.step)
        return res

    def _get_indexer(
        self,
        target: Index,
        method: str | None = None,
        limit: int | None = None,
        tolerance: Any | None = None,
    ) -> npt.NDArray[np.intp]:

        if isinstance(target, IntervalIndex):
            # We only get here with not self.is_overlapping
            # -> at most one match per interval in target
            # want exact matches -> need both left/right to match, so defer to
            # left/right get_indexer, compare elementwise, equality -> match
            indexer = self._get_indexer_unique_sides(target)

        elif not is_object_dtype(target.dtype):
            # homogeneous scalar index: use IntervalTree
            # we should always have self._should_partial_index(target) here
            target = self._maybe_convert_i8(target)
            indexer = self._engine.get_indexer(target.values)
        else:
            # heterogeneous scalar index: defer elementwise to get_loc
            # we should always have self._should_partial_index(target) here
            return self._get_indexer_pointwise(target)[0]

        return ensure_platform_int(indexer)

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # e.g. IntervalIndex with different closed or incompatible subtype
            #  -> no matches
            return self._get_indexer_non_comparable(target, None, unique=False)

        elif isinstance(target, IntervalIndex):
            if self.left.is_unique and self.right.is_unique:
                # fastpath available even if we don't have self._index_as_unique
                indexer = self._get_indexer_unique_sides(target)
                missing = (indexer == -1).nonzero()[0]
            else:
                return self._get_indexer_pointwise(target)

        elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
            # target might contain intervals: defer elementwise to get_loc
            return self._get_indexer_pointwise(target)

        else:
            # Note: this case behaves differently from other Index subclasses
            #  because IntervalIndex does partial-int indexing
            target = self._maybe_convert_i8(target)
            indexer, missing = self._engine.get_indexer_non_unique(target.values)

        return ensure_platform_int(indexer), ensure_platform_int(missing)

    def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
        """
        _get_indexer specialized to the case where both of our sides are unique.
        """
        # Caller is responsible for checking
        #  `self.left.is_unique and self.right.is_unique`

        left_indexer = self.left.get_indexer(target.left)
        right_indexer = self.right.get_indexer(target.right)
        # a position matches only when both endpoints match at the same spot
        indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
        return indexer

    def _get_indexer_pointwise(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        pointwise implementation for get_indexer and get_indexer_non_unique.
        """
        indexer, missing = [], []
        for i, key in enumerate(target):
            try:
                locs = self.get_loc(key)
                if isinstance(locs, slice):
                    # Only needed for get_indexer_non_unique
                    locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                elif lib.is_integer(locs):
                    locs = np.array(locs, ndmin=1)
                else:
                    # otherwise we have ndarray[bool]
                    locs = np.where(locs)[0]
            except KeyError:
                missing.append(i)
                locs = np.array([-1])
            except InvalidIndexError:
                # i.e. non-scalar key e.g. a tuple.
                # see test_append_different_columns_types_raises
                missing.append(i)
                locs = np.array([-1])

            indexer.append(locs)

        indexer = np.concatenate(indexer)
        return ensure_platform_int(indexer), ensure_platform_int(missing)

    @cache_readonly
    def _index_as_unique(self) -> bool:
        # treated as non-unique when overlapping or when holding >= 2 NaNs
        return not self.is_overlapping and self._engine._na_count < 2

    _requires_unique_msg = (
        "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
    )

    def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
        if not (key.step is None or key.step == 1):
            # GH#31658 if label-based, we require step == 1,
            #  if positional, we disallow float start/stop
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
            if kind == "loc":
                raise ValueError(msg)
            elif kind == "getitem":
                if not is_valid_positional_slice(key):
                    # i.e. this cannot be interpreted as a positional slice
                    raise ValueError(msg)

        return super()._convert_slice_indexer(key, kind, is_frame=is_frame)

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        # integer lookups in Series.__getitem__ are unambiguously
        #  positional in this case
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        return self.dtype.subtype.kind in ["m", "M"]  # type: ignore[union-attr]

    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        # delegate bound casting to the matching endpoint Index (left/right)
        self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
        return getattr(self, side)._maybe_cast_slice_bound(label, side)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        if not isinstance(dtype, IntervalDtype):
            return False
        common_subtype = find_common_type([self.dtype, dtype])
        return not is_object_dtype(common_subtype)

    # --------------------------------------------------------------------
    # Endpoint views of the underlying IntervalArray, exposed as Index objects

    @cache_readonly
    def left(self) -> Index:
        return Index(self._data.left, copy=False)

    @cache_readonly
    def right(self) -> Index:
        return Index(self._data.right, copy=False)

    @cache_readonly
    def mid(self) -> Index:
        return Index(self._data.mid, copy=False)

    @property
    def length(self) -> Index:
        return Index(self._data.length, copy=False)

    # --------------------------------------------------------------------
    # Rendering Methods
    # __repr__ associated methods are based on MultiIndex

    def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
        # matches base class except for whitespace padding
        return header + list(self._format_native_types(na_rep=na_rep))

    def _format_native_types(
        self, *, na_rep="NaN", quoting=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # GH 28210: use base method but with different default na_rep
        return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs)

    def _format_data(self, name=None) -> str:
        # TODO: integrate with categorical and make generic
        # name argument is unused here; just for compat with base / categorical
        return self._data._format_data() + "," + self._format_space()

    # --------------------------------------------------------------------
    # Set Operations

    def _intersection(self, other, sort):
        """
        intersection specialized to the case with matching dtypes.
        """
        # For IntervalIndex we also know other.closed == self.closed
        if self.left.is_unique and self.right.is_unique:
            taken = self._intersection_unique(other)
        elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
            # Swap other/self if other is unique and self does not have
            # multiple NaNs
            taken = other._intersection_unique(self)
        else:
            # duplicates
            taken = self._intersection_non_unique(other)

        if sort is None:
            taken = taken.sort_values()

        return taken

    def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does not have any common endpoint,
        no matter left or right.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is much more performant than super()._intersection(other)
        lindexer = self.left.get_indexer(other.left)
        rindexer = self.right.get_indexer(other.right)

        match = (lindexer == rindexer) & (lindexer != -1)
        indexer = lindexer.take(match.nonzero()[0])
        indexer = unique(indexer)

        return self.take(indexer)

    def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does have some common endpoints,
        on either sides.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is about 3.25x faster than super()._intersection(other)
        #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
        mask = np.zeros(len(self), dtype=bool)

        if self.hasnans and other.hasnans:
            # keep exactly one NaN interval in the result
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

        other_tups = set(zip(other.left, other.right))
        for i, tup in enumerate(zip(self.left, self.right)):
            if tup in other_tups:
                mask[i] = True

        return self[mask]

    # --------------------------------------------------------------------

    def _get_engine_target(self) -> np.ndarray:
        # Note: we _could_ use libjoin functions by either casting to object
        #  dtype or constructing tuples (faster than constructing Intervals)
        #  but the libjoin fastpaths are no longer fast in these cases.
        raise NotImplementedError(
            "IntervalIndex does not use libjoin fastpaths or pass values to "
            "IndexEngine objects"
        )

    def _from_join_target(self, result):
        raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

    # TODO: arithmetic operations
927def _is_valid_endpoint(endpoint) -> bool:
928 """
929 Helper for interval_range to check if start/end are valid types.
930 """
931 return any(
932 [
933 is_number(endpoint),
934 isinstance(endpoint, Timestamp),
935 isinstance(endpoint, Timedelta),
936 endpoint is None,
937 ]
938 )
941def _is_type_compatible(a, b) -> bool:
942 """
943 Helper for interval_range to check type compat of start/end/freq.
944 """
945 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))
946 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))
947 return (
948 (is_number(a) and is_number(b))
949 or (is_ts_compat(a) and is_ts_compat(b))
950 or (is_td_compat(a) and is_td_compat(b))
951 or com.any_none(a, b)
952 )
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``. For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # `endpoint` only determines whether we are numeric or datetime-like
    endpoint = start if start is not None else end

    # default freq (1 or 'D') may be filled in when only one of start/end
    # and periods are missing
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
            # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
            # expected "ndarray[Any, Any]"
            breaks = maybe_downcast_numeric(
                breaks,  # type: ignore[arg-type]
                np.dtype("int64"),
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
1127 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)