Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/interval.py: 18%
610 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import operator
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 TYPE_CHECKING,
11 Literal,
12 Sequence,
13 TypeVar,
14 Union,
15 cast,
16 overload,
17)
19import numpy as np
21from pandas._config import get_option
23from pandas._libs import lib
24from pandas._libs.interval import (
25 VALID_CLOSED,
26 Interval,
27 IntervalMixin,
28 intervals_to_interval_bounds,
29)
30from pandas._libs.missing import NA
31from pandas._typing import (
32 ArrayLike,
33 Dtype,
34 IntervalClosedType,
35 NpDtype,
36 PositionalIndexer,
37 ScalarIndexer,
38 SequenceIndexer,
39 npt,
40)
41from pandas.compat.numpy import function as nv
42from pandas.errors import IntCastingNaNError
43from pandas.util._decorators import (
44 Appender,
45 deprecate_nonkeyword_arguments,
46)
48from pandas.core.dtypes.cast import LossySetitemError
49from pandas.core.dtypes.common import (
50 is_categorical_dtype,
51 is_dtype_equal,
52 is_float_dtype,
53 is_integer_dtype,
54 is_interval_dtype,
55 is_list_like,
56 is_object_dtype,
57 is_scalar,
58 is_string_dtype,
59 needs_i8_conversion,
60 pandas_dtype,
61)
62from pandas.core.dtypes.dtypes import IntervalDtype
63from pandas.core.dtypes.generic import (
64 ABCDataFrame,
65 ABCDatetimeIndex,
66 ABCIntervalIndex,
67 ABCPeriodIndex,
68)
69from pandas.core.dtypes.missing import (
70 is_valid_na_for_dtype,
71 isna,
72 notna,
73)
75from pandas.core.algorithms import (
76 isin,
77 take,
78 unique,
79 value_counts,
80)
81from pandas.core.arrays.base import (
82 ExtensionArray,
83 _extension_array_shared_docs,
84)
85import pandas.core.common as com
86from pandas.core.construction import (
87 array as pd_array,
88 ensure_wrapped_if_datetimelike,
89 extract_array,
90)
91from pandas.core.indexers import check_array_indexer
92from pandas.core.indexes.base import ensure_index
93from pandas.core.ops import (
94 invalid_comparison,
95 unpack_zerodim_and_defer,
96)
98if TYPE_CHECKING: 98 ↛ 99line 98 didn't jump to line 99, because the condition on line 98 was never true
99 from pandas import (
100 Index,
101 Series,
102 )
105IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
106IntervalOrNA = Union[Interval, float]
108_interval_shared_docs: dict[str, str] = {}
110_shared_docs_kwargs = {
111 "klass": "IntervalArray",
112 "qualname": "arrays.IntervalArray",
113 "name": "",
114}
117_interval_shared_docs[
118 "class"
119] = """
120%(summary)s
122.. versionadded:: %(versionadded)s
124Parameters
125----------
126data : array-like (1-dimensional)
127 Array-like containing Interval objects from which to build the
128 %(klass)s.
129closed : {'left', 'right', 'both', 'neither'}, default 'right'
130 Whether the intervals are closed on the left-side, right-side, both or
131 neither.
132dtype : dtype or None, default None
133 If None, dtype will be inferred.
134copy : bool, default False
135 Copy the input data.
136%(name)s\
137verify_integrity : bool, default True
138 Verify that the %(klass)s is valid.
140Attributes
141----------
142left
143right
144closed
145mid
146length
147is_empty
148is_non_overlapping_monotonic
149%(extra_attributes)s\
151Methods
152-------
153from_arrays
154from_tuples
155from_breaks
156contains
157overlaps
158set_closed
159to_tuples
160%(extra_methods)s\
162See Also
163--------
164Index : The base pandas Index type.
165Interval : A bounded slice-like interval; the elements of an %(klass)s.
166interval_range : Function to create a fixed frequency IntervalIndex.
167cut : Bin values into discrete Intervals.
168qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.
170Notes
171-----
172See the `user guide
173<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
174for more.
176%(examples)s\
177"""
180@Appender(
181 _interval_shared_docs["class"]
182 % {
183 "klass": "IntervalArray",
184 "summary": "Pandas array for interval data that are closed on the same side.",
185 "versionadded": "0.24.0",
186 "name": "",
187 "extra_attributes": "",
188 "extra_methods": "",
189 "examples": textwrap.dedent(
190 """\
191 Examples
192 --------
193 A new ``IntervalArray`` can be constructed directly from an array-like of
194 ``Interval`` objects:
196 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
197 <IntervalArray>
198 [(0, 1], (1, 5]]
199 Length: 2, dtype: interval[int64, right]
201 It may also be constructed using one of the constructor
202 methods: :meth:`IntervalArray.from_arrays`,
203 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
204 """
205 ),
206 }
207)
208class IntervalArray(IntervalMixin, ExtensionArray):
209 can_hold_na = True
210 _na_value = _fill_value = np.nan
212 @property
213 def ndim(self) -> Literal[1]:
214 return 1
216 # To make mypy recognize the fields
217 _left: np.ndarray
218 _right: np.ndarray
219 _dtype: IntervalDtype
221 # ---------------------------------------------------------------------
222 # Constructors
def __new__(
    cls: type[IntervalArrayT],
    data,
    closed=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    verify_integrity: bool = True,
):
    """
    Create an interval array from an existing instance or an array-like.

    Parameters
    ----------
    data : array-like
        Either an instance of ``cls`` (its bounds are reused) or a collection
        that ``intervals_to_interval_bounds`` can split into left/right bounds.
    closed : optional
        Closed side; when falsy, the value carried by / inferred from ``data``
        is used.
    dtype, copy, verify_integrity
        Forwarded unchanged to ``cls._simple_new``.

    Raises
    ------
    TypeError
        If ``data`` is a scalar.
    """
    data = extract_array(data, extract_numpy=True)

    if not isinstance(data, cls):
        # don't allow scalars
        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}(...) must be called with a collection "
                f"of some kind, {data} was passed"
            )

        # might need to convert empty or purely na data
        data = _maybe_convert_platform_interval(data)
        left, right, inferred_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )
        if left.dtype == object:
            left = lib.maybe_convert_objects(left)
            right = lib.maybe_convert_objects(right)
        closed = closed or inferred_closed
    else:
        # Already an instance of this class: take its bounds as they are.
        left, right = data._left, data._right
        closed = closed or data.closed

    return cls._simple_new(
        left,
        right,
        closed,
        copy=copy,
        dtype=dtype,
        verify_integrity=verify_integrity,
    )
@classmethod
def _simple_new(
    cls: type[IntervalArrayT],
    left,
    right,
    closed: IntervalClosedType | None = None,
    copy: bool = False,
    dtype: Dtype | None = None,
    verify_integrity: bool = True,
) -> IntervalArrayT:
    """
    Build an instance directly from left and right bounds.

    Parameters
    ----------
    left, right : array-like
        Bounds; each is passed through ``ensure_index``.
    closed : optional
        Closed side. When None and ``dtype`` is an ``IntervalDtype``,
        ``dtype.closed`` is used; a falsy result falls back to ``"right"``.
    copy : bool, default False
        Forwarded to ``ensure_index`` for both bounds.
    dtype : optional
        Must resolve to an interval dtype; its subtype (when set) is applied
        to both bounds.
    verify_integrity : bool, default True
        Whether to call ``_validate`` on the result before returning it.

    Raises
    ------
    TypeError
        If ``dtype`` is given but is not an interval dtype, or the bounds
        have a categorical or string dtype.
    ValueError
        If ``closed`` disagrees with ``dtype.closed``, the bounds are of
        different index types, are period indexes, or are datetime indexes
        whose time zones differ.
    """
    result = IntervalMixin.__new__(cls)

    if closed is None and isinstance(dtype, IntervalDtype):
        closed = dtype.closed

    closed = closed or "right"
    left = ensure_index(left, copy=copy)
    right = ensure_index(right, copy=copy)

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if is_interval_dtype(dtype):
            dtype = cast(IntervalDtype, dtype)
            if dtype.subtype is not None:
                left = left.astype(dtype.subtype)
                right = right.astype(dtype.subtype)
        else:
            msg = f"dtype must be an IntervalDtype, got {dtype}"
            raise TypeError(msg)

        if dtype.closed is None:
            # possibly loading an old pickle
            dtype = IntervalDtype(dtype.subtype, closed)
        elif closed != dtype.closed:
            raise ValueError("closed keyword does not match dtype.closed")

    # coerce dtypes to match if needed
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    # Reject bound combinations that cannot form a valid interval array.
    if type(left) != type(right):
        msg = (
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
        raise ValueError(msg)
    elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
        raise TypeError(msg)
    elif isinstance(left, ABCPeriodIndex):
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        raise ValueError(msg)
    elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        msg = (
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )
        raise ValueError(msg)

    # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
    left = ensure_wrapped_if_datetimelike(left)
    left = extract_array(left, extract_numpy=True)
    right = ensure_wrapped_if_datetimelike(right)
    right = extract_array(right, extract_numpy=True)

    # Compare the underlying buffers to detect bounds that alias each other.
    lbase = getattr(left, "_ndarray", left).base
    rbase = getattr(right, "_ndarray", right).base
    if lbase is not None and lbase is rbase:
        # If these share data, then setitem could corrupt our IA
        right = right.copy()

    # The stored dtype is always rebuilt from the final left bounds.
    dtype = IntervalDtype(left.dtype, closed=closed)
    result._dtype = dtype

    result._left = left
    result._right = right
    if verify_integrity:
        result._validate()
    return result
355 @classmethod
356 def _from_sequence(
357 cls: type[IntervalArrayT],
358 scalars,
359 *,
360 dtype: Dtype | None = None,
361 copy: bool = False,
362 ) -> IntervalArrayT:
363 return cls(scalars, dtype=dtype, copy=copy)
@classmethod
def _from_factorized(
    cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT
) -> IntervalArrayT:
    """
    Rebuild an array from factorized ``values``, reusing ``original.closed``.
    """
    if not len(values):
        # An empty array returns object-dtype here. We can't create
        # a new IA from an (empty) object-dtype array, so turn it into the
        # correct dtype.
        values = values.astype(original.dtype.subtype)
    closed = original.closed
    return cls(values, closed=closed)
376 _interval_shared_docs["from_breaks"] = textwrap.dedent(
377 """
378 Construct an %(klass)s from an array of splits.
380 Parameters
381 ----------
382 breaks : array-like (1-dimensional)
383 Left and right bounds for each interval.
384 closed : {'left', 'right', 'both', 'neither'}, default 'right'
385 Whether the intervals are closed on the left-side, right-side, both
386 or neither.
387 copy : bool, default False
388 Copy the data.
389 dtype : dtype or None, default None
390 If None, dtype will be inferred.
392 Returns
393 -------
394 %(klass)s
396 See Also
397 --------
398 interval_range : Function to create a fixed frequency IntervalIndex.
399 %(klass)s.from_arrays : Construct from a left and right array.
400 %(klass)s.from_tuples : Construct from a sequence of tuples.
402 %(examples)s\
403 """
404 )
@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def from_breaks(
    cls: type[IntervalArrayT],
    breaks,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # NOTE: the docstring is attached by the ``Appender`` decorator, so only
    # ``#`` comments are used in this body.
    breaks = _maybe_convert_platform_interval(breaks)

    # Consecutive break points pair up as (left, right) bounds.
    left_bounds = breaks[:-1]
    right_bounds = breaks[1:]
    return cls.from_arrays(left_bounds, right_bounds, closed, copy=copy, dtype=dtype)
434 _interval_shared_docs["from_arrays"] = textwrap.dedent(
435 """
436 Construct from two arrays defining the left and right bounds.
438 Parameters
439 ----------
440 left : array-like (1-dimensional)
441 Left bounds for each interval.
442 right : array-like (1-dimensional)
443 Right bounds for each interval.
444 closed : {'left', 'right', 'both', 'neither'}, default 'right'
445 Whether the intervals are closed on the left-side, right-side, both
446 or neither.
447 copy : bool, default False
448 Copy the data.
449 dtype : dtype, optional
450 If None, dtype will be inferred.
452 Returns
453 -------
454 %(klass)s
456 Raises
457 ------
458 ValueError
459 When a value is missing in only one of `left` or `right`.
460 When a value in `left` is greater than the corresponding value
461 in `right`.
463 See Also
464 --------
465 interval_range : Function to create a fixed frequency IntervalIndex.
466 %(klass)s.from_breaks : Construct an %(klass)s from an array of
467 splits.
468 %(klass)s.from_tuples : Construct an %(klass)s from an
469 array-like of tuples.
471 Notes
472 -----
473 Each element of `left` must be less than or equal to the `right`
474 element at the same position. If an element is missing, it must be
475 missing in both `left` and `right`. A TypeError is raised when
476 using an unsupported type for `left` or `right`. At the moment,
477 'category', 'object', and 'string' subtypes are not supported.
479 %(examples)s\
480 """
481 )
@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % {
        "klass": "IntervalArray",
        # The "Examples" section header is required here: the shared template
        # only contains ``%(examples)s`` and, without the header, the example
        # would be appended to the preceding "Notes" section.
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def from_arrays(
    cls: type[IntervalArrayT],
    left,
    right,
    closed: IntervalClosedType | None = "right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # NOTE: the docstring is attached by the ``Appender`` decorator, so only
    # ``#`` comments are used in this body.
    left = _maybe_convert_platform_interval(left)
    right = _maybe_convert_platform_interval(right)

    # Bounds supplied through this constructor are always validated.
    return cls._simple_new(
        left, right, closed, copy=copy, dtype=dtype, verify_integrity=True
    )
513 _interval_shared_docs["from_tuples"] = textwrap.dedent(
514 """
515 Construct an %(klass)s from an array-like of tuples.
517 Parameters
518 ----------
519 data : array-like (1-dimensional)
520 Array of tuples.
521 closed : {'left', 'right', 'both', 'neither'}, default 'right'
522 Whether the intervals are closed on the left-side, right-side, both
523 or neither.
524 copy : bool, default False
525 By-default copy the data, this is compat only and ignored.
526 dtype : dtype or None, default None
527 If None, dtype will be inferred.
529 Returns
530 -------
531 %(klass)s
533 See Also
534 --------
535 interval_range : Function to create a fixed frequency IntervalIndex.
536 %(klass)s.from_arrays : Construct an %(klass)s from a left and
537 right array.
538 %(klass)s.from_breaks : Construct an %(klass)s from an array of
539 splits.
541 %(examples)s\
542 """
543 )
@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    <IntervalArray>
    [(0, 1], (1, 2]]
    Length: 2, dtype: interval[int64, right]
    """
        ),
    }
)
def from_tuples(
    cls: type[IntervalArrayT],
    data,
    closed="right",
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalArrayT:
    # NOTE: the docstring is attached by the ``Appender`` decorator, so only
    # ``#`` comments are used in this body.
    if not len(data):
        # ensure that empty data keeps input dtype
        left = right = data
    else:
        left, right = [], []

    klass_name = cls.__name__
    for item in data:
        if isna(item):
            lower = upper = np.nan
        else:
            try:
                # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                lower, upper = item
            except ValueError as err:
                raise ValueError(
                    f"{klass_name}.from_tuples requires tuples of length 2, got {item}"
                ) from err
            except TypeError as err:
                raise TypeError(
                    f"{klass_name}.from_tuples received an invalid item, {item}"
                ) from err
        left.append(lower)
        right.append(upper)

    # ``copy`` is accepted for compatibility only; False is always forwarded.
    return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
594 def _validate(self):
595 """
596 Verify that the IntervalArray is valid.
598 Checks that
600 * closed is valid
601 * left and right match lengths
602 * left and right have the same missing values
603 * left is always below right
604 """
605 if self.closed not in VALID_CLOSED:
606 msg = f"invalid option for 'closed': {self.closed}"
607 raise ValueError(msg)
608 if len(self._left) != len(self._right):
609 msg = "left and right must have the same length"
610 raise ValueError(msg)
611 left_mask = notna(self._left)
612 right_mask = notna(self._right)
613 if not (left_mask == right_mask).all():
614 msg = (
615 "missing values must be missing in the same "
616 "location both left and right sides"
617 )
618 raise ValueError(msg)
619 if not (self._left[left_mask] <= self._right[left_mask]).all():
620 msg = "left side of interval must be <= right side"
621 raise ValueError(msg)
623 def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
624 """
625 Return a new IntervalArray with the replacement attributes
627 Parameters
628 ----------
629 left : Index
630 Values to be used for the left-side of the intervals.
631 right : Index
632 Values to be used for the right-side of the intervals.
633 """
634 return self._simple_new(left, right, closed=self.closed, verify_integrity=False)
636 # ---------------------------------------------------------------------
637 # Descriptive
@property
def dtype(self) -> IntervalDtype:
    """Return the ``IntervalDtype`` stored in ``self._dtype``."""
    return self._dtype
@property
def nbytes(self) -> int:
    """Return the sum of ``nbytes`` of the left and right bounds."""
    return self.left.nbytes + self.right.nbytes
@property
def size(self) -> int:
    """Return the ``size`` of the left bounds."""
    # Avoid materializing self.values
    return self.left.size
652 # ---------------------------------------------------------------------
653 # EA Interface
def __iter__(self):
    """Iterate over the elements of ``np.asarray(self)``."""
    return iter(np.asarray(self))
def __len__(self) -> int:
    """Return the length of the left bounds."""
    return len(self._left)
@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
    ...

@overload
def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
    ...

def __getitem__(
    self: IntervalArrayT, key: PositionalIndexer
) -> IntervalArrayT | IntervalOrNA:
    """
    Index into the array.

    Returns a new array when indexing the bounds yields arrays, otherwise a
    single ``Interval`` (or ``self._fill_value`` when the selected left bound
    is a missing scalar).

    Raises
    ------
    ValueError
        If the indexed bounds have more than one dimension.
    """
    key = check_array_indexer(self, key)
    left = self._left[key]
    right = self._right[key]

    if isinstance(left, (np.ndarray, ExtensionArray)):
        if np.ndim(left) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        return self._shallow_copy(left, right)

    # scalar
    if is_scalar(left) and isna(left):
        return self._fill_value
    return Interval(left, right, self.closed)
def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key`` on both bound arrays, in place.

    The value is validated first (``_validate_setitem_value``), then the key
    (``check_array_indexer``); the left bounds are written before the right.
    """
    new_left, new_right = self._validate_setitem_value(value)
    indexer = check_array_indexer(self, key)

    self._left[indexer] = new_left
    self._right[indexer] = new_right
def _cmp_method(self, other, op):
    """
    Shared implementation behind the rich-comparison operators.

    Parameters
    ----------
    other : object
        Interval scalar, list-like, or any other scalar.
    op : callable
        One of the ``operator`` comparison functions (``eq``, ``ne``, ``gt``,
        ``ge``, ``lt``, ``le``).

    Raises
    ------
    ValueError
        If ``other`` is list-like and its length differs from ``len(self)``.
    """
    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = pd_array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        if other is NA:
            # GH#31882
            from pandas.core.arrays import BooleanArray

            # Fully-masked result: every position is NA.
            arr = np.empty(self.shape, dtype=bool)
            mask = np.ones(self.shape, dtype=bool)
            return BooleanArray(arr, mask)
        return invalid_comparison(self, other, op)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        other_dtype = pandas_dtype("interval")
    elif not is_categorical_dtype(other.dtype):
        other_dtype = other.dtype
    else:
        # for categorical defer to categories for dtype
        other_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if is_interval_dtype(other_dtype):
            if self.closed != other.categories.closed:
                return invalid_comparison(self, other, op)

            other = other.categories.take(
                other.codes, allow_fill=True, fill_value=other.categories._na_value
            )

    # interval-like -> need same closed and matching endpoints
    if is_interval_dtype(other_dtype):
        if self.closed != other.closed:
            return invalid_comparison(self, other, op)
        elif not isinstance(other, Interval):
            other = type(self)(other)

        # Ordering comparisons look at the left bound first and use the
        # right bound only to break ties.
        if op is operator.eq:
            return (self._left == other.left) & (self._right == other.right)
        elif op is operator.ne:
            return (self._left != other.left) | (self._right != other.right)
        elif op is operator.gt:
            return (self._left > other.left) | (
                (self._left == other.left) & (self._right > other.right)
            )
        elif op is operator.ge:
            return (self == other) | (self > other)
        elif op is operator.lt:
            return (self._left < other.left) | (
                (self._left == other.left) & (self._right < other.right)
            )
        else:
            # operator.le
            return (self == other) | (self < other)

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(other_dtype):
        return invalid_comparison(self, other, op)

    # object dtype -> iteratively check for intervals
    result = np.zeros(len(self), dtype=bool)
    for i, obj in enumerate(other):
        try:
            result[i] = op(self[i], obj)
        except TypeError:
            if obj is NA:
                # comparison with np.nan returns NA
                # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                result = result.astype(object)
                result[i] = NA
            else:
                raise
    return result
@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
    """Delegate ``==`` to ``_cmp_method`` with ``operator.eq``."""
    return self._cmp_method(other, operator.eq)
@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
    """Delegate ``!=`` to ``_cmp_method`` with ``operator.ne``."""
    return self._cmp_method(other, operator.ne)
@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
    """Delegate ``>`` to ``_cmp_method`` with ``operator.gt``."""
    return self._cmp_method(other, operator.gt)
@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
    """Delegate ``>=`` to ``_cmp_method`` with ``operator.ge``."""
    return self._cmp_method(other, operator.ge)
@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
    """Delegate ``<`` to ``_cmp_method`` with ``operator.lt``."""
    return self._cmp_method(other, operator.lt)
@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
    """Delegate ``<=`` to ``_cmp_method`` with ``operator.le``."""
    return self._cmp_method(other, operator.le)
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def argsort(
    self,
    ascending: bool = True,
    kind: str = "quicksort",
    na_position: str = "last",
    *args,
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    With the default options (ascending, ``kind="quicksort"``,
    ``na_position="last"``) the result comes from ``np.lexsort`` keyed on the
    left bounds and then the right bounds; any other combination is handled
    by the parent class implementation.
    """
    ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)

    fast_path = ascending and kind == "quicksort" and na_position == "last"
    if not fast_path:
        # TODO: other cases we can use lexsort for? much more performant.
        return super().argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

    # np.lexsort treats the LAST key as the primary one, hence (right, left).
    return np.lexsort((self.right, self.left))
def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the first element in ``argsort`` order.

    Returns ``self._na_value`` for an empty array, or when missing values are
    present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    na_mask = self.isna()
    candidates = self
    if na_mask.any():
        if not skipna:
            return self._na_value
        candidates = self[~na_mask]

    position = candidates.argsort()[0]
    return candidates[position]
def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA:
    """
    Return the last element in ``argsort`` order.

    Returns ``self._na_value`` for an empty array, or when missing values are
    present and ``skipna`` is False.
    """
    nv.validate_minmax_axis(axis, self.ndim)

    if len(self) == 0:
        return self._na_value

    na_mask = self.isna()
    candidates = self
    if na_mask.any():
        if not skipna:
            return self._na_value
        candidates = self[~na_mask]

    position = candidates.argsort()[-1]
    return candidates[position]
def fillna(
    self: IntervalArrayT, value=None, method=None, limit=None
) -> IntervalArrayT:
    """
    Fill NA/NaN values with ``value``.

    Parameters
    ----------
    value : scalar, dict, Series
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, a Series or dict can be used to fill in different
        values for each index. The value should not be a list. The
        value(s) passed should be either Interval objects or NA/NaN.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        (Not implemented yet for IntervalArray)
        Method to use for filling holes in reindexed Series
    limit : int, default None
        (Not implemented yet for IntervalArray)
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. If method is not specified, this is the
        maximum number of entries along the entire axis where NaNs will be
        filled.

    Returns
    -------
    filled : IntervalArray with NA/NaN filled

    Raises
    ------
    TypeError
        If ``method`` or ``limit`` is not None.
    """
    if method is not None:
        raise TypeError("Filling by method is not supported for IntervalArray.")
    if limit is not None:
        raise TypeError("limit is not supported for IntervalArray.")

    fill_left, fill_right = self._validate_scalar(value)

    return self._shallow_copy(
        self.left.fillna(value=fill_left),
        self.right.fillna(value=fill_right),
    )
def astype(self, dtype, copy: bool = True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the cast fails with a ``TypeError`` or ``ValueError``.
    """
    from pandas import Index

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if not is_interval_dtype(dtype):
        # Non-interval target: defer to the parent class implementation.
        try:
            return super().astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg) from err

    if dtype == self.dtype:
        return self.copy() if copy else self

    # need to cast to different subtype
    target_subtype = dtype.subtype
    try:
        # We need to use Index rules for astype to prevent casting
        # np.nan entries to int subtypes
        new_left = Index(self._left, copy=False).astype(target_subtype)
        new_right = Index(self._right, copy=False).astype(target_subtype)
    except IntCastingNaNError:
        # e.g test_subtype_integer
        raise
    except (TypeError, ValueError) as err:
        # e.g. test_subtype_integer_errors f8->u8 can be lossy
        # and raises ValueError
        msg = f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
        raise TypeError(msg) from err
    return self._shallow_copy(new_left, new_right)
def equals(self, other) -> bool:
    """
    Return True when ``other`` has the exact same type, the same ``closed``
    value, and left/right bounds that ``equals`` those of ``self``.
    """
    if type(self) != type(other):
        return False

    if not (self.closed == other.closed):
        return False
    same_bounds = self.left.equals(other.left) and self.right.equals(other.right)
    return bool(same_bounds)
@classmethod
def _concat_same_type(
    cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the inputs do not all share one ``closed`` value.
    """
    distinct_closed = {arr.closed for arr in to_concat}
    if len(distinct_closed) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = distinct_closed

    lefts = [arr.left for arr in to_concat]
    left = np.concatenate(lefts)
    rights = [arr.right for arr in to_concat]
    right = np.concatenate(rights)
    return cls._simple_new(left, right, closed=closed, copy=False)
def copy(self: IntervalArrayT) -> IntervalArrayT:
    """
    Return a copy of the array.

    Both bound arrays are copied and a new instance is built through
    ``from_arrays`` with the same ``closed`` value.

    Returns
    -------
    IntervalArray
    """
    left_copy = self._left.copy()
    right_copy = self._right.copy()
    # TODO: Could skip verify_integrity here.
    return type(self).from_arrays(left_copy, right_copy, closed=self.closed)
def isna(self) -> np.ndarray:
    """Return the result of ``isna`` applied to the left bounds."""
    # Only the left side is inspected; ``_validate`` rejects arrays whose
    # missing positions differ between left and right.
    return isna(self._left)
def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift the values by ``periods`` positions, padding with ``fill_value``.

    A copy is returned unchanged when the array is empty or ``periods`` is 0.
    A missing ``fill_value`` is replaced by ``self.dtype.na_value``.
    """
    if not len(self) or periods == 0:
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    pad_len = min(abs(periods), len(self))
    if not isna(fill_value):
        padding = self._from_sequence([fill_value] * pad_len)
    else:
        from pandas import Index

        fill_value = Index(self._left, copy=False)._na_value
        # n + 1 break points produce n (all-missing) intervals.
        padding = IntervalArray.from_breaks([fill_value] * (pad_len + 1))

    if periods > 0:
        pieces = [padding, self[:-periods]]
    else:
        pieces = [self[abs(periods) :], padding]
    return self._concat_same_type(pieces)
def take(
    self: IntervalArrayT,
    indices,
    *,
    allow_fill: bool = False,
    fill_value=None,
    axis=None,
    **kwargs,
) -> IntervalArrayT:
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take((), kwargs)

    if allow_fill:
        # Split the boxed fill value into per-side values.
        fill_left, fill_right = self._validate_scalar(fill_value)
    else:
        fill_left = fill_right = fill_value

    new_left = take(self._left, indices, allow_fill=allow_fill, fill_value=fill_left)
    new_right = take(
        self._right, indices, allow_fill=allow_fill, fill_value=fill_right
    )

    return self._shallow_copy(new_left, new_right)
def _validate_listlike(self, value):
    """
    Convert a list-like of intervals into ``(left, right)`` bounds.

    Raises
    ------
    TypeError
        If ``value`` cannot be turned into an ``IntervalArray``, or its left
        bounds are rejected by ``self.left._validate_fill_value``.
    """
    # list-like of intervals
    try:
        converted = IntervalArray(value)
        self._check_closed_matches(converted, name="value")
        new_left, new_right = converted.left, converted.right
    except TypeError as err:
        # wrong type: not interval or NA
        raise TypeError(
            f"'value' should be an interval type, got {type(value)} instead."
        ) from err

    try:
        self.left._validate_fill_value(new_left)
    except (LossySetitemError, TypeError) as err:
        raise TypeError(
            "'value' should be a compatible interval type, "
            f"got {type(value)} instead."
        ) from err

    return new_left, new_right
def _validate_scalar(self, value):
    """
    Split a scalar into ``(left, right)`` fill values.

    An ``Interval`` yields its own bounds (after the ``closed`` check); a
    valid NA for the left dtype yields that dtype's NA value for both sides.

    Raises
    ------
    TypeError
        If ``value`` is neither an ``Interval`` nor a valid NA.
    """
    if isinstance(value, Interval):
        self._check_closed_matches(value, name="value")
        # TODO: check subdtype match like _validate_setitem_value?
        return value.left, value.right

    if is_valid_na_for_dtype(value, self.left.dtype):
        # GH#18295
        na_value = self.left._na_value
        return na_value, na_value

    raise TypeError("can only insert Interval objects and NA into an IntervalArray")
def _validate_setitem_value(self, value):
    """
    Validate ``value`` for ``__setitem__`` and return ``(left, right)``.

    Handles, in order: a valid NA for the left dtype, a scalar ``Interval``,
    and anything else via ``_validate_listlike``.

    Raises
    ------
    TypeError
        If an NA is being set while the subtype is an integer dtype.
    """
    if is_valid_na_for_dtype(value, self.left.dtype):
        # na value: need special casing to set directly on numpy arrays
        na_value = self.left._na_value
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            # GH#45484 TypeError, not ValueError, matches what we get with
            # non-NA un-holdable value.
            raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
        return na_value, na_value

    if not isinstance(value, Interval):
        return self._validate_listlike(value)

    # scalar interval
    self._check_closed_matches(value, name="value")
    new_left, new_right = value.left, value.right
    # Both endpoints are checked against the left bounds.
    self.left._validate_fill_value(new_left)
    self.left._validate_fill_value(new_right)
    return new_left, new_right
def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    # Count over the object-dtype ndarray view of the intervals.
    as_objects = np.asarray(self)
    return value_counts(as_objects, dropna=dropna)
1170 # ---------------------------------------------------------------------
1171 # Rendering Methods
1173 def _format_data(self) -> str:
1175 # TODO: integrate with categorical and make generic
1176 # name argument is unused here; just for compat with base / categorical
1177 n = len(self)
1178 max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)
1180 formatter = str
1182 if n == 0:
1183 summary = "[]"
1184 elif n == 1:
1185 first = formatter(self[0])
1186 summary = f"[{first}]"
1187 elif n == 2:
1188 first = formatter(self[0])
1189 last = formatter(self[-1])
1190 summary = f"[{first}, {last}]"
1191 else:
1193 if n > max_seq_items:
1194 n = min(max_seq_items // 2, 10)
1195 head = [formatter(x) for x in self[:n]]
1196 tail = [formatter(x) for x in self[-n:]]
1197 head_str = ", ".join(head)
1198 tail_str = ", ".join(tail)
1199 summary = f"[{head_str} ... {tail_str}]"
1200 else:
1201 tail = [formatter(x) for x in self]
1202 tail_str = ", ".join(tail)
1203 summary = f"[{tail_str}]"
1205 return summary
1207 def __repr__(self) -> str:
1208 # the short repr has no trailing newline, while the truncated
1209 # repr does. So we include a newline in our template, and strip
1210 # any trailing newlines from format_object_summary
1211 data = self._format_data()
1212 class_name = f"<{type(self).__name__}>\n"
1214 template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
1215 return template
1217 def _format_space(self) -> str:
1218 space = " " * (len(type(self).__name__) + 1)
1219 return f"\n{space}"
1221 # ---------------------------------------------------------------------
1222 # Vectorized Interval Properties/Attributes
@property
def left(self):
    """
    Return the left endpoints of each Interval in the IntervalArray as an Index.
    """
    # imported lazily inside the accessor, as in the original
    from pandas import Index

    left_values = self._left
    return Index(left_values, copy=False)
@property
def right(self):
    """
    Return the right endpoints of each Interval in the IntervalArray as an Index.
    """
    # imported lazily inside the accessor, as in the original
    from pandas import Index

    right_values = self._right
    return Index(right_values, copy=False)
@property
def length(self) -> Index:
    """
    Return an Index with entries denoting the length of each Interval.
    """
    upper = self.right
    lower = self.left
    return upper - lower
@property
def mid(self) -> Index:
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.
    """
    lower, upper = self.left, self.right
    try:
        return 0.5 * (lower + upper)
    except TypeError:
        # datetime safe version: endpoints that cannot be summed are
        # offset from the left by half the interval length instead
        return lower + 0.5 * self.length
# Docstring template for ``overlaps``; the ``%(klass)s`` and ``%(examples)s``
# placeholders are filled in where the template is attached to a method.
_interval_shared_docs["overlaps"] = textwrap.dedent(
    """
    Check elementwise if an Interval overlaps the values in the %(klass)s.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    Parameters
    ----------
    other : %(klass)s
        Interval to check against for an overlap.

    Returns
    -------
    ndarray
        Boolean array positionally indicating where an overlap occurs.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.

    Examples
    --------
    %(examples)s
    >>> intervals.overlaps(pd.Interval(0.5, 1.5))
    array([ True, True, False])

    Intervals that share closed endpoints overlap:

    >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
    array([ True, True, True])

    Intervals that only have an open endpoint in common do not overlap:

    >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
    array([False, True, False])
    """
)
@Appender(
    _interval_shared_docs["overlaps"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    >>> data = [(0, 1), (1, 3), (2, 4)]
    >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
    >>> intervals
    <IntervalArray>
    [(0, 1], (1, 3], (2, 4]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def overlaps(self, other):
    # The user-facing docstring is appended from the shared template above.
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        raise TypeError(f"`other` must be Interval-like, got {type(other).__name__}")

    # A shared endpoint only counts as overlap when both sides touching at
    # that point are closed; otherwise the comparison must be strict.
    left_cmp = le if (self.closed_left and other.closed_right) else lt
    right_cmp = le if (other.closed_left and self.closed_right) else lt

    # Overlap is the negation of being disjoint:
    #   disjoint = (A.left > B.right) or (B.left > A.right)
    # Negating it up front needs fewer operations.
    starts_before_other_ends = left_cmp(self.left, other.right)
    ends_after_other_starts = right_cmp(other.left, self.right)
    return starts_before_other_ends & ends_after_other_starts
1332 # ---------------------------------------------------------------------
@property
def closed(self) -> IntervalClosedType:
    """
    String describing the inclusive side of the intervals.

    Either ``left``, ``right``, ``both`` or ``neither``.
    """
    dtype = self.dtype
    return dtype.closed
# Docstring template for ``set_closed``; the ``%(klass)s`` and ``%(examples)s``
# placeholders are filled in where the template is attached to a method.
_interval_shared_docs["set_closed"] = textwrap.dedent(
    """
    Return an identical %(klass)s closed on the specified side.

    Parameters
    ----------
    closed : {'left', 'right', 'both', 'neither'}
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    new_index : %(klass)s

    %(examples)s\
    """
)
@Appender(
    _interval_shared_docs["set_closed"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
    >>> index
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, dtype: interval[int64, right]
    >>> index.set_closed('both')
    <IntervalArray>
    [[0, 1], [1, 2], [2, 3]]
    Length: 3, dtype: interval[int64, both]
    """
        ),
    }
)
def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
    # The user-facing docstring is appended from the shared template above.
    if closed not in VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {closed}")

    # Reuse the existing endpoint arrays; only the closed side changes.
    cls = type(self)
    return cls._simple_new(
        left=self._left, right=self._right, closed=closed, verify_integrity=False
    )
# Docstring template for ``is_non_overlapping_monotonic``; ``%(klass)s`` is
# filled in where the template is attached to the property.
_interval_shared_docs[
    "is_non_overlapping_monotonic"
] = """
        Return a boolean whether the %(klass)s is non-overlapping and monotonic.

        Non-overlapping means (no Intervals share points), and monotonic means
        either monotonic increasing or monotonic decreasing.
        """
# https://github.com/python/mypy/issues/1362
# Mypy does not support decorated properties
@property  # type: ignore[misc]
@Appender(
    _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
)
def is_non_overlapping_monotonic(self) -> bool:
    # Intervals must be laid out increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
    # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...);
    # left <= right is already required of every interval.
    #
    # For closed == 'both' a shared endpoint belongs to both neighbours, so
    # equality means overlap at a point and the comparison must be strict.
    # Otherwise at least one side is open and equality is not an overlap.
    strict = self.closed == "both"
    precedes = operator.lt if strict else operator.le
    follows = operator.gt if strict else operator.ge

    left, right = self._left, self._right
    return bool(
        precedes(right[:-1], left[1:]).all() or follows(left[:-1], right[1:]).all()
    )
1426 # ---------------------------------------------------------------------
1427 # Conversion
1429 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1430 """
1431 Return the IntervalArray's data as a numpy array of Interval
1432 objects (with dtype='object')
1433 """
1434 left = self._left
1435 right = self._right
1436 mask = self.isna()
1437 closed = self.closed
1439 result = np.empty(len(left), dtype=object)
1440 for i in range(len(left)):
1441 if mask[i]:
1442 result[i] = np.nan
1443 else:
1444 result[i] = Interval(left[i], right[i], closed)
1445 return result
def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    try:
        subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
    except TypeError as err:
        raise TypeError(
            f"Conversion to arrow with subtype '{self.dtype.subtype}' "
            "is not supported"
        ) from err
    interval_type = ArrowIntervalType(subtype, self.closed)

    # The storage is a struct array with one child per endpoint.
    left_child = pyarrow.array(self._left, type=subtype, from_pandas=True)
    right_child = pyarrow.array(self._right, type=subtype, from_pandas=True)
    storage_array = pyarrow.StructArray.from_arrays(
        [left_child, right_child],
        names=["left", "right"],
    )

    mask = self.isna()
    if mask.any():
        # if there are missing values, set validity bitmap also on the array level
        null_bitmap = pyarrow.array(~mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [null_bitmap],
            children=[storage_array.field(0), storage_array.field(1)],
        )

    if type is not None:
        if type.equals(interval_type.storage_type):
            return storage_array
        if not isinstance(type, ArrowIntervalType):
            raise TypeError(
                f"Not supported to convert IntervalArray to '{type}' type"
            )
        # ensure we have the same subtype and closed attributes
        if not type.equals(interval_type):
            raise TypeError(
                "Not supported to convert IntervalArray to type with "
                f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                f"and 'closed' ({self.closed} vs {type.closed}) attributes"
            )

    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
# Docstring template for ``to_tuples``; the ``%(return_type)s`` and
# ``%(examples)s`` placeholders are filled in where the template is attached.
_interval_shared_docs[
    "to_tuples"
] = """
        Return an %(return_type)s of tuples of the form (left, right).

        Parameters
        ----------
        na_tuple : bool, default True
            Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
            value itself if False, ``nan``.

        Returns
        -------
        tuples: %(return_type)s
        %(examples)s\
        """
@Appender(
    _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
)
def to_tuples(self, na_tuple=True) -> np.ndarray:
    # The user-facing docstring is appended from the shared template above.
    pairs = com.asarray_tuplesafe(zip(self._left, self._right))
    if na_tuple:
        return pairs
    # GH 18756: replace the tuple of each missing interval with a bare NaN
    return np.where(~self.isna(), pairs, np.nan)
1526 # ---------------------------------------------------------------------
1528 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1529 value_left, value_right = self._validate_setitem_value(value)
1531 if isinstance(self._left, np.ndarray):
1532 np.putmask(self._left, mask, value_left)
1533 np.putmask(self._right, mask, value_right)
1534 else:
1535 self._left._putmask(mask, value_left)
1536 self._right._putmask(mask, value_right)
def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
    """
    Return a new IntervalArray with `item` inserted at position `loc`.

    Follows Python numpy.insert semantics for negative values. Only Interval
    objects and NA can be inserted.

    Parameters
    ----------
    loc : int
    item : Interval

    Returns
    -------
    IntervalArray
    """
    item_left, item_right = self._validate_scalar(item)

    # insert each endpoint into its own index, then recombine
    lefts = self.left.insert(loc, item_left)
    rights = self.right.insert(loc, item_right)

    return self._shallow_copy(lefts, rights)
def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
    """
    Return a new IntervalArray with the entries at `loc` removed.
    """
    left, right = self._left, self._right
    # The type of the left endpoints decides the strategy for both sides.
    if isinstance(left, np.ndarray):
        kept = (np.delete(left, loc), np.delete(right, loc))
    else:
        kept = (left.delete(loc), right.delete(loc))
    return self._shallow_copy(left=kept[0], right=kept[1])
@Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
def repeat(
    self: IntervalArrayT,
    repeats: int | Sequence[int],
    axis: int | None = None,
) -> IntervalArrayT:
    # The user-facing docstring is appended from the shared template above.
    # ``axis`` is only passed to numpy-compat validation, not used here.
    nv.validate_repeat((), {"axis": axis})
    return self._shallow_copy(
        left=self.left.repeat(repeats),
        right=self.right.repeat(repeats),
    )
# Docstring template for ``contains``; the ``%(klass)s`` and ``%(examples)s``
# placeholders are filled in where the template is attached to a method.
_interval_shared_docs["contains"] = textwrap.dedent(
    """
    Check elementwise if the Intervals contain the value.

    Return a boolean mask whether the value is contained in the Intervals
    of the %(klass)s.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    other : scalar
        The value to check whether it is contained in the Intervals.

    Returns
    -------
    boolean array

    See Also
    --------
    Interval.contains : Check whether Interval object contains value.
    %(klass)s.overlaps : Check if an Interval overlaps the values in the
        %(klass)s.

    Examples
    --------
    %(examples)s
    >>> intervals.contains(0.5)
    array([ True, False, False])
    """
)
@Appender(
    _interval_shared_docs["contains"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
    >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
    >>> intervals
    <IntervalArray>
    [(0, 1], (1, 3], (2, 4]]
    Length: 3, dtype: interval[int64, right]
    """
        ),
    }
)
def contains(self, other):
    # The user-facing docstring is appended from the shared template above.
    if isinstance(other, Interval):
        raise NotImplementedError("contains not implemented for two intervals")

    # An open side excludes its endpoint, so that comparison is strict.
    if self.open_left:
        above_left = self._left < other
    else:
        above_left = self._left <= other

    if self.open_right:
        below_right = other < self._right
    else:
        below_right = other <= self._right

    return above_left & below_right
def isin(self, values) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask marking which intervals are present in `values`.
    """
    if not hasattr(values, "dtype"):
        values = np.array(values)
    values = extract_array(values, extract_numpy=True)

    if is_interval_dtype(values.dtype):
        if self.closed != values.closed:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

        if is_dtype_equal(self.dtype, values.dtype):
            # GH#38353 instead of casting to object, operating on a
            # complex128 ndarray is much more performant.
            ours = self._combined.view("complex128")
            theirs = values._combined.view("complex128")
            # mypy rejects the ExtensionArray | ndarray union that ``view``
            # returns as an argument to "in1d", hence the ignore below.
            return np.in1d(ours, theirs)  # type: ignore[arg-type]

        self_needs_i8 = needs_i8_conversion(self.left.dtype)
        other_needs_i8 = needs_i8_conversion(values.left.dtype)
        if self_needs_i8 != other_needs_i8:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

    # Generic fallback: compare element-wise as objects.
    return isin(self.astype(object), values.astype(object))
1667 @property
1668 def _combined(self) -> ArrayLike:
1669 left = self.left._values.reshape(-1, 1)
1670 right = self.right._values.reshape(-1, 1)
1671 if needs_i8_conversion(left.dtype):
1672 comb = left._concat_same_type([left, right], axis=1)
1673 else:
1674 comb = np.concatenate([left, right], axis=1)
1675 return comb
1677 def _from_combined(self, combined: np.ndarray) -> IntervalArray:
1678 """
1679 Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
1680 """
1681 nc = combined.view("i8").reshape(-1, 2)
1683 dtype = self._left.dtype
1684 if needs_i8_conversion(dtype):
1685 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1686 new_left = type(self._left)._from_sequence( # type: ignore[attr-defined]
1687 nc[:, 0], dtype=dtype
1688 )
1689 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1690 new_right = type(self._right)._from_sequence( # type: ignore[attr-defined]
1691 nc[:, 1], dtype=dtype
1692 )
1693 else:
1694 new_left = nc[:, 0].view(dtype)
1695 new_right = nc[:, 1].view(dtype)
1696 return self._shallow_copy(left=new_left, right=new_right)
def unique(self) -> IntervalArray:
    """
    Return a new IntervalArray holding the unique intervals.
    """
    # Each (left, right) row is viewed as one complex128 value so that
    # uniqueness is decided on the pair as a whole.
    # mypy: no "__getitem__" overload of "ExtensionArray" matches
    # argument type "Tuple[slice, int]"
    packed = self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
    distinct = unique(packed)
    return self._from_combined(distinct[:, None])
1708def _maybe_convert_platform_interval(values) -> ArrayLike:
1709 """
1710 Try to do platform conversion, with special casing for IntervalArray.
1711 Wrapper around maybe_convert_platform that alters the default return
1712 dtype in certain cases to be compatible with IntervalArray. For example,
1713 empty lists return with integer dtype instead of object dtype, which is
1714 prohibited for IntervalArray.
1716 Parameters
1717 ----------
1718 values : array-like
1720 Returns
1721 -------
1722 array
1723 """
1724 if isinstance(values, (list, tuple)) and len(values) == 0:
1725 # GH 19016
1726 # empty lists/tuples get object dtype by default, but this is
1727 # prohibited for IntervalArray, so coerce to integer instead
1728 return np.array([], dtype=np.int64)
1729 elif not is_list_like(values) or isinstance(values, ABCDataFrame):
1730 # This will raise later, but we avoid passing to maybe_convert_platform
1731 return values
1732 elif is_categorical_dtype(values):
1733 values = np.asarray(values)
1734 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
1735 # TODO: should we just cast these to list?
1736 return values
1737 else:
1738 values = extract_array(values, extract_numpy=True)
1740 if not hasattr(values, "dtype"):
1741 return np.asarray(values)
1742 return values