Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/interval.py: 18%

610 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import operator 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 TYPE_CHECKING, 

11 Literal, 

12 Sequence, 

13 TypeVar, 

14 Union, 

15 cast, 

16 overload, 

17) 

18 

19import numpy as np 

20 

21from pandas._config import get_option 

22 

23from pandas._libs import lib 

24from pandas._libs.interval import ( 

25 VALID_CLOSED, 

26 Interval, 

27 IntervalMixin, 

28 intervals_to_interval_bounds, 

29) 

30from pandas._libs.missing import NA 

31from pandas._typing import ( 

32 ArrayLike, 

33 Dtype, 

34 IntervalClosedType, 

35 NpDtype, 

36 PositionalIndexer, 

37 ScalarIndexer, 

38 SequenceIndexer, 

39 npt, 

40) 

41from pandas.compat.numpy import function as nv 

42from pandas.errors import IntCastingNaNError 

43from pandas.util._decorators import ( 

44 Appender, 

45 deprecate_nonkeyword_arguments, 

46) 

47 

48from pandas.core.dtypes.cast import LossySetitemError 

49from pandas.core.dtypes.common import ( 

50 is_categorical_dtype, 

51 is_dtype_equal, 

52 is_float_dtype, 

53 is_integer_dtype, 

54 is_interval_dtype, 

55 is_list_like, 

56 is_object_dtype, 

57 is_scalar, 

58 is_string_dtype, 

59 needs_i8_conversion, 

60 pandas_dtype, 

61) 

62from pandas.core.dtypes.dtypes import IntervalDtype 

63from pandas.core.dtypes.generic import ( 

64 ABCDataFrame, 

65 ABCDatetimeIndex, 

66 ABCIntervalIndex, 

67 ABCPeriodIndex, 

68) 

69from pandas.core.dtypes.missing import ( 

70 is_valid_na_for_dtype, 

71 isna, 

72 notna, 

73) 

74 

75from pandas.core.algorithms import ( 

76 isin, 

77 take, 

78 unique, 

79 value_counts, 

80) 

81from pandas.core.arrays.base import ( 

82 ExtensionArray, 

83 _extension_array_shared_docs, 

84) 

85import pandas.core.common as com 

86from pandas.core.construction import ( 

87 array as pd_array, 

88 ensure_wrapped_if_datetimelike, 

89 extract_array, 

90) 

91from pandas.core.indexers import check_array_indexer 

92from pandas.core.indexes.base import ensure_index 

93from pandas.core.ops import ( 

94 invalid_comparison, 

95 unpack_zerodim_and_defer, 

96) 

97 

98if TYPE_CHECKING:  # 98 ↛ 99: line 98 didn't jump to line 99, because the condition on line 98 was never true

99 from pandas import ( 

100 Index, 

101 Series, 

102 ) 

103 

104 

105IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") 

106IntervalOrNA = Union[Interval, float] 

107 

108_interval_shared_docs: dict[str, str] = {} 

109 

110_shared_docs_kwargs = { 

111 "klass": "IntervalArray", 

112 "qualname": "arrays.IntervalArray", 

113 "name": "", 

114} 

115 

116 

117_interval_shared_docs[ 

118 "class" 

119] = """ 

120%(summary)s 

121 

122.. versionadded:: %(versionadded)s 

123 

124Parameters 

125---------- 

126data : array-like (1-dimensional) 

127 Array-like containing Interval objects from which to build the 

128 %(klass)s. 

129closed : {'left', 'right', 'both', 'neither'}, default 'right' 

130 Whether the intervals are closed on the left-side, right-side, both or 

131 neither. 

132dtype : dtype or None, default None 

133 If None, dtype will be inferred. 

134copy : bool, default False 

135 Copy the input data. 

136%(name)s\ 

137verify_integrity : bool, default True 

138 Verify that the %(klass)s is valid. 

139 

140Attributes 

141---------- 

142left 

143right 

144closed 

145mid 

146length 

147is_empty 

148is_non_overlapping_monotonic 

149%(extra_attributes)s\ 

150 

151Methods 

152------- 

153from_arrays 

154from_tuples 

155from_breaks 

156contains 

157overlaps 

158set_closed 

159to_tuples 

160%(extra_methods)s\ 

161 

162See Also 

163-------- 

164Index : The base pandas Index type. 

165Interval : A bounded slice-like interval; the elements of an %(klass)s. 

166interval_range : Function to create a fixed frequency IntervalIndex. 

167cut : Bin values into discrete Intervals. 

168qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. 

169 

170Notes 

171----- 

172See the `user guide 

173<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__ 

174for more. 

175 

176%(examples)s\ 

177""" 

178 

179 

180@Appender( 

181 _interval_shared_docs["class"] 

182 % { 

183 "klass": "IntervalArray", 

184 "summary": "Pandas array for interval data that are closed on the same side.", 

185 "versionadded": "0.24.0", 

186 "name": "", 

187 "extra_attributes": "", 

188 "extra_methods": "", 

189 "examples": textwrap.dedent( 

190 """\ 

191 Examples 

192 -------- 

193 A new ``IntervalArray`` can be constructed directly from an array-like of 

194 ``Interval`` objects: 

195 

196 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) 

197 <IntervalArray> 

198 [(0, 1], (1, 5]] 

199 Length: 2, dtype: interval[int64, right] 

200 

201 It may also be constructed using one of the constructor 

202 methods: :meth:`IntervalArray.from_arrays`, 

203 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 

204 """ 

205 ), 

206 } 

207) 

208class IntervalArray(IntervalMixin, ExtensionArray): 

209 can_hold_na = True 

210 _na_value = _fill_value = np.nan 

211 

212 @property 

213 def ndim(self) -> Literal[1]: 

214 return 1 

215 

216 # To make mypy recognize the fields 

217 _left: np.ndarray 

218 _right: np.ndarray 

219 _dtype: IntervalDtype 

220 

221 # --------------------------------------------------------------------- 

222 # Constructors 

223 

224 def __new__( 

225 cls: type[IntervalArrayT], 

226 data, 

227 closed=None, 

228 dtype: Dtype | None = None, 

229 copy: bool = False, 

230 verify_integrity: bool = True, 

231 ): 

232 

233 data = extract_array(data, extract_numpy=True) 

234 

235 if isinstance(data, cls): 

236 left = data._left 

237 right = data._right 

238 closed = closed or data.closed 

239 else: 

240 

241 # don't allow scalars 

242 if is_scalar(data): 

243 msg = ( 

244 f"{cls.__name__}(...) must be called with a collection " 

245 f"of some kind, {data} was passed" 

246 ) 

247 raise TypeError(msg) 

248 

249 # might need to convert empty or purely na data 

250 data = _maybe_convert_platform_interval(data) 

251 left, right, infer_closed = intervals_to_interval_bounds( 

252 data, validate_closed=closed is None 

253 ) 

254 if left.dtype == object: 

255 left = lib.maybe_convert_objects(left) 

256 right = lib.maybe_convert_objects(right) 

257 closed = closed or infer_closed 

258 

259 return cls._simple_new( 

260 left, 

261 right, 

262 closed, 

263 copy=copy, 

264 dtype=dtype, 

265 verify_integrity=verify_integrity, 

266 ) 

267 

268 @classmethod 

269 def _simple_new( 

270 cls: type[IntervalArrayT], 

271 left, 

272 right, 

273 closed: IntervalClosedType | None = None, 

274 copy: bool = False, 

275 dtype: Dtype | None = None, 

276 verify_integrity: bool = True, 

277 ) -> IntervalArrayT: 

278 result = IntervalMixin.__new__(cls) 

279 

280 if closed is None and isinstance(dtype, IntervalDtype): 

281 closed = dtype.closed 

282 

283 closed = closed or "right" 

284 left = ensure_index(left, copy=copy) 

285 right = ensure_index(right, copy=copy) 

286 

287 if dtype is not None: 

288 # GH 19262: dtype must be an IntervalDtype to override inferred 

289 dtype = pandas_dtype(dtype) 

290 if is_interval_dtype(dtype): 

291 dtype = cast(IntervalDtype, dtype) 

292 if dtype.subtype is not None: 

293 left = left.astype(dtype.subtype) 

294 right = right.astype(dtype.subtype) 

295 else: 

296 msg = f"dtype must be an IntervalDtype, got {dtype}" 

297 raise TypeError(msg) 

298 

299 if dtype.closed is None: 

300 # possibly loading an old pickle 

301 dtype = IntervalDtype(dtype.subtype, closed) 

302 elif closed != dtype.closed: 

303 raise ValueError("closed keyword does not match dtype.closed") 

304 

305 # coerce dtypes to match if needed 

306 if is_float_dtype(left) and is_integer_dtype(right): 

307 right = right.astype(left.dtype) 

308 elif is_float_dtype(right) and is_integer_dtype(left): 

309 left = left.astype(right.dtype) 

310 

311 if type(left) != type(right): 

312 msg = ( 

313 f"must not have differing left [{type(left).__name__}] and " 

314 f"right [{type(right).__name__}] types" 

315 ) 

316 raise ValueError(msg) 

317 elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): 

318 # GH 19016 

319 msg = ( 

320 "category, object, and string subtypes are not supported " 

321 "for IntervalArray" 

322 ) 

323 raise TypeError(msg) 

324 elif isinstance(left, ABCPeriodIndex): 

325 msg = "Period dtypes are not supported, use a PeriodIndex instead" 

326 raise ValueError(msg) 

327 elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): 

328 msg = ( 

329 "left and right must have the same time zone, got " 

330 f"'{left.tz}' and '{right.tz}'" 

331 ) 

332 raise ValueError(msg) 

333 

334 # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray 

335 left = ensure_wrapped_if_datetimelike(left) 

336 left = extract_array(left, extract_numpy=True) 

337 right = ensure_wrapped_if_datetimelike(right) 

338 right = extract_array(right, extract_numpy=True) 

339 

340 lbase = getattr(left, "_ndarray", left).base 

341 rbase = getattr(right, "_ndarray", right).base 

342 if lbase is not None and lbase is rbase: 

343 # If these share data, then setitem could corrupt our IA 

344 right = right.copy() 

345 

346 dtype = IntervalDtype(left.dtype, closed=closed) 

347 result._dtype = dtype 

348 

349 result._left = left 

350 result._right = right 

351 if verify_integrity: 

352 result._validate() 

353 return result 

354 

355 @classmethod 

356 def _from_sequence( 

357 cls: type[IntervalArrayT], 

358 scalars, 

359 *, 

360 dtype: Dtype | None = None, 

361 copy: bool = False, 

362 ) -> IntervalArrayT: 

363 return cls(scalars, dtype=dtype, copy=copy) 

364 

365 @classmethod 

366 def _from_factorized( 

367 cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT 

368 ) -> IntervalArrayT: 

369 if len(values) == 0: 

370 # An empty array returns object-dtype here. We can't create 

371 # a new IA from an (empty) object-dtype array, so turn it into the 

372 # correct dtype. 

373 values = values.astype(original.dtype.subtype) 

374 return cls(values, closed=original.closed) 

375 

376 _interval_shared_docs["from_breaks"] = textwrap.dedent( 

377 """ 

378 Construct an %(klass)s from an array of splits. 

379 

380 Parameters 

381 ---------- 

382 breaks : array-like (1-dimensional) 

383 Left and right bounds for each interval. 

384 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

385 Whether the intervals are closed on the left-side, right-side, both 

386 or neither. 

387 copy : bool, default False 

388 Copy the data. 

389 dtype : dtype or None, default None 

390 If None, dtype will be inferred. 

391 

392 Returns 

393 ------- 

394 %(klass)s 

395 

396 See Also 

397 -------- 

398 interval_range : Function to create a fixed frequency IntervalIndex. 

399 %(klass)s.from_arrays : Construct from a left and right array. 

400 %(klass)s.from_tuples : Construct from a sequence of tuples. 

401 

402 %(examples)s\ 

403 """ 

404 ) 

405 

406 @classmethod 

407 @Appender( 

408 _interval_shared_docs["from_breaks"] 

409 % { 

410 "klass": "IntervalArray", 

411 "examples": textwrap.dedent( 

412 """\ 

413 Examples 

414 -------- 

415 >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) 

416 <IntervalArray> 

417 [(0, 1], (1, 2], (2, 3]] 

418 Length: 3, dtype: interval[int64, right] 

419 """ 

420 ), 

421 } 

422 ) 

423 def from_breaks( 

424 cls: type[IntervalArrayT], 

425 breaks, 

426 closed: IntervalClosedType | None = "right", 

427 copy: bool = False, 

428 dtype: Dtype | None = None, 

429 ) -> IntervalArrayT: 

430 breaks = _maybe_convert_platform_interval(breaks) 

431 

432 return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) 

433 

434 _interval_shared_docs["from_arrays"] = textwrap.dedent( 

435 """ 

436 Construct from two arrays defining the left and right bounds. 

437 

438 Parameters 

439 ---------- 

440 left : array-like (1-dimensional) 

441 Left bounds for each interval. 

442 right : array-like (1-dimensional) 

443 Right bounds for each interval. 

444 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

445 Whether the intervals are closed on the left-side, right-side, both 

446 or neither. 

447 copy : bool, default False 

448 Copy the data. 

449 dtype : dtype, optional 

450 If None, dtype will be inferred. 

451 

452 Returns 

453 ------- 

454 %(klass)s 

455 

456 Raises 

457 ------ 

458 ValueError 

459 When a value is missing in only one of `left` or `right`. 

460 When a value in `left` is greater than the corresponding value 

461 in `right`. 

462 

463 See Also 

464 -------- 

465 interval_range : Function to create a fixed frequency IntervalIndex. 

466 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

467 splits. 

468 %(klass)s.from_tuples : Construct an %(klass)s from an 

469 array-like of tuples. 

470 

471 Notes 

472 ----- 

473 Each element of `left` must be less than or equal to the `right` 

474 element at the same position. If an element is missing, it must be 

475 missing in both `left` and `right`. A TypeError is raised when 

476 using an unsupported type for `left` or `right`. At the moment, 

477 'category', 'object', and 'string' subtypes are not supported. 

478 

479 %(examples)s\ 

480 """ 

481 ) 

482 

483 @classmethod 

484 @Appender( 

485 _interval_shared_docs["from_arrays"] 

486 % { 

487 "klass": "IntervalArray", 

488 "examples": textwrap.dedent( 

489 """\ 

490 >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) 

491 <IntervalArray> 

492 [(0, 1], (1, 2], (2, 3]] 

493 Length: 3, dtype: interval[int64, right] 

494 """ 

495 ), 

496 } 

497 ) 

498 def from_arrays( 

499 cls: type[IntervalArrayT], 

500 left, 

501 right, 

502 closed: IntervalClosedType | None = "right", 

503 copy: bool = False, 

504 dtype: Dtype | None = None, 

505 ) -> IntervalArrayT: 

506 left = _maybe_convert_platform_interval(left) 

507 right = _maybe_convert_platform_interval(right) 

508 

509 return cls._simple_new( 

510 left, right, closed, copy=copy, dtype=dtype, verify_integrity=True 

511 ) 

512 

513 _interval_shared_docs["from_tuples"] = textwrap.dedent( 

514 """ 

515 Construct an %(klass)s from an array-like of tuples. 

516 

517 Parameters 

518 ---------- 

519 data : array-like (1-dimensional) 

520 Array of tuples. 

521 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

522 Whether the intervals are closed on the left-side, right-side, both 

523 or neither. 

524 copy : bool, default False 

525 By default, copy the data. This argument is kept for compatibility and is currently ignored.

526 dtype : dtype or None, default None 

527 If None, dtype will be inferred. 

528 

529 Returns 

530 ------- 

531 %(klass)s 

532 

533 See Also 

534 -------- 

535 interval_range : Function to create a fixed frequency IntervalIndex. 

536 %(klass)s.from_arrays : Construct an %(klass)s from a left and 

537 right array. 

538 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

539 splits. 

540 

541 %(examples)s\ 

542 """ 

543 ) 

544 

545 @classmethod 

546 @Appender( 

547 _interval_shared_docs["from_tuples"] 

548 % { 

549 "klass": "IntervalArray", 

550 "examples": textwrap.dedent( 

551 """\ 

552 Examples 

553 -------- 

554 >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) 

555 <IntervalArray> 

556 [(0, 1], (1, 2]] 

557 Length: 2, dtype: interval[int64, right] 

558 """ 

559 ), 

560 } 

561 ) 

562 def from_tuples( 

563 cls: type[IntervalArrayT], 

564 data, 

565 closed="right", 

566 copy: bool = False, 

567 dtype: Dtype | None = None, 

568 ) -> IntervalArrayT: 

569 if len(data): 

570 left, right = [], [] 

571 else: 

572 # ensure that empty data keeps input dtype 

573 left = right = data 

574 

575 for d in data: 

576 if isna(d): 

577 lhs = rhs = np.nan 

578 else: 

579 name = cls.__name__ 

580 try: 

581 # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] 

582 lhs, rhs = d 

583 except ValueError as err: 

584 msg = f"{name}.from_tuples requires tuples of length 2, got {d}" 

585 raise ValueError(msg) from err 

586 except TypeError as err: 

587 msg = f"{name}.from_tuples received an invalid item, {d}" 

588 raise TypeError(msg) from err 

589 left.append(lhs) 

590 right.append(rhs) 

591 

592 return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) 

593 

594 def _validate(self): 

595 """ 

596 Verify that the IntervalArray is valid. 

597 

598 Checks that 

599 

600 * closed is valid 

601 * left and right match lengths 

602 * left and right have the same missing values 

603 * left is always below right 

604 """ 

605 if self.closed not in VALID_CLOSED: 

606 msg = f"invalid option for 'closed': {self.closed}" 

607 raise ValueError(msg) 

608 if len(self._left) != len(self._right): 

609 msg = "left and right must have the same length" 

610 raise ValueError(msg) 

611 left_mask = notna(self._left) 

612 right_mask = notna(self._right) 

613 if not (left_mask == right_mask).all(): 

614 msg = ( 

615 "missing values must be missing in the same " 

616 "location both left and right sides" 

617 ) 

618 raise ValueError(msg) 

619 if not (self._left[left_mask] <= self._right[left_mask]).all(): 

620 msg = "left side of interval must be <= right side" 

621 raise ValueError(msg) 

622 
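# Illustrative sketch of the checks above (values chosen for illustration,
# assumes ``import pandas as pd``); e.g. a left bound greater than its right
# bound fails validation:
#
#     >>> pd.arrays.IntervalArray.from_arrays([0, 2], [1, 1])
#     ValueError: left side of interval must be <= right side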

623 def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: 

624 """ 

625 Return a new IntervalArray with the replacement attributes 

626 

627 Parameters 

628 ---------- 

629 left : Index 

630 Values to be used for the left-side of the intervals. 

631 right : Index 

632 Values to be used for the right-side of the intervals. 

633 """ 

634 return self._simple_new(left, right, closed=self.closed, verify_integrity=False) 

635 

636 # --------------------------------------------------------------------- 

637 # Descriptive 

638 

639 @property 

640 def dtype(self) -> IntervalDtype: 

641 return self._dtype 

642 

643 @property 

644 def nbytes(self) -> int: 

645 return self.left.nbytes + self.right.nbytes 

646 

647 @property 

648 def size(self) -> int: 

649 # Avoid materializing self.values 

650 return self.left.size 

651 

652 # --------------------------------------------------------------------- 

653 # EA Interface 

654 

655 def __iter__(self): 

656 return iter(np.asarray(self)) 

657 

658 def __len__(self) -> int: 

659 return len(self._left) 

660 

661 @overload 

662 def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: 

663 ... 

664 

665 @overload 

666 def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT: 

667 ... 

668 

669 def __getitem__( 

670 self: IntervalArrayT, key: PositionalIndexer 

671 ) -> IntervalArrayT | IntervalOrNA: 

672 key = check_array_indexer(self, key) 

673 left = self._left[key] 

674 right = self._right[key] 

675 

676 if not isinstance(left, (np.ndarray, ExtensionArray)): 

677 # scalar 

678 if is_scalar(left) and isna(left): 

679 return self._fill_value 

680 return Interval(left, right, self.closed) 

681 if np.ndim(left) > 1: 

682 # GH#30588 multi-dimensional indexer disallowed 

683 raise ValueError("multi-dimensional indexing not allowed") 

684 return self._shallow_copy(left, right) 

685 

686 def __setitem__(self, key, value) -> None: 

687 value_left, value_right = self._validate_setitem_value(value) 

688 key = check_array_indexer(self, key) 

689 

690 self._left[key] = value_left 

691 self._right[key] = value_right 

692 

693 def _cmp_method(self, other, op): 

694 # ensure pandas array for list-like and eliminate non-interval scalars 

695 if is_list_like(other): 

696 if len(self) != len(other): 

697 raise ValueError("Lengths must match to compare") 

698 other = pd_array(other) 

699 elif not isinstance(other, Interval): 

700 # non-interval scalar -> no matches 

701 if other is NA: 

702 # GH#31882 

703 from pandas.core.arrays import BooleanArray 

704 

705 arr = np.empty(self.shape, dtype=bool) 

706 mask = np.ones(self.shape, dtype=bool) 

707 return BooleanArray(arr, mask) 

708 return invalid_comparison(self, other, op) 

709 

710 # determine the dtype of the elements we want to compare 

711 if isinstance(other, Interval): 

712 other_dtype = pandas_dtype("interval") 

713 elif not is_categorical_dtype(other.dtype): 

714 other_dtype = other.dtype 

715 else: 

716 # for categorical defer to categories for dtype 

717 other_dtype = other.categories.dtype 

718 

719 # extract intervals if we have interval categories with matching closed 

720 if is_interval_dtype(other_dtype): 

721 if self.closed != other.categories.closed: 

722 return invalid_comparison(self, other, op) 

723 

724 other = other.categories.take( 

725 other.codes, allow_fill=True, fill_value=other.categories._na_value 

726 ) 

727 

728 # interval-like -> need same closed and matching endpoints 

729 if is_interval_dtype(other_dtype): 

730 if self.closed != other.closed: 

731 return invalid_comparison(self, other, op) 

732 elif not isinstance(other, Interval): 

733 other = type(self)(other) 

734 

735 if op is operator.eq: 

736 return (self._left == other.left) & (self._right == other.right) 

737 elif op is operator.ne: 

738 return (self._left != other.left) | (self._right != other.right) 

739 elif op is operator.gt: 

740 return (self._left > other.left) | ( 

741 (self._left == other.left) & (self._right > other.right) 

742 ) 

743 elif op is operator.ge: 

744 return (self == other) | (self > other) 

745 elif op is operator.lt: 

746 return (self._left < other.left) | ( 

747 (self._left == other.left) & (self._right < other.right) 

748 ) 

749 else: 

750 # operator.le

751 return (self == other) | (self < other) 

752 

753 # non-interval/non-object dtype -> no matches 

754 if not is_object_dtype(other_dtype): 

755 return invalid_comparison(self, other, op) 

756 

757 # object dtype -> iteratively check for intervals 

758 result = np.zeros(len(self), dtype=bool) 

759 for i, obj in enumerate(other): 

760 try: 

761 result[i] = op(self[i], obj) 

762 except TypeError: 

763 if obj is NA: 

764 # comparison with np.nan returns NA 

765 # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092 

766 result = result.astype(object) 

767 result[i] = NA 

768 else: 

769 raise 

770 return result 

771 
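# Illustrative sketch of the comparison semantics implemented above (values
# chosen for illustration, assumes ``import pandas as pd``): equality requires
# matching endpoints, and ordering is lexicographic on (left, right).
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3)])
#     >>> arr == pd.Interval(0, 1)
#     array([ True, False])
#     >>> arr < pd.Interval(1, 4)
#     array([ True,  True])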

772 @unpack_zerodim_and_defer("__eq__") 

773 def __eq__(self, other): 

774 return self._cmp_method(other, operator.eq) 

775 

776 @unpack_zerodim_and_defer("__ne__") 

777 def __ne__(self, other): 

778 return self._cmp_method(other, operator.ne) 

779 

780 @unpack_zerodim_and_defer("__gt__") 

781 def __gt__(self, other): 

782 return self._cmp_method(other, operator.gt) 

783 

784 @unpack_zerodim_and_defer("__ge__") 

785 def __ge__(self, other): 

786 return self._cmp_method(other, operator.ge) 

787 

788 @unpack_zerodim_and_defer("__lt__") 

789 def __lt__(self, other): 

790 return self._cmp_method(other, operator.lt) 

791 

792 @unpack_zerodim_and_defer("__le__") 

793 def __le__(self, other): 

794 return self._cmp_method(other, operator.le) 

795 

796 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

797 def argsort( 

798 self, 

799 ascending: bool = True, 

800 kind: str = "quicksort", 

801 na_position: str = "last", 

802 *args, 

803 **kwargs, 

804 ) -> np.ndarray: 

805 ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) 

806 

807 if ascending and kind == "quicksort" and na_position == "last": 

808 return np.lexsort((self.right, self.left)) 

809 

810 # TODO: other cases we can use lexsort for? much more performant. 

811 return super().argsort( 

812 ascending=ascending, kind=kind, na_position=na_position, **kwargs 

813 ) 

814 
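# Illustrative sketch of the lexsort fast path above (values chosen for
# illustration, assumes ``import pandas as pd``): intervals sort by left bound,
# with ties broken by the right bound.
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(1, 3), (0, 2), (0, 1)])
#     >>> arr.argsort()
#     array([2, 1, 0])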

815 def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: 

816 nv.validate_minmax_axis(axis, self.ndim) 

817 

818 if not len(self): 

819 return self._na_value 

820 

821 mask = self.isna() 

822 if mask.any(): 

823 if not skipna: 

824 return self._na_value 

825 obj = self[~mask] 

826 else: 

827 obj = self 

828 

829 indexer = obj.argsort()[0] 

830 return obj[indexer] 

831 

832 def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: 

833 nv.validate_minmax_axis(axis, self.ndim) 

834 

835 if not len(self): 

836 return self._na_value 

837 

838 mask = self.isna() 

839 if mask.any(): 

840 if not skipna: 

841 return self._na_value 

842 obj = self[~mask] 

843 else: 

844 obj = self 

845 

846 indexer = obj.argsort()[-1] 

847 return obj[indexer] 

848 

849 def fillna( 

850 self: IntervalArrayT, value=None, method=None, limit=None 

851 ) -> IntervalArrayT: 

852 """ 

853 Fill NA/NaN values using the specified method. 

854 

855 Parameters 

856 ---------- 

857 value : scalar, dict, Series 

858 If a scalar value is passed it is used to fill all missing values. 

859 Alternatively, a Series or dict can be used to fill in different 

860 values for each index. The value should not be a list. The 

861 value(s) passed should be either Interval objects or NA/NaN. 

862 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

863 (Not implemented yet for IntervalArray) 

864 Method to use for filling holes in reindexed Series 

865 limit : int, default None 

866 (Not implemented yet for IntervalArray) 

867 If method is specified, this is the maximum number of consecutive 

868 NaN values to forward/backward fill. In other words, if there is 

869 a gap with more than this number of consecutive NaNs, it will only 

870 be partially filled. If method is not specified, this is the 

871 maximum number of entries along the entire axis where NaNs will be 

872 filled. 

873 

874 Returns 

875 ------- 

876 filled : IntervalArray with NA/NaN filled 

877 """ 

878 if method is not None: 

879 raise TypeError("Filling by method is not supported for IntervalArray.") 

880 if limit is not None: 

881 raise TypeError("limit is not supported for IntervalArray.") 

882 

883 value_left, value_right = self._validate_scalar(value) 

884 

885 left = self.left.fillna(value=value_left) 

886 right = self.right.fillna(value=value_right) 

887 return self._shallow_copy(left, right) 

888 
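# Illustrative sketch of fillna above (assumes ``import pandas as pd`` and
# ``import numpy as np``); only a scalar Interval or NA fill value is accepted:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan, (2, 3)])
#     >>> arr.fillna(pd.Interval(1, 2))
#     <IntervalArray>
#     [(0.0, 1.0], (1.0, 2.0], (2.0, 3.0]]
#     Length: 3, dtype: interval[float64, right]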

889 def astype(self, dtype, copy: bool = True): 

890 """ 

891 Cast to an ExtensionArray or NumPy array with dtype 'dtype'. 

892 

893 Parameters 

894 ---------- 

895 dtype : str or dtype 

896 Typecode or data-type to which the array is cast. 

897 

898 copy : bool, default True 

899 Whether to copy the data, even if not necessary. If False, 

900 a copy is made only if the old dtype does not match the 

901 new dtype. 

902 

903 Returns 

904 ------- 

905 array : ExtensionArray or ndarray 

906 ExtensionArray or NumPy ndarray with 'dtype' for its dtype. 

907 """ 

908 from pandas import Index 

909 

910 if dtype is not None: 

911 dtype = pandas_dtype(dtype) 

912 

913 if is_interval_dtype(dtype): 

914 if dtype == self.dtype: 

915 return self.copy() if copy else self 

916 

917 # need to cast to different subtype 

918 try: 

919 # We need to use Index rules for astype to prevent casting 

920 # np.nan entries to int subtypes 

921 new_left = Index(self._left, copy=False).astype(dtype.subtype) 

922 new_right = Index(self._right, copy=False).astype(dtype.subtype) 

923 except IntCastingNaNError: 

924 # e.g test_subtype_integer 

925 raise 

926 except (TypeError, ValueError) as err: 

927 # e.g. test_subtype_integer_errors f8->u8 can be lossy 

928 # and raises ValueError 

929 msg = ( 

930 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" 

931 ) 

932 raise TypeError(msg) from err 

933 return self._shallow_copy(new_left, new_right) 

934 else: 

935 try: 

936 return super().astype(dtype, copy=copy) 

937 except (TypeError, ValueError) as err: 

938 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" 

939 raise TypeError(msg) from err 

940 
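# Illustrative sketch of an interval-to-interval cast handled above (assumes
# ``import pandas as pd``): the subtype changes while the bounds are preserved.
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
#     >>> arr.astype("interval[float64]")
#     <IntervalArray>
#     [(0.0, 1.0], (1.0, 2.0]]
#     Length: 2, dtype: interval[float64, right]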

941 def equals(self, other) -> bool: 

942 if type(self) != type(other): 

943 return False 

944 

945 return bool( 

946 self.closed == other.closed 

947 and self.left.equals(other.left) 

948 and self.right.equals(other.right) 

949 ) 

950 

951 @classmethod 

952 def _concat_same_type( 

953 cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT] 

954 ) -> IntervalArrayT: 

955 """ 

956 Concatenate multiple IntervalArray 

957 

958 Parameters 

959 ---------- 

960 to_concat : sequence of IntervalArray 

961 

962 Returns 

963 ------- 

964 IntervalArray 

965 """ 

966 closed_set = {interval.closed for interval in to_concat} 

967 if len(closed_set) != 1: 

968 raise ValueError("Intervals must all be closed on the same side.") 

969 closed = closed_set.pop() 

970 

971 left = np.concatenate([interval.left for interval in to_concat]) 

972 right = np.concatenate([interval.right for interval in to_concat]) 

973 return cls._simple_new(left, right, closed=closed, copy=False) 

974 

975 def copy(self: IntervalArrayT) -> IntervalArrayT: 

976 """ 

977 Return a copy of the array. 

978 

979 Returns 

980 ------- 

981 IntervalArray 

982 """ 

983 left = self._left.copy() 

984 right = self._right.copy() 

985 closed = self.closed 

986 # TODO: Could skip verify_integrity here. 

987 return type(self).from_arrays(left, right, closed=closed) 

988 

989 def isna(self) -> np.ndarray: 

990 return isna(self._left) 

991 

992 def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: 

993 if not len(self) or periods == 0: 

994 return self.copy() 

995 

996 if isna(fill_value): 

997 fill_value = self.dtype.na_value 

998 

999 # ExtensionArray.shift doesn't work for two reasons 

1000 # 1. IntervalArray.dtype.na_value may not be correct for the dtype. 

1001 # 2. IntervalArray._from_sequence only accepts NaN for missing values, 

1002 # not other values like NaT 

1003 

1004 empty_len = min(abs(periods), len(self)) 

1005 if isna(fill_value): 

1006 from pandas import Index 

1007 

1008 fill_value = Index(self._left, copy=False)._na_value 

1009 empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) 

1010 else: 

1011 empty = self._from_sequence([fill_value] * empty_len) 

1012 

1013 if periods > 0: 

1014 a = empty 

1015 b = self[:-periods] 

1016 else: 

1017 a = self[abs(periods) :] 

1018 b = empty 

1019 return self._concat_same_type([a, b]) 

1020 
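# Illustrative sketch of shift above (assumes ``import pandas as pd``); with the
# default fill value the leading slot becomes NA and the subtype is upcast:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.shift(1)
#     <IntervalArray>
#     [nan, (0.0, 1.0], (1.0, 2.0]]
#     Length: 3, dtype: interval[float64, right]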

1021 def take( 

1022 self: IntervalArrayT, 

1023 indices, 

1024 *, 

1025 allow_fill: bool = False, 

1026 fill_value=None, 

1027 axis=None, 

1028 **kwargs, 

1029 ) -> IntervalArrayT: 

1030 """ 

1031 Take elements from the IntervalArray. 

1032 

1033 Parameters 

1034 ---------- 

1035 indices : sequence of integers 

1036 Indices to be taken. 

1037 

1038 allow_fill : bool, default False 

1039 How to handle negative values in `indices`. 

1040 

1041 * False: negative values in `indices` indicate positional indices 

1042 from the right (the default). This is similar to 

1043 :func:`numpy.take`. 

1044 

1045 * True: negative values in `indices` indicate 

1046 missing values. These values are set to `fill_value`. Any 

1047 other negative values raise a ``ValueError``. 

1048 

1049 fill_value : Interval or NA, optional 

1050 Fill value to use for NA-indices when `allow_fill` is True. 

1051 This may be ``None``, in which case the default NA value for 

1052 the type, ``self.dtype.na_value``, is used. 

1053 

1054 For many ExtensionArrays, there will be two representations of 

1055 `fill_value`: a user-facing "boxed" scalar, and a low-level 

1056 physical NA value. `fill_value` should be the user-facing version, 

1057 and the implementation should handle translating that to the 

1058 physical version for processing the take if necessary. 

1059 

1060 axis : any, default None 

1061 Present for compat with IntervalIndex; does nothing. 

1062 

1063 Returns 

1064 ------- 

1065 IntervalArray 

1066 

1067 Raises 

1068 ------ 

1069 IndexError 

1070 When the indices are out of bounds for the array. 

1071 ValueError 

1072 When `indices` contains negative values other than ``-1`` 

1073 and `allow_fill` is True. 

1074 """ 

1075 nv.validate_take((), kwargs) 

1076 

1077 fill_left = fill_right = fill_value 

1078 if allow_fill: 

1079 fill_left, fill_right = self._validate_scalar(fill_value) 

1080 

1081 left_take = take( 

1082 self._left, indices, allow_fill=allow_fill, fill_value=fill_left 

1083 ) 

1084 right_take = take( 

1085 self._right, indices, allow_fill=allow_fill, fill_value=fill_right 

1086 ) 

1087 

1088 return self._shallow_copy(left_take, right_take) 

1089 
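# Illustrative sketch of take above (values chosen for illustration, assumes
# ``import pandas as pd``):
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.take([2, 0])
#     <IntervalArray>
#     [(2, 3], (0, 1]]
#     Length: 2, dtype: interval[int64, right]
#     >>> arr.take([0, -1], allow_fill=True)  # -1 becomes NA; subtype upcast to float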

1090 def _validate_listlike(self, value): 

1091 # list-like of intervals 

1092 try: 

1093 array = IntervalArray(value) 

1094 self._check_closed_matches(array, name="value") 

1095 value_left, value_right = array.left, array.right 

1096 except TypeError as err: 

1097 # wrong type: not interval or NA 

1098 msg = f"'value' should be an interval type, got {type(value)} instead." 

1099 raise TypeError(msg) from err 

1100 

1101 try: 

1102 self.left._validate_fill_value(value_left) 

1103 except (LossySetitemError, TypeError) as err: 

1104 msg = ( 

1105 "'value' should be a compatible interval type, " 

1106 f"got {type(value)} instead." 

1107 ) 

1108 raise TypeError(msg) from err 

1109 

1110 return value_left, value_right 

1111 

1112 def _validate_scalar(self, value): 

1113 if isinstance(value, Interval): 

1114 self._check_closed_matches(value, name="value") 

1115 left, right = value.left, value.right 

1116 # TODO: check subdtype match like _validate_setitem_value? 

1117 elif is_valid_na_for_dtype(value, self.left.dtype): 

1118 # GH#18295 

1119 left = right = self.left._na_value 

1120 else: 

1121 raise TypeError( 

1122 "can only insert Interval objects and NA into an IntervalArray" 

1123 ) 

1124 return left, right 

1125 

1126 def _validate_setitem_value(self, value): 

1127 

1128 if is_valid_na_for_dtype(value, self.left.dtype): 

1129 # na value: need special casing to set directly on numpy arrays 

1130 value = self.left._na_value 

1131 if is_integer_dtype(self.dtype.subtype): 

1132 # can't set NaN on a numpy integer array 

1133 # GH#45484 TypeError, not ValueError, matches what we get with 

1134 # non-NA un-holdable value. 

1135 raise TypeError("Cannot set float NaN to integer-backed IntervalArray") 

1136 value_left, value_right = value, value 

1137 

1138 elif isinstance(value, Interval): 

1139 # scalar interval 

1140 self._check_closed_matches(value, name="value") 

1141 value_left, value_right = value.left, value.right 

1142 self.left._validate_fill_value(value_left) 

1143 self.left._validate_fill_value(value_right) 

1144 

1145 else: 

1146 return self._validate_listlike(value) 

1147 

1148 return value_left, value_right 

1149 
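# Illustrative sketch of the setitem validation above (assumes ``import pandas
# as pd`` and ``import numpy as np``): Intervals with matching ``closed`` are
# accepted, while NA cannot be set on an integer-backed array:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
#     >>> arr[0] = pd.Interval(0, 5)
#     >>> arr
#     <IntervalArray>
#     [(0, 5], (1, 2]]
#     Length: 2, dtype: interval[int64, right]
#     >>> arr[0] = np.nan
#     TypeError: Cannot set float NaN to integer-backed IntervalArray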

1150 def value_counts(self, dropna: bool = True) -> Series: 

1151 """ 

1152 Returns a Series containing counts of each interval. 

1153 

1154 Parameters 

1155 ---------- 

1156 dropna : bool, default True 

1157 Don't include counts of NaN. 

1158 

1159 Returns 

1160 ------- 

1161 counts : Series 

1162 

1163 See Also 

1164 -------- 

1165 Series.value_counts 

1166 """ 

1167 # TODO: implement this in a non-naive way! 

1168 return value_counts(np.asarray(self), dropna=dropna) 

1169 

1170 # --------------------------------------------------------------------- 

1171 # Rendering Methods 

1172 

1173 def _format_data(self) -> str: 

1174 

1175 # TODO: integrate with categorical and make generic 

1176 # name argument is unused here; just for compat with base / categorical 

1177 n = len(self) 

1178 max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) 

1179 

1180 formatter = str 

1181 

1182 if n == 0: 

1183 summary = "[]" 

1184 elif n == 1: 

1185 first = formatter(self[0]) 

1186 summary = f"[{first}]" 

1187 elif n == 2: 

1188 first = formatter(self[0]) 

1189 last = formatter(self[-1]) 

1190 summary = f"[{first}, {last}]" 

1191 else: 

1192 

1193 if n > max_seq_items: 

1194 n = min(max_seq_items // 2, 10) 

1195 head = [formatter(x) for x in self[:n]] 

1196 tail = [formatter(x) for x in self[-n:]] 

1197 head_str = ", ".join(head) 

1198 tail_str = ", ".join(tail) 

1199 summary = f"[{head_str} ... {tail_str}]" 

1200 else: 

1201 tail = [formatter(x) for x in self] 

1202 tail_str = ", ".join(tail) 

1203 summary = f"[{tail_str}]" 

1204 

1205 return summary 

1206 

1207 def __repr__(self) -> str: 

1208 # the short repr has no trailing newline, while the truncated 

1209 # repr does. So we include a newline in our template, and strip 

1210 # any trailing newlines from format_object_summary 

1211 data = self._format_data() 

1212 class_name = f"<{type(self).__name__}>\n" 

1213 

1214 template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" 

1215 return template 

1216 

1217 def _format_space(self) -> str: 

1218 space = " " * (len(type(self).__name__) + 1) 

1219 return f"\n{space}" 

1220 

1221 # --------------------------------------------------------------------- 

1222 # Vectorized Interval Properties/Attributes 

1223 

1224 @property 

1225 def left(self): 

1226 """ 

1227 Return the left endpoints of each Interval in the IntervalArray as an Index. 

1228 """ 

1229 from pandas import Index 

1230 

1231 return Index(self._left, copy=False) 

1232 

1233 @property 

1234 def right(self): 

1235 """ 

1236 Return the right endpoints of each Interval in the IntervalArray as an Index. 

1237 """ 

1238 from pandas import Index 

1239 

1240 return Index(self._right, copy=False) 

1241 

1242 @property 

1243 def length(self) -> Index: 

1244 """ 

1245 Return an Index with entries denoting the length of each Interval. 

1246 """ 

1247 return self.right - self.left 

1248 

1249 @property 

1250 def mid(self) -> Index: 

1251 """ 

1252 Return the midpoint of each Interval in the IntervalArray as an Index. 

1253 """ 

1254 try: 

1255 return 0.5 * (self.left + self.right) 

1256 except TypeError: 

1257 # datetime safe version 

1258 return self.left + 0.5 * self.length 

1259 

1260 _interval_shared_docs["overlaps"] = textwrap.dedent( 

1261 """ 

1262 Check elementwise if an Interval overlaps the values in the %(klass)s. 

1263 

1264 Two intervals overlap if they share a common point, including closed 

1265 endpoints. Intervals that only have an open endpoint in common do not 

1266 overlap. 

1267 

1268 Parameters 

1269 ---------- 

1270 other : %(klass)s 

1271 Interval to check against for an overlap. 

1272 

1273 Returns 

1274 ------- 

1275 ndarray 

1276 Boolean array positionally indicating where an overlap occurs. 

1277 

1278 See Also 

1279 -------- 

1280 Interval.overlaps : Check whether two Interval objects overlap. 

1281 

1282 Examples 

1283 -------- 

1284 %(examples)s 

1285 >>> intervals.overlaps(pd.Interval(0.5, 1.5)) 

1286 array([ True, True, False]) 

1287 

1288 Intervals that share closed endpoints overlap: 

1289 

1290 >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) 

1291 array([ True, True, True]) 

1292 

1293 Intervals that only have an open endpoint in common do not overlap: 

1294 

1295 >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) 

1296 array([False, True, False]) 

1297 """ 

1298 ) 

1299 

1300 @Appender( 

1301 _interval_shared_docs["overlaps"] 

1302 % { 

1303 "klass": "IntervalArray", 

1304 "examples": textwrap.dedent( 

1305 """\ 

1306 >>> data = [(0, 1), (1, 3), (2, 4)] 

1307 >>> intervals = pd.arrays.IntervalArray.from_tuples(data) 

1308 >>> intervals 

1309 <IntervalArray> 

1310 [(0, 1], (1, 3], (2, 4]] 

1311 Length: 3, dtype: interval[int64, right] 

1312 """ 

1313 ), 

1314 } 

1315 ) 

1316 def overlaps(self, other): 

1317 if isinstance(other, (IntervalArray, ABCIntervalIndex)): 

1318 raise NotImplementedError 

1319 elif not isinstance(other, Interval): 

1320 msg = f"`other` must be Interval-like, got {type(other).__name__}" 

1321 raise TypeError(msg) 

1322 

1323 # equality is okay if both endpoints are closed (overlap at a point) 

1324 op1 = le if (self.closed_left and other.closed_right) else lt 

1325 op2 = le if (other.closed_left and self.closed_right) else lt 

1326 

1327 # overlaps is equivalent to the negation of two intervals being disjoint: 

1328 # disjoint = (A.left > B.right) or (B.left > A.right) 

1329 # (simplifying the negation allows this to be done in fewer operations) 

1330 return op1(self.left, other.right) & op2(other.left, self.right) 

1331 

1332 # --------------------------------------------------------------------- 

1333 

1334 @property 

1335 def closed(self) -> IntervalClosedType: 

1336 """ 

1337 String describing the inclusive side of the intervals. 

1338 

1339 Either ``left``, ``right``, ``both`` or ``neither``. 

1340 """ 

1341 return self.dtype.closed 

1342 

1343 _interval_shared_docs["set_closed"] = textwrap.dedent( 

1344 """ 

1345 Return an identical %(klass)s closed on the specified side. 

1346 

1347 Parameters 

1348 ---------- 

1349 closed : {'left', 'right', 'both', 'neither'} 

1350 Whether the intervals are closed on the left-side, right-side, both 

1351 or neither. 

1352 

1353 Returns 

1354 ------- 

1355 new_index : %(klass)s 

1356 

1357 %(examples)s\ 

1358 """ 

1359 ) 

1360 

1361 @Appender( 

1362 _interval_shared_docs["set_closed"] 

1363 % { 

1364 "klass": "IntervalArray", 

1365 "examples": textwrap.dedent( 

1366 """\ 

1367 Examples 

1368 -------- 

1369 >>> index = pd.arrays.IntervalArray.from_breaks(range(4)) 

1370 >>> index 

1371 <IntervalArray> 

1372 [(0, 1], (1, 2], (2, 3]] 

1373 Length: 3, dtype: interval[int64, right] 

1374 >>> index.set_closed('both') 

1375 <IntervalArray> 

1376 [[0, 1], [1, 2], [2, 3]] 

1377 Length: 3, dtype: interval[int64, both] 

1378 """ 

1379 ), 

1380 } 

1381 ) 

1382 def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT: 

1383 if closed not in VALID_CLOSED: 

1384 msg = f"invalid option for 'closed': {closed}" 

1385 raise ValueError(msg) 

1386 

1387 return type(self)._simple_new( 

1388 left=self._left, right=self._right, closed=closed, verify_integrity=False 

1389 ) 

1390 

1391 _interval_shared_docs[ 

1392 "is_non_overlapping_monotonic" 

1393 ] = """ 

1394 Return a boolean whether the %(klass)s is non-overlapping and monotonic. 

1395 

1396 Non-overlapping means no Intervals share points, and monotonic means 

1397 either monotonic increasing or monotonic decreasing. 

1398 """ 

1399 

1400 # https://github.com/python/mypy/issues/1362 

1401 # Mypy does not support decorated properties 

1402 @property # type: ignore[misc] 

1403 @Appender( 

1404 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs 

1405 ) 

1406 def is_non_overlapping_monotonic(self) -> bool: 

1407 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) 

1408 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) 

1409 # we already require left <= right 

1410 

1411 # strict inequality for closed == 'both'; equality implies overlapping 

1412 # at a point when both sides of intervals are included 

1413 if self.closed == "both": 

1414 return bool( 

1415 (self._right[:-1] < self._left[1:]).all() 

1416 or (self._left[:-1] > self._right[1:]).all() 

1417 ) 

1418 

1419 # non-strict inequality when closed != 'both'; at least one side is 

1420 # not included in the intervals, so equality does not imply overlapping 

1421 return bool( 

1422 (self._right[:-1] <= self._left[1:]).all() 

1423 or (self._left[:-1] >= self._right[1:]).all() 

1424 ) 

1425 
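# Illustrative sketch of the property above (assumes ``import pandas as pd``);
# shared endpoints only count as overlap when both sides are closed:
#
#     >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]).is_non_overlapping_monotonic
#     True
#     >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], closed="both").is_non_overlapping_monotonic
#     False
#     >>> pd.arrays.IntervalArray.from_tuples([(0, 2), (1, 3)]).is_non_overlapping_monotonic
#     False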

1426 # --------------------------------------------------------------------- 

1427 # Conversion 

1428 

1429 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: 

1430 """ 

1431 Return the IntervalArray's data as a numpy array of Interval 

1432 objects (with dtype='object') 

1433 """ 

1434 left = self._left 

1435 right = self._right 

1436 mask = self.isna() 

1437 closed = self.closed 

1438 

1439 result = np.empty(len(left), dtype=object) 

1440 for i in range(len(left)): 

1441 if mask[i]: 

1442 result[i] = np.nan 

1443 else: 

1444 result[i] = Interval(left[i], right[i], closed) 

1445 return result 

1446 

1447 def __arrow_array__(self, type=None): 

1448 """ 

1449 Convert myself into a pyarrow Array. 

1450 """ 

1451 import pyarrow 

1452 

1453 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType 

1454 

1455 try: 

1456 subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) 

1457 except TypeError as err: 

1458 raise TypeError( 

1459 f"Conversion to arrow with subtype '{self.dtype.subtype}' " 

1460 "is not supported" 

1461 ) from err 

1462 interval_type = ArrowIntervalType(subtype, self.closed) 

1463 storage_array = pyarrow.StructArray.from_arrays( 

1464 [ 

1465 pyarrow.array(self._left, type=subtype, from_pandas=True), 

1466 pyarrow.array(self._right, type=subtype, from_pandas=True), 

1467 ], 

1468 names=["left", "right"], 

1469 ) 

1470 mask = self.isna() 

1471 if mask.any(): 

1472 # if there are missing values, set validity bitmap also on the array level 

1473 null_bitmap = pyarrow.array(~mask).buffers()[1] 

1474 storage_array = pyarrow.StructArray.from_buffers( 

1475 storage_array.type, 

1476 len(storage_array), 

1477 [null_bitmap], 

1478 children=[storage_array.field(0), storage_array.field(1)], 

1479 ) 

1480 

1481 if type is not None: 

1482 if type.equals(interval_type.storage_type): 

1483 return storage_array 

1484 elif isinstance(type, ArrowIntervalType): 

1485 # ensure we have the same subtype and closed attributes 

1486 if not type.equals(interval_type): 

1487 raise TypeError( 

1488 "Not supported to convert IntervalArray to type with " 

1489 f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) " 

1490 f"and 'closed' ({self.closed} vs {type.closed}) attributes" 

1491 ) 

1492 else: 

1493 raise TypeError( 

1494 f"Not supported to convert IntervalArray to '{type}' type" 

1495 ) 

1496 

1497 return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) 

1498 

1499 _interval_shared_docs[ 

1500 "to_tuples" 

1501 ] = """ 

1502 Return an %(return_type)s of tuples of the form (left, right). 

1503 

1504 Parameters 

1505 ---------- 

1506 na_tuple : bool, default True 

1507 Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA 

1508 value itself if False, ``nan``. 

1509 

1510 Returns 

1511 ------- 

1512 tuples: %(return_type)s 

1513 %(examples)s\ 

1514 """ 

1515 

1516 @Appender( 

1517 _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} 

1518 ) 

1519 def to_tuples(self, na_tuple=True) -> np.ndarray: 

1520 tuples = com.asarray_tuplesafe(zip(self._left, self._right)) 

1521 if not na_tuple: 

1522 # GH 18756 

1523 tuples = np.where(~self.isna(), tuples, np.nan) 

1524 return tuples 

1525 
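# Illustrative sketch of to_tuples above (assumes ``import pandas as pd`` and
# ``import numpy as np``), showing the effect of ``na_tuple``:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan])
#     >>> arr.to_tuples()
#     array([(0.0, 1.0), (nan, nan)], dtype=object)
#     >>> arr.to_tuples(na_tuple=False)
#     array([(0.0, 1.0), nan], dtype=object)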

1526 # --------------------------------------------------------------------- 

1527 

1528 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

1529 value_left, value_right = self._validate_setitem_value(value) 

1530 

1531 if isinstance(self._left, np.ndarray): 

1532 np.putmask(self._left, mask, value_left) 

1533 np.putmask(self._right, mask, value_right) 

1534 else: 

1535 self._left._putmask(mask, value_left) 

1536 self._right._putmask(mask, value_right) 

1537 

1538 def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: 

1539 """ 

1540 Return a new IntervalArray inserting new item at location. Follows 

1541 numpy.insert semantics for negative values. Only Interval 

1542 objects and NA can be inserted into an IntervalIndex. 

1543 

1544 Parameters 

1545 ---------- 

1546 loc : int 

1547 item : Interval 

1548 

1549 Returns 

1550 ------- 

1551 IntervalArray 

1552 """ 

1553 left_insert, right_insert = self._validate_scalar(item) 

1554 

1555 new_left = self.left.insert(loc, left_insert) 

1556 new_right = self.right.insert(loc, right_insert) 

1557 

1558 return self._shallow_copy(new_left, new_right) 

1559 

1560 def delete(self: IntervalArrayT, loc) -> IntervalArrayT: 

1561 if isinstance(self._left, np.ndarray): 

1562 new_left = np.delete(self._left, loc) 

1563 new_right = np.delete(self._right, loc) 

1564 else: 

1565 new_left = self._left.delete(loc) 

1566 new_right = self._right.delete(loc) 

1567 return self._shallow_copy(left=new_left, right=new_right) 

1568 

1569 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) 

1570 def repeat( 

1571 self: IntervalArrayT, 

1572 repeats: int | Sequence[int], 

1573 axis: int | None = None, 

1574 ) -> IntervalArrayT: 

1575 nv.validate_repeat((), {"axis": axis}) 

1576 left_repeat = self.left.repeat(repeats) 

1577 right_repeat = self.right.repeat(repeats) 

1578 return self._shallow_copy(left=left_repeat, right=right_repeat) 

1579 

1580 _interval_shared_docs["contains"] = textwrap.dedent( 

1581 """ 

1582 Check elementwise if the Intervals contain the value. 

1583 

1584 Return a boolean mask whether the value is contained in the Intervals 

1585 of the %(klass)s. 

1586 

1587 .. versionadded:: 0.25.0 

1588 

1589 Parameters 

1590 ---------- 

1591 other : scalar 

1592 The value to check whether it is contained in the Intervals. 

1593 

1594 Returns 

1595 ------- 

1596 boolean array 

1597 

1598 See Also 

1599 -------- 

1600 Interval.contains : Check whether Interval object contains value. 

1601 %(klass)s.overlaps : Check if an Interval overlaps the values in the 

1602 %(klass)s. 

1603 

1604 Examples 

1605 -------- 

1606 %(examples)s 

1607 >>> intervals.contains(0.5) 

1608 array([ True, False, False]) 

1609 """ 

1610 ) 

1611 

1612 @Appender( 

1613 _interval_shared_docs["contains"] 

1614 % { 

1615 "klass": "IntervalArray", 

1616 "examples": textwrap.dedent( 

1617 """\ 

1618 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) 

1619 >>> intervals 

1620 <IntervalArray> 

1621 [(0, 1], (1, 3], (2, 4]] 

1622 Length: 3, dtype: interval[int64, right] 

1623 """ 

1624 ), 

1625 } 

1626 ) 

1627 def contains(self, other): 

1628 if isinstance(other, Interval): 

1629 raise NotImplementedError("contains not implemented for two intervals") 

1630 

1631 return (self._left < other if self.open_left else self._left <= other) & ( 

1632 other < self._right if self.open_right else other <= self._right 

1633 ) 

1634 

1635 def isin(self, values) -> npt.NDArray[np.bool_]: 

1636 if not hasattr(values, "dtype"): 

1637 values = np.array(values) 

1638 values = extract_array(values, extract_numpy=True) 

1639 

1640 if is_interval_dtype(values.dtype): 

1641 if self.closed != values.closed: 

1642 # not comparable -> no overlap 

1643 return np.zeros(self.shape, dtype=bool) 

1644 

1645 if is_dtype_equal(self.dtype, values.dtype): 

1646 # GH#38353 instead of casting to object, operating on a 

1647 # complex128 ndarray is much more performant. 

1648 left = self._combined.view("complex128") 

1649 right = values._combined.view("complex128") 

1650 # error: Argument 1 to "in1d" has incompatible type 

1651 # "Union[ExtensionArray, ndarray[Any, Any], 

1652 # ndarray[Any, dtype[Any]]]"; expected 

1653 # "Union[_SupportsArray[dtype[Any]], 

1654 # _NestedSequence[_SupportsArray[dtype[Any]]], bool, 

1655 # int, float, complex, str, bytes, _NestedSequence[ 

1656 # Union[bool, int, float, complex, str, bytes]]]" 

1657 return np.in1d(left, right) # type: ignore[arg-type] 

1658 

1659 elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion( 

1660 values.left.dtype 

1661 ): 

1662 # not comparable -> no overlap 

1663 return np.zeros(self.shape, dtype=bool) 

1664 

1665 return isin(self.astype(object), values.astype(object)) 

1666 
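# The fast path above reinterprets each (left, right) pair of 8-byte bounds as
# a single complex128 scalar, so whole intervals can be matched with one
# np.in1d call instead of casting to object. Illustrative sketch (values chosen
# for illustration, assumes ``import pandas as pd``):
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.isin(arr[:1])
#     array([ True, False, False])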

1667 @property 

1668 def _combined(self) -> ArrayLike: 

1669 left = self.left._values.reshape(-1, 1) 

1670 right = self.right._values.reshape(-1, 1) 

1671 if needs_i8_conversion(left.dtype): 

1672 comb = left._concat_same_type([left, right], axis=1) 

1673 else: 

1674 comb = np.concatenate([left, right], axis=1) 

1675 return comb 

1676 

1677 def _from_combined(self, combined: np.ndarray) -> IntervalArray: 

1678 """ 

1679 Create a new IntervalArray with our dtype from a 1D complex128 ndarray. 

1680 """ 

1681 nc = combined.view("i8").reshape(-1, 2) 

1682 

1683 dtype = self._left.dtype 

1684 if needs_i8_conversion(dtype): 

1685 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" 

1686 new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] 

1687 nc[:, 0], dtype=dtype 

1688 ) 

1689 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" 

1690 new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] 

1691 nc[:, 1], dtype=dtype 

1692 ) 

1693 else: 

1694 new_left = nc[:, 0].view(dtype) 

1695 new_right = nc[:, 1].view(dtype) 

1696 return self._shallow_copy(left=new_left, right=new_right) 

1697 

1698 def unique(self) -> IntervalArray: 

1699 # No overload variant of "__getitem__" of "ExtensionArray" matches argument 

1700 # type "Tuple[slice, int]" 

1701 nc = unique( 

1702 self._combined.view("complex128")[:, 0] # type: ignore[call-overload] 

1703 ) 

1704 nc = nc[:, None] 

1705 return self._from_combined(nc) 

1706 

1707 

1708def _maybe_convert_platform_interval(values) -> ArrayLike: 

1709 """ 

1710 Try to do platform conversion, with special casing for IntervalArray. 

1711 Wrapper around maybe_convert_platform that alters the default return 

1712 dtype in certain cases to be compatible with IntervalArray. For example, 

1713 empty lists return with integer dtype instead of object dtype, which is 

1714 prohibited for IntervalArray. 

1715 

1716 Parameters 

1717 ---------- 

1718 values : array-like 

1719 

1720 Returns 

1721 ------- 

1722 array 

1723 """ 

1724 if isinstance(values, (list, tuple)) and len(values) == 0: 

1725 # GH 19016 

1726 # empty lists/tuples get object dtype by default, but this is 

1727 # prohibited for IntervalArray, so coerce to integer instead 

1728 return np.array([], dtype=np.int64) 

1729 elif not is_list_like(values) or isinstance(values, ABCDataFrame): 

1730 # This will raise later, but we avoid passing to maybe_convert_platform 

1731 return values 

1732 elif is_categorical_dtype(values): 

1733 values = np.asarray(values) 

1734 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)): 

1735 # TODO: should we just cast these to list? 

1736 return values 

1737 else: 

1738 values = extract_array(values, extract_numpy=True) 

1739 

1740 if not hasattr(values, "dtype"): 

1741 return np.asarray(values) 

1742 return values
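
# Illustrative sketch of the empty-input special case described above (assumes
# ``import pandas as pd``): an empty list yields an int64 subtype rather than
# the object dtype that IntervalArray prohibits.
#
#     >>> pd.arrays.IntervalArray([])
#     <IntervalArray>
#     []
#     Length: 0, dtype: interval[int64, right]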