Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/interval.py: 18%

610 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import operator 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 TYPE_CHECKING, 

11 Literal, 

12 Sequence, 

13 TypeVar, 

14 Union, 

15 cast, 

16 overload, 

17) 

18 

19import numpy as np 

20 

21from pandas._config import get_option 

22 

23from pandas._libs import lib 

24from pandas._libs.interval import ( 

25 VALID_CLOSED, 

26 Interval, 

27 IntervalMixin, 

28 intervals_to_interval_bounds, 

29) 

30from pandas._libs.missing import NA 

31from pandas._typing import ( 

32 ArrayLike, 

33 Dtype, 

34 IntervalClosedType, 

35 NpDtype, 

36 PositionalIndexer, 

37 ScalarIndexer, 

38 SequenceIndexer, 

39 npt, 

40) 

41from pandas.compat.numpy import function as nv 

42from pandas.errors import IntCastingNaNError 

43from pandas.util._decorators import ( 

44 Appender, 

45 deprecate_nonkeyword_arguments, 

46) 

47 

48from pandas.core.dtypes.cast import LossySetitemError 

49from pandas.core.dtypes.common import ( 

50 is_categorical_dtype, 

51 is_dtype_equal, 

52 is_float_dtype, 

53 is_integer_dtype, 

54 is_interval_dtype, 

55 is_list_like, 

56 is_object_dtype, 

57 is_scalar, 

58 is_string_dtype, 

59 needs_i8_conversion, 

60 pandas_dtype, 

61) 

62from pandas.core.dtypes.dtypes import IntervalDtype 

63from pandas.core.dtypes.generic import ( 

64 ABCDataFrame, 

65 ABCDatetimeIndex, 

66 ABCIntervalIndex, 

67 ABCPeriodIndex, 

68) 

69from pandas.core.dtypes.missing import ( 

70 is_valid_na_for_dtype, 

71 isna, 

72 notna, 

73) 

74 

75from pandas.core.algorithms import ( 

76 isin, 

77 take, 

78 unique, 

79 value_counts, 

80) 

81from pandas.core.arrays.base import ( 

82 ExtensionArray, 

83 _extension_array_shared_docs, 

84) 

85import pandas.core.common as com 

86from pandas.core.construction import ( 

87 array as pd_array, 

88 ensure_wrapped_if_datetimelike, 

89 extract_array, 

90) 

91from pandas.core.indexers import check_array_indexer 

92from pandas.core.indexes.base import ensure_index 

93from pandas.core.ops import ( 

94 invalid_comparison, 

95 unpack_zerodim_and_defer, 

96) 

97 

98if TYPE_CHECKING:  # 98 ↛ 99: line 98 didn't jump to line 99, because the condition on line 98 was never true

99 from pandas import ( 

100 Index, 

101 Series, 

102 ) 

103 

104 

105IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") 

106IntervalOrNA = Union[Interval, float] 

107 

108_interval_shared_docs: dict[str, str] = {} 

109 

110_shared_docs_kwargs = { 

111 "klass": "IntervalArray", 

112 "qualname": "arrays.IntervalArray", 

113 "name": "", 

114} 

115 

116 

117_interval_shared_docs[ 

118 "class" 

119] = """ 

120%(summary)s 

121 

122.. versionadded:: %(versionadded)s 

123 

124Parameters 

125---------- 

126data : array-like (1-dimensional) 

127 Array-like containing Interval objects from which to build the 

128 %(klass)s. 

129closed : {'left', 'right', 'both', 'neither'}, default 'right' 

130 Whether the intervals are closed on the left-side, right-side, both or 

131 neither. 

132dtype : dtype or None, default None 

133 If None, dtype will be inferred. 

134copy : bool, default False 

135 Copy the input data. 

136%(name)s\ 

137verify_integrity : bool, default True 

138 Verify that the %(klass)s is valid. 

139 

140Attributes 

141---------- 

142left 

143right 

144closed 

145mid 

146length 

147is_empty 

148is_non_overlapping_monotonic 

149%(extra_attributes)s\ 

150 

151Methods 

152------- 

153from_arrays 

154from_tuples 

155from_breaks 

156contains 

157overlaps 

158set_closed 

159to_tuples 

160%(extra_methods)s\ 

161 

162See Also 

163-------- 

164Index : The base pandas Index type. 

165Interval : A bounded slice-like interval; the elements of an %(klass)s. 

166interval_range : Function to create a fixed frequency IntervalIndex. 

167cut : Bin values into discrete Intervals. 

168qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. 

169 

170Notes 

171----- 

172See the `user guide 

173<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__ 

174for more. 

175 

176%(examples)s\ 

177""" 

178 

179 

180@Appender( 

181 _interval_shared_docs["class"] 

182 % { 

183 "klass": "IntervalArray", 

184 "summary": "Pandas array for interval data that are closed on the same side.", 

185 "versionadded": "0.24.0", 

186 "name": "", 

187 "extra_attributes": "", 

188 "extra_methods": "", 

189 "examples": textwrap.dedent( 

190 """\ 

191 Examples 

192 -------- 

193 A new ``IntervalArray`` can be constructed directly from an array-like of 

194 ``Interval`` objects: 

195 

196 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) 

197 <IntervalArray> 

198 [(0, 1], (1, 5]] 

199 Length: 2, dtype: interval[int64, right] 

200 

201 It may also be constructed using one of the constructor 

202 methods: :meth:`IntervalArray.from_arrays`, 

203 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 

204 """ 

205 ), 

206 } 

207) 

208class IntervalArray(IntervalMixin, ExtensionArray): 

209 can_hold_na = True 

210 _na_value = _fill_value = np.nan 

211 

212 @property 

213 def ndim(self) -> Literal[1]: 

214 return 1 

215 

216 # To make mypy recognize the fields 

217 _left: np.ndarray 

218 _right: np.ndarray 

219 _dtype: IntervalDtype 

220 

221 # --------------------------------------------------------------------- 

222 # Constructors 

223 

224 def __new__( 

225 cls: type[IntervalArrayT], 

226 data, 

227 closed=None, 

228 dtype: Dtype | None = None, 

229 copy: bool = False, 

230 verify_integrity: bool = True, 

231 ): 

232 

233 data = extract_array(data, extract_numpy=True) 

234 

235 if isinstance(data, cls): 

236 left = data._left 

237 right = data._right 

238 closed = closed or data.closed 

239 else: 

240 

241 # don't allow scalars 

242 if is_scalar(data): 

243 msg = ( 

244 f"{cls.__name__}(...) must be called with a collection " 

245 f"of some kind, {data} was passed" 

246 ) 

247 raise TypeError(msg) 

248 

249 # might need to convert empty or purely na data 

250 data = _maybe_convert_platform_interval(data) 

251 left, right, infer_closed = intervals_to_interval_bounds( 

252 data, validate_closed=closed is None 

253 ) 

254 if left.dtype == object: 

255 left = lib.maybe_convert_objects(left) 

256 right = lib.maybe_convert_objects(right) 

257 closed = closed or infer_closed 

258 

259 return cls._simple_new( 

260 left, 

261 right, 

262 closed, 

263 copy=copy, 

264 dtype=dtype, 

265 verify_integrity=verify_integrity, 

266 ) 

267 

268 @classmethod 

269 def _simple_new( 

270 cls: type[IntervalArrayT], 

271 left, 

272 right, 

273 closed: IntervalClosedType | None = None, 

274 copy: bool = False, 

275 dtype: Dtype | None = None, 

276 verify_integrity: bool = True, 

277 ) -> IntervalArrayT: 

278 result = IntervalMixin.__new__(cls) 

279 

280 if closed is None and isinstance(dtype, IntervalDtype): 

281 closed = dtype.closed 

282 

283 closed = closed or "right" 

284 left = ensure_index(left, copy=copy) 

285 right = ensure_index(right, copy=copy) 

286 

287 if dtype is not None: 

288 # GH 19262: dtype must be an IntervalDtype to override inferred 

289 dtype = pandas_dtype(dtype) 

290 if is_interval_dtype(dtype): 

291 dtype = cast(IntervalDtype, dtype) 

292 if dtype.subtype is not None: 

293 left = left.astype(dtype.subtype) 

294 right = right.astype(dtype.subtype) 

295 else: 

296 msg = f"dtype must be an IntervalDtype, got {dtype}" 

297 raise TypeError(msg) 

298 

299 if dtype.closed is None: 

300 # possibly loading an old pickle 

301 dtype = IntervalDtype(dtype.subtype, closed) 

302 elif closed != dtype.closed: 

303 raise ValueError("closed keyword does not match dtype.closed") 

304 

305 # coerce dtypes to match if needed 

306 if is_float_dtype(left) and is_integer_dtype(right): 

307 right = right.astype(left.dtype) 

308 elif is_float_dtype(right) and is_integer_dtype(left): 

309 left = left.astype(right.dtype) 

310 

311 if type(left) != type(right): 

312 msg = ( 

313 f"must not have differing left [{type(left).__name__}] and " 

314 f"right [{type(right).__name__}] types" 

315 ) 

316 raise ValueError(msg) 

317 elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): 

318 # GH 19016 

319 msg = ( 

320 "category, object, and string subtypes are not supported " 

321 "for IntervalArray" 

322 ) 

323 raise TypeError(msg) 

324 elif isinstance(left, ABCPeriodIndex): 

325 msg = "Period dtypes are not supported, use a PeriodIndex instead" 

326 raise ValueError(msg) 

327 elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): 

328 msg = ( 

329 "left and right must have the same time zone, got " 

330 f"'{left.tz}' and '{right.tz}'" 

331 ) 

332 raise ValueError(msg) 

333 

334 # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray 

335 left = ensure_wrapped_if_datetimelike(left) 

336 left = extract_array(left, extract_numpy=True) 

337 right = ensure_wrapped_if_datetimelike(right) 

338 right = extract_array(right, extract_numpy=True) 

339 

340 lbase = getattr(left, "_ndarray", left).base 

341 rbase = getattr(right, "_ndarray", right).base 

342 if lbase is not None and lbase is rbase: 

343 # If these share data, then setitem could corrupt our IA 

344 right = right.copy() 

345 

346 dtype = IntervalDtype(left.dtype, closed=closed) 

347 result._dtype = dtype 

348 

349 result._left = left 

350 result._right = right 

351 if verify_integrity: 

352 result._validate() 

353 return result 

354 

355 @classmethod 

356 def _from_sequence( 

357 cls: type[IntervalArrayT], 

358 scalars, 

359 *, 

360 dtype: Dtype | None = None, 

361 copy: bool = False, 

362 ) -> IntervalArrayT: 

363 return cls(scalars, dtype=dtype, copy=copy) 

364 

365 @classmethod 

366 def _from_factorized( 

367 cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT 

368 ) -> IntervalArrayT: 

369 if len(values) == 0: 

370 # An empty array returns object-dtype here. We can't create 

371 # a new IA from an (empty) object-dtype array, so turn it into the 

372 # correct dtype. 

373 values = values.astype(original.dtype.subtype) 

374 return cls(values, closed=original.closed) 

375 

376 _interval_shared_docs["from_breaks"] = textwrap.dedent( 

377 """ 

378 Construct an %(klass)s from an array of splits. 

379 

380 Parameters 

381 ---------- 

382 breaks : array-like (1-dimensional) 

383 Left and right bounds for each interval. 

384 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

385 Whether the intervals are closed on the left-side, right-side, both 

386 or neither. 

387 copy : bool, default False 

388 Copy the data. 

389 dtype : dtype or None, default None 

390 If None, dtype will be inferred. 

391 

392 Returns 

393 ------- 

394 %(klass)s 

395 

396 See Also 

397 -------- 

398 interval_range : Function to create a fixed frequency IntervalIndex. 

399 %(klass)s.from_arrays : Construct from a left and right array. 

400 %(klass)s.from_tuples : Construct from a sequence of tuples. 

401 

402 %(examples)s\ 

403 """ 

404 ) 

405 

406 @classmethod 

407 @Appender( 

408 _interval_shared_docs["from_breaks"] 

409 % { 

410 "klass": "IntervalArray", 

411 "examples": textwrap.dedent( 

412 """\ 

413 Examples 

414 -------- 

415 >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) 

416 <IntervalArray> 

417 [(0, 1], (1, 2], (2, 3]] 

418 Length: 3, dtype: interval[int64, right] 

419 """ 

420 ), 

421 } 

422 ) 

423 def from_breaks( 

424 cls: type[IntervalArrayT], 

425 breaks, 

426 closed: IntervalClosedType | None = "right", 

427 copy: bool = False, 

428 dtype: Dtype | None = None, 

429 ) -> IntervalArrayT: 

430 breaks = _maybe_convert_platform_interval(breaks) 

431 

432 return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) 

433 

434 _interval_shared_docs["from_arrays"] = textwrap.dedent( 

435 """ 

436 Construct from two arrays defining the left and right bounds. 

437 

438 Parameters 

439 ---------- 

440 left : array-like (1-dimensional) 

441 Left bounds for each interval. 

442 right : array-like (1-dimensional) 

443 Right bounds for each interval. 

444 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

445 Whether the intervals are closed on the left-side, right-side, both 

446 or neither. 

447 copy : bool, default False 

448 Copy the data. 

449 dtype : dtype, optional 

450 If None, dtype will be inferred. 

451 

452 Returns 

453 ------- 

454 %(klass)s 

455 

456 Raises 

457 ------ 

458 ValueError 

459 When a value is missing in only one of `left` or `right`. 

460 When a value in `left` is greater than the corresponding value 

461 in `right`. 

462 

463 See Also 

464 -------- 

465 interval_range : Function to create a fixed frequency IntervalIndex. 

466 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

467 splits. 

468 %(klass)s.from_tuples : Construct an %(klass)s from an 

469 array-like of tuples. 

470 

471 Notes 

472 ----- 

473 Each element of `left` must be less than or equal to the `right` 

474 element at the same position. If an element is missing, it must be 

475 missing in both `left` and `right`. A TypeError is raised when 

476 using an unsupported type for `left` or `right`. At the moment, 

477 'category', 'object', and 'string' subtypes are not supported. 

478 

479 %(examples)s\ 

480 """ 

481 ) 

482 

483 @classmethod 

484 @Appender( 

485 _interval_shared_docs["from_arrays"] 

486 % { 

487 "klass": "IntervalArray", 

488 "examples": textwrap.dedent( 

489 """\ 

490 >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) 

491 <IntervalArray> 

492 [(0, 1], (1, 2], (2, 3]] 

493 Length: 3, dtype: interval[int64, right] 

494 """ 

495 ), 

496 } 

497 ) 

498 def from_arrays( 

499 cls: type[IntervalArrayT], 

500 left, 

501 right, 

502 closed: IntervalClosedType | None = "right", 

503 copy: bool = False, 

504 dtype: Dtype | None = None, 

505 ) -> IntervalArrayT: 

506 left = _maybe_convert_platform_interval(left) 

507 right = _maybe_convert_platform_interval(right) 

508 

509 return cls._simple_new( 

510 left, right, closed, copy=copy, dtype=dtype, verify_integrity=True 

511 ) 

512 

513 _interval_shared_docs["from_tuples"] = textwrap.dedent( 

514 """ 

515 Construct an %(klass)s from an array-like of tuples. 

516 

517 Parameters 

518 ---------- 

519 data : array-like (1-dimensional) 

520 Array of tuples. 

521 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

522 Whether the intervals are closed on the left-side, right-side, both 

523 or neither. 

524 copy : bool, default False 

525 By default, copy the data. This argument is kept for compatibility and is currently ignored.

526 dtype : dtype or None, default None 

527 If None, dtype will be inferred. 

528 

529 Returns 

530 ------- 

531 %(klass)s 

532 

533 See Also 

534 -------- 

535 interval_range : Function to create a fixed frequency IntervalIndex. 

536 %(klass)s.from_arrays : Construct an %(klass)s from a left and 

537 right array. 

538 %(klass)s.from_breaks : Construct an %(klass)s from an array of 

539 splits. 

540 

541 %(examples)s\ 

542 """ 

543 ) 

544 

545 @classmethod 

546 @Appender( 

547 _interval_shared_docs["from_tuples"] 

548 % { 

549 "klass": "IntervalArray", 

550 "examples": textwrap.dedent( 

551 """\ 

552 Examples 

553 -------- 

554 >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) 

555 <IntervalArray> 

556 [(0, 1], (1, 2]] 

557 Length: 2, dtype: interval[int64, right] 

558 """ 

559 ), 

560 } 

561 ) 

562 def from_tuples( 

563 cls: type[IntervalArrayT], 

564 data, 

565 closed="right", 

566 copy: bool = False, 

567 dtype: Dtype | None = None, 

568 ) -> IntervalArrayT: 

569 if len(data): 

570 left, right = [], [] 

571 else: 

572 # ensure that empty data keeps input dtype 

573 left = right = data 

574 

575 for d in data: 

576 if isna(d): 

577 lhs = rhs = np.nan 

578 else: 

579 name = cls.__name__ 

580 try: 

581 # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] 

582 lhs, rhs = d 

583 except ValueError as err: 

584 msg = f"{name}.from_tuples requires tuples of length 2, got {d}" 

585 raise ValueError(msg) from err 

586 except TypeError as err: 

587 msg = f"{name}.from_tuples received an invalid item, {d}" 

588 raise TypeError(msg) from err 

589 left.append(lhs) 

590 right.append(rhs) 

591 

592 return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) 

593 

594 def _validate(self): 

595 """ 

596 Verify that the IntervalArray is valid. 

597 

598 Checks that 

599 

600 * closed is valid 

601 * left and right match lengths 

602 * left and right have the same missing values 

603 * left is always below right 

604 """ 

605 if self.closed not in VALID_CLOSED: 

606 msg = f"invalid option for 'closed': {self.closed}" 

607 raise ValueError(msg) 

608 if len(self._left) != len(self._right): 

609 msg = "left and right must have the same length" 

610 raise ValueError(msg) 

611 left_mask = notna(self._left) 

612 right_mask = notna(self._right) 

613 if not (left_mask == right_mask).all(): 

614 msg = ( 

615 "missing values must be missing in the same " 

616 "location both left and right sides" 

617 ) 

618 raise ValueError(msg) 

619 if not (self._left[left_mask] <= self._right[left_mask]).all(): 

620 msg = "left side of interval must be <= right side" 

621 raise ValueError(msg) 

622 
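# Illustrative sketch of the checks above (values chosen for illustration,
# assumes ``import pandas as pd``); e.g. a left bound greater than its right
# bound fails validation:
#
#     >>> pd.arrays.IntervalArray.from_arrays([0, 2], [1, 1])
#     ValueError: left side of interval must be <= right side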

623 def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: 

624 """ 

625 Return a new IntervalArray with the replacement attributes 

626 

627 Parameters 

628 ---------- 

629 left : Index 

630 Values to be used for the left-side of the intervals. 

631 right : Index 

632 Values to be used for the right-side of the intervals. 

633 """ 

634 return self._simple_new(left, right, closed=self.closed, verify_integrity=False) 

635 

636 # --------------------------------------------------------------------- 

637 # Descriptive 

638 

639 @property 

640 def dtype(self) -> IntervalDtype: 

641 return self._dtype 

642 

643 @property 

644 def nbytes(self) -> int: 

645 return self.left.nbytes + self.right.nbytes 

646 

647 @property 

648 def size(self) -> int: 

649 # Avoid materializing self.values 

650 return self.left.size 

651 

652 # --------------------------------------------------------------------- 

653 # EA Interface 

654 

655 def __iter__(self): 

656 return iter(np.asarray(self)) 

657 

658 def __len__(self) -> int: 

659 return len(self._left) 

660 

661 @overload 

662 def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: 

663 ... 

664 

665 @overload 

666 def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT: 

667 ... 

668 

669 def __getitem__( 

670 self: IntervalArrayT, key: PositionalIndexer 

671 ) -> IntervalArrayT | IntervalOrNA: 

672 key = check_array_indexer(self, key) 

673 left = self._left[key] 

674 right = self._right[key] 

675 

676 if not isinstance(left, (np.ndarray, ExtensionArray)): 

677 # scalar 

678 if is_scalar(left) and isna(left): 

679 return self._fill_value 

680 return Interval(left, right, self.closed) 

681 if np.ndim(left) > 1: 

682 # GH#30588 multi-dimensional indexer disallowed 

683 raise ValueError("multi-dimensional indexing not allowed") 

684 return self._shallow_copy(left, right) 

685 

686 def __setitem__(self, key, value) -> None: 

687 value_left, value_right = self._validate_setitem_value(value) 

688 key = check_array_indexer(self, key) 

689 

690 self._left[key] = value_left 

691 self._right[key] = value_right 

692 

693 def _cmp_method(self, other, op): 

694 # ensure pandas array for list-like and eliminate non-interval scalars 

695 if is_list_like(other): 

696 if len(self) != len(other): 

697 raise ValueError("Lengths must match to compare") 

698 other = pd_array(other) 

699 elif not isinstance(other, Interval): 

700 # non-interval scalar -> no matches 

701 if other is NA: 

702 # GH#31882 

703 from pandas.core.arrays import BooleanArray 

704 

705 arr = np.empty(self.shape, dtype=bool) 

706 mask = np.ones(self.shape, dtype=bool) 

707 return BooleanArray(arr, mask) 

708 return invalid_comparison(self, other, op) 

709 

710 # determine the dtype of the elements we want to compare 

711 if isinstance(other, Interval): 

712 other_dtype = pandas_dtype("interval") 

713 elif not is_categorical_dtype(other.dtype): 

714 other_dtype = other.dtype 

715 else: 

716 # for categorical defer to categories for dtype 

717 other_dtype = other.categories.dtype 

718 

719 # extract intervals if we have interval categories with matching closed 

720 if is_interval_dtype(other_dtype): 

721 if self.closed != other.categories.closed: 

722 return invalid_comparison(self, other, op) 

723 

724 other = other.categories.take( 

725 other.codes, allow_fill=True, fill_value=other.categories._na_value 

726 ) 

727 

728 # interval-like -> need same closed and matching endpoints 

729 if is_interval_dtype(other_dtype): 

730 if self.closed != other.closed: 

731 return invalid_comparison(self, other, op) 

732 elif not isinstance(other, Interval): 

733 other = type(self)(other) 

734 

735 if op is operator.eq: 

736 return (self._left == other.left) & (self._right == other.right) 

737 elif op is operator.ne: 

738 return (self._left != other.left) | (self._right != other.right) 

739 elif op is operator.gt: 

740 return (self._left > other.left) | ( 

741 (self._left == other.left) & (self._right > other.right) 

742 ) 

743 elif op is operator.ge: 

744 return (self == other) | (self > other) 

745 elif op is operator.lt: 

746 return (self._left < other.left) | ( 

747 (self._left == other.left) & (self._right < other.right) 

748 ) 

749 else: 

750 # operator.le

751 return (self == other) | (self < other) 

752 

753 # non-interval/non-object dtype -> no matches 

754 if not is_object_dtype(other_dtype): 

755 return invalid_comparison(self, other, op) 

756 

757 # object dtype -> iteratively check for intervals 

758 result = np.zeros(len(self), dtype=bool) 

759 for i, obj in enumerate(other): 

760 try: 

761 result[i] = op(self[i], obj) 

762 except TypeError: 

763 if obj is NA: 

764 # comparison with np.nan returns NA 

765 # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092 

766 result = result.astype(object) 

767 result[i] = NA 

768 else: 

769 raise 

770 return result 

771 
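# Illustrative sketch of the comparison semantics implemented above (values
# chosen for illustration, assumes ``import pandas as pd``): equality requires
# matching endpoints, and ordering is lexicographic on (left, right).
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3)])
#     >>> arr == pd.Interval(0, 1)
#     array([ True, False])
#     >>> arr < pd.Interval(1, 4)
#     array([ True,  True])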

772 @unpack_zerodim_and_defer("__eq__") 

773 def __eq__(self, other): 

774 return self._cmp_method(other, operator.eq) 

775 

776 @unpack_zerodim_and_defer("__ne__") 

777 def __ne__(self, other): 

778 return self._cmp_method(other, operator.ne) 

779 

780 @unpack_zerodim_and_defer("__gt__") 

781 def __gt__(self, other): 

782 return self._cmp_method(other, operator.gt) 

783 

784 @unpack_zerodim_and_defer("__ge__") 

785 def __ge__(self, other): 

786 return self._cmp_method(other, operator.ge) 

787 

788 @unpack_zerodim_and_defer("__lt__") 

789 def __lt__(self, other): 

790 return self._cmp_method(other, operator.lt) 

791 

792 @unpack_zerodim_and_defer("__le__") 

793 def __le__(self, other): 

794 return self._cmp_method(other, operator.le) 

795 

796 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

797 def argsort( 

798 self, 

799 ascending: bool = True, 

800 kind: str = "quicksort", 

801 na_position: str = "last", 

802 *args, 

803 **kwargs, 

804 ) -> np.ndarray: 

805 ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) 

806 

807 if ascending and kind == "quicksort" and na_position == "last": 

808 return np.lexsort((self.right, self.left)) 

809 

810 # TODO: other cases we can use lexsort for? much more performant. 

811 return super().argsort( 

812 ascending=ascending, kind=kind, na_position=na_position, **kwargs 

813 ) 

814 
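# Illustrative sketch of the lexsort fast path above (values chosen for
# illustration, assumes ``import pandas as pd``): intervals sort by left bound,
# with ties broken by the right bound.
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(1, 3), (0, 2), (0, 1)])
#     >>> arr.argsort()
#     array([2, 1, 0])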

815 def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: 

816 nv.validate_minmax_axis(axis, self.ndim) 

817 

818 if not len(self): 

819 return self._na_value 

820 

821 mask = self.isna() 

822 if mask.any(): 

823 if not skipna: 

824 return self._na_value 

825 obj = self[~mask] 

826 else: 

827 obj = self 

828 

829 indexer = obj.argsort()[0] 

830 return obj[indexer] 

831 

832 def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: 

833 nv.validate_minmax_axis(axis, self.ndim) 

834 

835 if not len(self): 

836 return self._na_value 

837 

838 mask = self.isna() 

839 if mask.any(): 

840 if not skipna: 

841 return self._na_value 

842 obj = self[~mask] 

843 else: 

844 obj = self 

845 

846 indexer = obj.argsort()[-1] 

847 return obj[indexer] 

848 

849 def fillna( 

850 self: IntervalArrayT, value=None, method=None, limit=None 

851 ) -> IntervalArrayT: 

852 """ 

853 Fill NA/NaN values using the specified method. 

854 

855 Parameters 

856 ---------- 

857 value : scalar, dict, Series 

858 If a scalar value is passed it is used to fill all missing values. 

859 Alternatively, a Series or dict can be used to fill in different 

860 values for each index. The value should not be a list. The 

861 value(s) passed should be either Interval objects or NA/NaN. 

862 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

863 (Not implemented yet for IntervalArray) 

864 Method to use for filling holes in reindexed Series 

865 limit : int, default None 

866 (Not implemented yet for IntervalArray) 

867 If method is specified, this is the maximum number of consecutive 

868 NaN values to forward/backward fill. In other words, if there is 

869 a gap with more than this number of consecutive NaNs, it will only 

870 be partially filled. If method is not specified, this is the 

871 maximum number of entries along the entire axis where NaNs will be 

872 filled. 

873 

874 Returns 

875 ------- 

876 filled : IntervalArray with NA/NaN filled 

877 """ 

878 if method is not None: 

879 raise TypeError("Filling by method is not supported for IntervalArray.") 

880 if limit is not None: 

881 raise TypeError("limit is not supported for IntervalArray.") 

882 

883 value_left, value_right = self._validate_scalar(value) 

884 

885 left = self.left.fillna(value=value_left) 

886 right = self.right.fillna(value=value_right) 

887 return self._shallow_copy(left, right) 

888 
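# Illustrative sketch of fillna above (assumes ``import pandas as pd`` and
# ``import numpy as np``); only a scalar Interval or NA fill value is accepted:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan, (2, 3)])
#     >>> arr.fillna(pd.Interval(1, 2))
#     <IntervalArray>
#     [(0.0, 1.0], (1.0, 2.0], (2.0, 3.0]]
#     Length: 3, dtype: interval[float64, right]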

889 def astype(self, dtype, copy: bool = True): 

890 """ 

891 Cast to an ExtensionArray or NumPy array with dtype 'dtype'. 

892 

893 Parameters 

894 ---------- 

895 dtype : str or dtype 

896 Typecode or data-type to which the array is cast. 

897 

898 copy : bool, default True 

899 Whether to copy the data, even if not necessary. If False, 

900 a copy is made only if the old dtype does not match the 

901 new dtype. 

902 

903 Returns 

904 ------- 

905 array : ExtensionArray or ndarray 

906 ExtensionArray or NumPy ndarray with 'dtype' for its dtype. 

907 """ 

908 from pandas import Index 

909 

910 if dtype is not None: 

911 dtype = pandas_dtype(dtype) 

912 

913 if is_interval_dtype(dtype): 

914 if dtype == self.dtype: 

915 return self.copy() if copy else self 

916 

917 # need to cast to different subtype 

918 try: 

919 # We need to use Index rules for astype to prevent casting 

920 # np.nan entries to int subtypes 

921 new_left = Index(self._left, copy=False).astype(dtype.subtype) 

922 new_right = Index(self._right, copy=False).astype(dtype.subtype) 

923 except IntCastingNaNError: 

924 # e.g test_subtype_integer 

925 raise 

926 except (TypeError, ValueError) as err: 

927 # e.g. test_subtype_integer_errors f8->u8 can be lossy 

928 # and raises ValueError 

929 msg = ( 

930 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" 

931 ) 

932 raise TypeError(msg) from err 

933 return self._shallow_copy(new_left, new_right) 

934 else: 

935 try: 

936 return super().astype(dtype, copy=copy) 

937 except (TypeError, ValueError) as err: 

938 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" 

939 raise TypeError(msg) from err 

940 
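# Illustrative sketch of an interval-to-interval cast handled above (assumes
# ``import pandas as pd``): the subtype changes while the bounds are preserved.
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
#     >>> arr.astype("interval[float64]")
#     <IntervalArray>
#     [(0.0, 1.0], (1.0, 2.0]]
#     Length: 2, dtype: interval[float64, right]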

941 def equals(self, other) -> bool: 

942 if type(self) != type(other): 

943 return False 

944 

945 return bool( 

946 self.closed == other.closed 

947 and self.left.equals(other.left) 

948 and self.right.equals(other.right) 

949 ) 

950 

951 @classmethod 

952 def _concat_same_type( 

953 cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT] 

954 ) -> IntervalArrayT: 

955 """ 

956 Concatenate multiple IntervalArray 

957 

958 Parameters 

959 ---------- 

960 to_concat : sequence of IntervalArray 

961 

962 Returns 

963 ------- 

964 IntervalArray 

965 """ 

966 closed_set = {interval.closed for interval in to_concat} 

967 if len(closed_set) != 1: 

968 raise ValueError("Intervals must all be closed on the same side.") 

969 closed = closed_set.pop() 

970 

971 left = np.concatenate([interval.left for interval in to_concat]) 

972 right = np.concatenate([interval.right for interval in to_concat]) 

973 return cls._simple_new(left, right, closed=closed, copy=False) 

974 

975 def copy(self: IntervalArrayT) -> IntervalArrayT: 

976 """ 

977 Return a copy of the array. 

978 

979 Returns 

980 ------- 

981 IntervalArray 

982 """ 

983 left = self._left.copy() 

984 right = self._right.copy() 

985 closed = self.closed 

986 # TODO: Could skip verify_integrity here. 

987 return type(self).from_arrays(left, right, closed=closed) 

988 

989 def isna(self) -> np.ndarray: 

990 return isna(self._left) 

991 

992 def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: 

993 if not len(self) or periods == 0: 

994 return self.copy() 

995 

996 if isna(fill_value): 

997 fill_value = self.dtype.na_value 

998 

999 # ExtensionArray.shift doesn't work for two reasons 

1000 # 1. IntervalArray.dtype.na_value may not be correct for the dtype. 

1001 # 2. IntervalArray._from_sequence only accepts NaN for missing values, 

1002 # not other values like NaT 

1003 

1004 empty_len = min(abs(periods), len(self)) 

1005 if isna(fill_value): 

1006 from pandas import Index 

1007 

1008 fill_value = Index(self._left, copy=False)._na_value 

1009 empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) 

1010 else: 

1011 empty = self._from_sequence([fill_value] * empty_len) 

1012 

1013 if periods > 0: 

1014 a = empty 

1015 b = self[:-periods] 

1016 else: 

1017 a = self[abs(periods) :] 

1018 b = empty 

1019 return self._concat_same_type([a, b]) 

1020 
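# Illustrative sketch of shift above (assumes ``import pandas as pd``); with the
# default fill value the leading slot becomes NA and the subtype is upcast:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.shift(1)
#     <IntervalArray>
#     [nan, (0.0, 1.0], (1.0, 2.0]]
#     Length: 3, dtype: interval[float64, right]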

1021 def take( 

1022 self: IntervalArrayT, 

1023 indices, 

1024 *, 

1025 allow_fill: bool = False, 

1026 fill_value=None, 

1027 axis=None, 

1028 **kwargs, 

1029 ) -> IntervalArrayT: 

1030 """ 

1031 Take elements from the IntervalArray. 

1032 

1033 Parameters 

1034 ---------- 

1035 indices : sequence of integers 

1036 Indices to be taken. 

1037 

1038 allow_fill : bool, default False 

1039 How to handle negative values in `indices`. 

1040 

1041 * False: negative values in `indices` indicate positional indices 

1042 from the right (the default). This is similar to 

1043 :func:`numpy.take`. 

1044 

1045 * True: negative values in `indices` indicate 

1046 missing values. These values are set to `fill_value`. Any 

1047 other negative values raise a ``ValueError``. 

1048 

1049 fill_value : Interval or NA, optional 

1050 Fill value to use for NA-indices when `allow_fill` is True. 

1051 This may be ``None``, in which case the default NA value for 

1052 the type, ``self.dtype.na_value``, is used. 

1053 

1054 For many ExtensionArrays, there will be two representations of 

1055 `fill_value`: a user-facing "boxed" scalar, and a low-level 

1056 physical NA value. `fill_value` should be the user-facing version, 

1057 and the implementation should handle translating that to the 

1058 physical version for processing the take if necessary. 

1059 

1060 axis : any, default None 

1061 Present for compat with IntervalIndex; does nothing. 

1062 

1063 Returns 

1064 ------- 

1065 IntervalArray 

1066 

1067 Raises 

1068 ------ 

1069 IndexError 

1070 When the indices are out of bounds for the array. 

1071 ValueError 

1072 When `indices` contains negative values other than ``-1`` 

1073 and `allow_fill` is True. 

1074 """ 

1075 nv.validate_take((), kwargs) 

1076 

1077 fill_left = fill_right = fill_value 

1078 if allow_fill: 

1079 fill_left, fill_right = self._validate_scalar(fill_value) 

1080 

1081 left_take = take( 

1082 self._left, indices, allow_fill=allow_fill, fill_value=fill_left 

1083 ) 

1084 right_take = take( 

1085 self._right, indices, allow_fill=allow_fill, fill_value=fill_right 

1086 ) 

1087 

1088 return self._shallow_copy(left_take, right_take) 

1089 
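# Illustrative sketch of take above (values chosen for illustration, assumes
# ``import pandas as pd``):
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.take([2, 0])
#     <IntervalArray>
#     [(2, 3], (0, 1]]
#     Length: 2, dtype: interval[int64, right]
#     >>> arr.take([0, -1], allow_fill=True)  # -1 becomes NA; subtype upcast to float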

1090 def _validate_listlike(self, value): 

1091 # list-like of intervals 

1092 try: 

1093 array = IntervalArray(value) 

1094 self._check_closed_matches(array, name="value") 

1095 value_left, value_right = array.left, array.right 

1096 except TypeError as err: 

1097 # wrong type: not interval or NA 

1098 msg = f"'value' should be an interval type, got {type(value)} instead." 

1099 raise TypeError(msg) from err 

1100 

1101 try: 

1102 self.left._validate_fill_value(value_left) 

1103 except (LossySetitemError, TypeError) as err: 

1104 msg = ( 

1105 "'value' should be a compatible interval type, " 

1106 f"got {type(value)} instead." 

1107 ) 

1108 raise TypeError(msg) from err 

1109 

1110 return value_left, value_right 

1111 

1112 def _validate_scalar(self, value): 

1113 if isinstance(value, Interval): 

1114 self._check_closed_matches(value, name="value") 

1115 left, right = value.left, value.right 

1116 # TODO: check subdtype match like _validate_setitem_value? 

1117 elif is_valid_na_for_dtype(value, self.left.dtype): 

1118 # GH#18295 

1119 left = right = self.left._na_value 

1120 else: 

1121 raise TypeError( 

1122 "can only insert Interval objects and NA into an IntervalArray" 

1123 ) 

1124 return left, right 

1125 

1126 def _validate_setitem_value(self, value): 

1127 

1128 if is_valid_na_for_dtype(value, self.left.dtype): 

1129 # na value: need special casing to set directly on numpy arrays 

1130 value = self.left._na_value 

1131 if is_integer_dtype(self.dtype.subtype): 

1132 # can't set NaN on a numpy integer array 

1133 # GH#45484 TypeError, not ValueError, matches what we get with 

1134 # non-NA un-holdable value. 

1135 raise TypeError("Cannot set float NaN to integer-backed IntervalArray") 

1136 value_left, value_right = value, value 

1137 

1138 elif isinstance(value, Interval): 

1139 # scalar interval 

1140 self._check_closed_matches(value, name="value") 

1141 value_left, value_right = value.left, value.right 

1142 self.left._validate_fill_value(value_left) 

1143 self.left._validate_fill_value(value_right) 

1144 

1145 else: 

1146 return self._validate_listlike(value) 

1147 

1148 return value_left, value_right 

1149 
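# Illustrative sketch of the setitem validation above (assumes ``import pandas
# as pd`` and ``import numpy as np``): Intervals with matching ``closed`` are
# accepted, while NA cannot be set on an integer-backed array:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
#     >>> arr[0] = pd.Interval(0, 5)
#     >>> arr
#     <IntervalArray>
#     [(0, 5], (1, 2]]
#     Length: 2, dtype: interval[int64, right]
#     >>> arr[0] = np.nan
#     TypeError: Cannot set float NaN to integer-backed IntervalArray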

1150 def value_counts(self, dropna: bool = True) -> Series: 

1151 """ 

1152 Returns a Series containing counts of each interval. 

1153 

1154 Parameters 

1155 ---------- 

1156 dropna : bool, default True 

1157 Don't include counts of NaN. 

1158 

1159 Returns 

1160 ------- 

1161 counts : Series 

1162 

1163 See Also 

1164 -------- 

1165 Series.value_counts 

1166 """ 

1167 # TODO: implement this in a non-naive way! 

1168 return value_counts(np.asarray(self), dropna=dropna) 

1169 

1170 # --------------------------------------------------------------------- 

1171 # Rendering Methods 

1172 

1173 def _format_data(self) -> str: 

1174 

1175 # TODO: integrate with categorical and make generic 

1176 # name argument is unused here; just for compat with base / categorical 

1177 n = len(self) 

1178 max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) 

1179 

1180 formatter = str 

1181 

1182 if n == 0: 

1183 summary = "[]" 

1184 elif n == 1: 

1185 first = formatter(self[0]) 

1186 summary = f"[{first}]" 

1187 elif n == 2: 

1188 first = formatter(self[0]) 

1189 last = formatter(self[-1]) 

1190 summary = f"[{first}, {last}]" 

1191 else: 

1192 

1193 if n > max_seq_items: 

1194 n = min(max_seq_items // 2, 10) 

1195 head = [formatter(x) for x in self[:n]] 

1196 tail = [formatter(x) for x in self[-n:]] 

1197 head_str = ", ".join(head) 

1198 tail_str = ", ".join(tail) 

1199 summary = f"[{head_str} ... {tail_str}]" 

1200 else: 

1201 tail = [formatter(x) for x in self] 

1202 tail_str = ", ".join(tail) 

1203 summary = f"[{tail_str}]" 

1204 

1205 return summary 

1206 

1207 def __repr__(self) -> str: 

1208 # the short repr has no trailing newline, while the truncated 

1209 # repr does. So we include a newline in our template, and strip 

1210 # any trailing newlines from format_object_summary 

1211 data = self._format_data() 

1212 class_name = f"<{type(self).__name__}>\n" 

1213 

1214 template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" 

1215 return template 

1216 

1217 def _format_space(self) -> str: 

1218 space = " " * (len(type(self).__name__) + 1) 

1219 return f"\n{space}" 

1220 

1221 # --------------------------------------------------------------------- 

1222 # Vectorized Interval Properties/Attributes 

1223 

1224 @property 

1225 def left(self): 

1226 """ 

1227 Return the left endpoints of each Interval in the IntervalArray as an Index. 

1228 """ 

1229 from pandas import Index 

1230 

1231 return Index(self._left, copy=False) 

1232 

1233 @property 

1234 def right(self): 

1235 """ 

1236 Return the right endpoints of each Interval in the IntervalArray as an Index. 

1237 """ 

1238 from pandas import Index 

1239 

1240 return Index(self._right, copy=False) 

1241 

1242 @property 

1243 def length(self) -> Index: 

1244 """ 

1245 Return an Index with entries denoting the length of each Interval. 

1246 """ 

1247 return self.right - self.left 

1248 

1249 @property 

1250 def mid(self) -> Index: 

1251 """ 

1252 Return the midpoint of each Interval in the IntervalArray as an Index. 

1253 """ 

1254 try: 

1255 return 0.5 * (self.left + self.right) 

1256 except TypeError: 

1257 # datetime safe version 

1258 return self.left + 0.5 * self.length 

1259 

1260 _interval_shared_docs["overlaps"] = textwrap.dedent( 

1261 """ 

1262 Check elementwise if an Interval overlaps the values in the %(klass)s. 

1263 

1264 Two intervals overlap if they share a common point, including closed 

1265 endpoints. Intervals that only have an open endpoint in common do not 

1266 overlap. 

1267 

1268 Parameters 

1269 ---------- 

1270 other : %(klass)s 

1271 Interval to check against for an overlap. 

1272 

1273 Returns 

1274 ------- 

1275 ndarray 

1276 Boolean array positionally indicating where an overlap occurs. 

1277 

1278 See Also 

1279 -------- 

1280 Interval.overlaps : Check whether two Interval objects overlap. 

1281 

1282 Examples 

1283 -------- 

1284 %(examples)s 

1285 >>> intervals.overlaps(pd.Interval(0.5, 1.5)) 

1286 array([ True, True, False]) 

1287 

1288 Intervals that share closed endpoints overlap: 

1289 

1290 >>> intervals.overlaps(pd.Interval(1, 3, closed='left')) 

1291 array([ True, True, True]) 

1292 

1293 Intervals that only have an open endpoint in common do not overlap: 

1294 

1295 >>> intervals.overlaps(pd.Interval(1, 2, closed='right')) 

1296 array([False, True, False]) 

1297 """ 

1298 ) 

1299 

1300 @Appender( 

1301 _interval_shared_docs["overlaps"] 

1302 % { 

1303 "klass": "IntervalArray", 

1304 "examples": textwrap.dedent( 

1305 """\ 

1306 >>> data = [(0, 1), (1, 3), (2, 4)] 

1307 >>> intervals = pd.arrays.IntervalArray.from_tuples(data) 

1308 >>> intervals 

1309 <IntervalArray> 

1310 [(0, 1], (1, 3], (2, 4]] 

1311 Length: 3, dtype: interval[int64, right] 

1312 """ 

1313 ), 

1314 } 

1315 ) 

1316 def overlaps(self, other): 

1317 if isinstance(other, (IntervalArray, ABCIntervalIndex)): 

1318 raise NotImplementedError 

1319 elif not isinstance(other, Interval): 

1320 msg = f"`other` must be Interval-like, got {type(other).__name__}" 

1321 raise TypeError(msg) 

1322 

1323 # equality is okay if both endpoints are closed (overlap at a point) 

1324 op1 = le if (self.closed_left and other.closed_right) else lt 

1325 op2 = le if (other.closed_left and self.closed_right) else lt 

1326 

1327 # overlaps is equivalent to the negation of two intervals being disjoint: 

1328 # disjoint = (A.left > B.right) or (B.left > A.right) 

1329 # (simplifying the negation allows this to be done in fewer operations) 

1330 return op1(self.left, other.right) & op2(other.left, self.right) 

1331 

1332 # --------------------------------------------------------------------- 

1333 

1334 @property 

1335 def closed(self) -> IntervalClosedType: 

1336 """ 

1337 String describing the inclusive side of the intervals. 

1338 

1339 Either ``left``, ``right``, ``both`` or ``neither``. 

1340 """ 

1341 return self.dtype.closed 

1342 

1343 _interval_shared_docs["set_closed"] = textwrap.dedent( 

1344 """ 

1345 Return an identical %(klass)s closed on the specified side. 

1346 

1347 Parameters 

1348 ---------- 

1349 closed : {'left', 'right', 'both', 'neither'} 

1350 Whether the intervals are closed on the left-side, right-side, both 

1351 or neither. 

1352 

1353 Returns 

1354 ------- 

1355 new_index : %(klass)s 

1356 

1357 %(examples)s\ 

1358 """ 

1359 ) 

1360 

1361 @Appender( 

1362 _interval_shared_docs["set_closed"] 

1363 % { 

1364 "klass": "IntervalArray", 

1365 "examples": textwrap.dedent( 

1366 """\ 

1367 Examples 

1368 -------- 

1369 >>> index = pd.arrays.IntervalArray.from_breaks(range(4)) 

1370 >>> index 

1371 <IntervalArray> 

1372 [(0, 1], (1, 2], (2, 3]] 

1373 Length: 3, dtype: interval[int64, right] 

1374 >>> index.set_closed('both') 

1375 <IntervalArray> 

1376 [[0, 1], [1, 2], [2, 3]] 

1377 Length: 3, dtype: interval[int64, both] 

1378 """ 

1379 ), 

1380 } 

1381 ) 

1382 def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT: 

1383 if closed not in VALID_CLOSED: 

1384 msg = f"invalid option for 'closed': {closed}" 

1385 raise ValueError(msg) 

1386 

1387 return type(self)._simple_new( 

1388 left=self._left, right=self._right, closed=closed, verify_integrity=False 

1389 ) 

1390 

1391 _interval_shared_docs[ 

1392 "is_non_overlapping_monotonic" 

1393 ] = """ 

1394 Return a boolean whether the %(klass)s is non-overlapping and monotonic. 

1395 

1396 Non-overlapping means no Intervals share points, and monotonic means 

1397 either monotonic increasing or monotonic decreasing. 

1398 """ 

1399 

1400 # https://github.com/python/mypy/issues/1362 

1401 # Mypy does not support decorated properties 

1402 @property # type: ignore[misc] 

1403 @Appender( 

1404 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs 

1405 ) 

1406 def is_non_overlapping_monotonic(self) -> bool: 

1407 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) 

1408 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) 

1409 # we already require left <= right 

1410 

1411 # strict inequality for closed == 'both'; equality implies overlapping 

1412 # at a point when both sides of intervals are included 

1413 if self.closed == "both": 

1414 return bool( 

1415 (self._right[:-1] < self._left[1:]).all() 

1416 or (self._left[:-1] > self._right[1:]).all() 

1417 ) 

1418 

1419 # non-strict inequality when closed != 'both'; at least one side is 

1420 # not included in the intervals, so equality does not imply overlapping 

1421 return bool( 

1422 (self._right[:-1] <= self._left[1:]).all() 

1423 or (self._left[:-1] >= self._right[1:]).all() 

1424 ) 

1425 
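# Illustrative sketch of the property above (assumes ``import pandas as pd``);
# shared endpoints only count as overlap when both sides are closed:
#
#     >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]).is_non_overlapping_monotonic
#     True
#     >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], closed="both").is_non_overlapping_monotonic
#     False
#     >>> pd.arrays.IntervalArray.from_tuples([(0, 2), (1, 3)]).is_non_overlapping_monotonic
#     False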

1426 # --------------------------------------------------------------------- 

1427 # Conversion 

1428 

1429 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: 

1430 """ 

1431 Return the IntervalArray's data as a numpy array of Interval 

1432 objects (with dtype='object') 

1433 """ 

1434 left = self._left 

1435 right = self._right 

1436 mask = self.isna() 

1437 closed = self.closed 

1438 

1439 result = np.empty(len(left), dtype=object) 

1440 for i in range(len(left)): 

1441 if mask[i]: 

1442 result[i] = np.nan 

1443 else: 

1444 result[i] = Interval(left[i], right[i], closed) 

1445 return result 

1446 

1447 def __arrow_array__(self, type=None): 

1448 """ 

1449 Convert myself into a pyarrow Array. 

1450 """ 

1451 import pyarrow 

1452 

1453 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType 

1454 

1455 try: 

1456 subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) 

1457 except TypeError as err: 

1458 raise TypeError( 

1459 f"Conversion to arrow with subtype '{self.dtype.subtype}' " 

1460 "is not supported" 

1461 ) from err 

1462 interval_type = ArrowIntervalType(subtype, self.closed) 

1463 storage_array = pyarrow.StructArray.from_arrays( 

1464 [ 

1465 pyarrow.array(self._left, type=subtype, from_pandas=True), 

1466 pyarrow.array(self._right, type=subtype, from_pandas=True), 

1467 ], 

1468 names=["left", "right"], 

1469 ) 

1470 mask = self.isna() 

1471 if mask.any(): 

1472 # if there are missing values, set validity bitmap also on the array level 

1473 null_bitmap = pyarrow.array(~mask).buffers()[1] 

1474 storage_array = pyarrow.StructArray.from_buffers( 

1475 storage_array.type, 

1476 len(storage_array), 

1477 [null_bitmap], 

1478 children=[storage_array.field(0), storage_array.field(1)], 

1479 ) 

1480 

1481 if type is not None: 

1482 if type.equals(interval_type.storage_type): 

1483 return storage_array 

1484 elif isinstance(type, ArrowIntervalType): 

1485 # ensure we have the same subtype and closed attributes 

1486 if not type.equals(interval_type): 

1487 raise TypeError( 

1488 "Not supported to convert IntervalArray to type with " 

1489 f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) " 

1490 f"and 'closed' ({self.closed} vs {type.closed}) attributes" 

1491 ) 

1492 else: 

1493 raise TypeError( 

1494 f"Not supported to convert IntervalArray to '{type}' type" 

1495 ) 

1496 

1497 return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) 

1498 

1499 _interval_shared_docs[ 

1500 "to_tuples" 

1501 ] = """ 

1502 Return an %(return_type)s of tuples of the form (left, right). 

1503 

1504 Parameters 

1505 ---------- 

1506 na_tuple : bool, default True 

1507 Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA 

1508 value itself if False, ``nan``. 

1509 

1510 Returns 

1511 ------- 

1512 tuples: %(return_type)s 

1513 %(examples)s\ 

1514 """ 

1515 

1516 @Appender( 

1517 _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} 

1518 ) 

1519 def to_tuples(self, na_tuple=True) -> np.ndarray: 

1520 tuples = com.asarray_tuplesafe(zip(self._left, self._right)) 

1521 if not na_tuple: 

1522 # GH 18756 

1523 tuples = np.where(~self.isna(), tuples, np.nan) 

1524 return tuples 

1525 
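# Illustrative sketch of to_tuples above (assumes ``import pandas as pd`` and
# ``import numpy as np``), showing the effect of ``na_tuple``:
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan])
#     >>> arr.to_tuples()
#     array([(0.0, 1.0), (nan, nan)], dtype=object)
#     >>> arr.to_tuples(na_tuple=False)
#     array([(0.0, 1.0), nan], dtype=object)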

1526 # --------------------------------------------------------------------- 

1527 

1528 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

1529 value_left, value_right = self._validate_setitem_value(value) 

1530 

1531 if isinstance(self._left, np.ndarray): 

1532 np.putmask(self._left, mask, value_left) 

1533 np.putmask(self._right, mask, value_right) 

1534 else: 

1535 self._left._putmask(mask, value_left) 

1536 self._right._putmask(mask, value_right) 

1537 

1538 def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: 

1539 """ 

1540 Return a new IntervalArray inserting new item at location. Follows 

1541 numpy.insert semantics for negative values. Only Interval 

1542 objects and NA can be inserted into an IntervalIndex. 

1543 

1544 Parameters 

1545 ---------- 

1546 loc : int 

1547 item : Interval 

1548 

1549 Returns 

1550 ------- 

1551 IntervalArray 

1552 """ 

1553 left_insert, right_insert = self._validate_scalar(item) 

1554 

1555 new_left = self.left.insert(loc, left_insert) 

1556 new_right = self.right.insert(loc, right_insert) 

1557 

1558 return self._shallow_copy(new_left, new_right) 

1559 

1560 def delete(self: IntervalArrayT, loc) -> IntervalArrayT: 

1561 if isinstance(self._left, np.ndarray): 

1562 new_left = np.delete(self._left, loc) 

1563 new_right = np.delete(self._right, loc) 

1564 else: 

1565 new_left = self._left.delete(loc) 

1566 new_right = self._right.delete(loc) 

1567 return self._shallow_copy(left=new_left, right=new_right) 

1568 

1569 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) 

1570 def repeat( 

1571 self: IntervalArrayT, 

1572 repeats: int | Sequence[int], 

1573 axis: int | None = None, 

1574 ) -> IntervalArrayT: 

1575 nv.validate_repeat((), {"axis": axis}) 

1576 left_repeat = self.left.repeat(repeats) 

1577 right_repeat = self.right.repeat(repeats) 

1578 return self._shallow_copy(left=left_repeat, right=right_repeat) 

1579 

1580 _interval_shared_docs["contains"] = textwrap.dedent( 

1581 """ 

1582 Check elementwise if the Intervals contain the value. 

1583 

1584 Return a boolean mask whether the value is contained in the Intervals 

1585 of the %(klass)s. 

1586 

1587 .. versionadded:: 0.25.0 

1588 

1589 Parameters 

1590 ---------- 

1591 other : scalar 

1592 The value to check whether it is contained in the Intervals. 

1593 

1594 Returns 

1595 ------- 

1596 boolean array 

1597 

1598 See Also 

1599 -------- 

1600 Interval.contains : Check whether Interval object contains value. 

1601 %(klass)s.overlaps : Check if an Interval overlaps the values in the 

1602 %(klass)s. 

1603 

1604 Examples 

1605 -------- 

1606 %(examples)s 

1607 >>> intervals.contains(0.5) 

1608 array([ True, False, False]) 

1609 """ 

1610 ) 

1611 

1612 @Appender( 

1613 _interval_shared_docs["contains"] 

1614 % { 

1615 "klass": "IntervalArray", 

1616 "examples": textwrap.dedent( 

1617 """\ 

1618 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) 

1619 >>> intervals 

1620 <IntervalArray> 

1621 [(0, 1], (1, 3], (2, 4]] 

1622 Length: 3, dtype: interval[int64, right] 

1623 """ 

1624 ), 

1625 } 

1626 ) 

1627 def contains(self, other): 

1628 if isinstance(other, Interval): 

1629 raise NotImplementedError("contains not implemented for two intervals") 

1630 

1631 return (self._left < other if self.open_left else self._left <= other) & ( 

1632 other < self._right if self.open_right else other <= self._right 

1633 ) 

1634 

1635 def isin(self, values) -> npt.NDArray[np.bool_]: 

1636 if not hasattr(values, "dtype"): 

1637 values = np.array(values) 

1638 values = extract_array(values, extract_numpy=True) 

1639 

1640 if is_interval_dtype(values.dtype): 

1641 if self.closed != values.closed: 

1642 # not comparable -> no overlap 

1643 return np.zeros(self.shape, dtype=bool) 

1644 

1645 if is_dtype_equal(self.dtype, values.dtype): 

1646 # GH#38353 instead of casting to object, operating on a 

1647 # complex128 ndarray is much more performant. 

1648 left = self._combined.view("complex128") 

1649 right = values._combined.view("complex128") 

1650 # error: Argument 1 to "in1d" has incompatible type 

1651 # "Union[ExtensionArray, ndarray[Any, Any], 

1652 # ndarray[Any, dtype[Any]]]"; expected 

1653 # "Union[_SupportsArray[dtype[Any]], 

1654 # _NestedSequence[_SupportsArray[dtype[Any]]], bool, 

1655 # int, float, complex, str, bytes, _NestedSequence[ 

1656 # Union[bool, int, float, complex, str, bytes]]]" 

1657 return np.in1d(left, right) # type: ignore[arg-type] 

1658 

1659 elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion( 

1660 values.left.dtype 

1661 ): 

1662 # not comparable -> no overlap 

1663 return np.zeros(self.shape, dtype=bool) 

1664 

1665 return isin(self.astype(object), values.astype(object)) 

1666 
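# The fast path above reinterprets each (left, right) pair of 8-byte bounds as
# a single complex128 scalar, so whole intervals can be matched with one
# np.in1d call instead of casting to object. Illustrative sketch (values chosen
# for illustration, assumes ``import pandas as pd``):
#
#     >>> arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
#     >>> arr.isin(arr[:1])
#     array([ True, False, False])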

1667 @property 

1668 def _combined(self) -> ArrayLike: 

1669 left = self.left._values.reshape(-1, 1) 

1670 right = self.right._values.reshape(-1, 1) 

1671 if needs_i8_conversion(left.dtype): 

1672 comb = left._concat_same_type([left, right], axis=1) 

1673 else: 

1674 comb = np.concatenate([left, right], axis=1) 

1675 return comb 

1676 

1677 def _from_combined(self, combined: np.ndarray) -> IntervalArray: 

1678 """ 

1679 Create a new IntervalArray with our dtype from a 1D complex128 ndarray. 

1680 """ 

1681 nc = combined.view("i8").reshape(-1, 2) 

1682 

1683 dtype = self._left.dtype 

1684 if needs_i8_conversion(dtype): 

1685 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" 

1686 new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] 

1687 nc[:, 0], dtype=dtype 

1688 ) 

1689 # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" 

1690 new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] 

1691 nc[:, 1], dtype=dtype 

1692 ) 

1693 else: 

1694 new_left = nc[:, 0].view(dtype) 

1695 new_right = nc[:, 1].view(dtype) 

1696 return self._shallow_copy(left=new_left, right=new_right) 

1697 

1698 def unique(self) -> IntervalArray: 

1699 # No overload variant of "__getitem__" of "ExtensionArray" matches argument 

1700 # type "Tuple[slice, int]" 

1701 nc = unique( 

1702 self._combined.view("complex128")[:, 0] # type: ignore[call-overload] 

1703 ) 

1704 nc = nc[:, None] 

1705 return self._from_combined(nc) 

1706 

1707 

1708def _maybe_convert_platform_interval(values) -> ArrayLike: 

1709 """ 

1710 Try to do platform conversion, with special casing for IntervalArray. 

1711 Wrapper around maybe_convert_platform that alters the default return 

1712 dtype in certain cases to be compatible with IntervalArray. For example, 

1713 empty lists return with integer dtype instead of object dtype, which is 

1714 prohibited for IntervalArray. 

1715 

1716 Parameters 

1717 ---------- 

1718 values : array-like 

1719 

1720 Returns 

1721 ------- 

1722 array 

1723 """ 

1724 if isinstance(values, (list, tuple)) and len(values) == 0: 

1725 # GH 19016 

1726 # empty lists/tuples get object dtype by default, but this is 

1727 # prohibited for IntervalArray, so coerce to integer instead 

1728 return np.array([], dtype=np.int64) 

1729 elif not is_list_like(values) or isinstance(values, ABCDataFrame): 

1730 # This will raise later, but we avoid passing to maybe_convert_platform 

1731 return values 

1732 elif is_categorical_dtype(values): 

1733 values = np.asarray(values) 

1734 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)): 

1735 # TODO: should we just cast these to list? 

1736 return values 

1737 else: 

1738 values = extract_array(values, extract_numpy=True) 

1739 

1740 if not hasattr(values, "dtype"): 

1741 return np.asarray(values) 

1742 return values
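
# Illustrative sketch of the empty-input special case described above (assumes
# ``import pandas as pd``): an empty list yields an int64 subtype rather than
# the object dtype that IntervalArray prohibits.
#
#     >>> pd.arrays.IntervalArray([])
#     <IntervalArray>
#     []
#     Length: 0, dtype: interval[int64, right]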