Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/interval.py: 21%

376 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" define the IntervalIndex """ 

2from __future__ import annotations 

3 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 Any, 

11 Hashable, 

12 Literal, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import lib 

18from pandas._libs.interval import ( 

19 Interval, 

20 IntervalMixin, 

21 IntervalTree, 

22) 

23from pandas._libs.tslibs import ( 

24 BaseOffset, 

25 Timedelta, 

26 Timestamp, 

27 to_offset, 

28) 

29from pandas._typing import ( 

30 Dtype, 

31 DtypeObj, 

32 IntervalClosedType, 

33 npt, 

34) 

35from pandas.errors import InvalidIndexError 

36from pandas.util._decorators import ( 

37 Appender, 

38 cache_readonly, 

39) 

40from pandas.util._exceptions import rewrite_exception 

41 

42from pandas.core.dtypes.cast import ( 

43 find_common_type, 

44 infer_dtype_from_scalar, 

45 maybe_box_datetimelike, 

46 maybe_downcast_numeric, 

47) 

48from pandas.core.dtypes.common import ( 

49 ensure_platform_int, 

50 is_datetime64tz_dtype, 

51 is_datetime_or_timedelta_dtype, 

52 is_dtype_equal, 

53 is_float, 

54 is_float_dtype, 

55 is_integer, 

56 is_integer_dtype, 

57 is_interval_dtype, 

58 is_list_like, 

59 is_number, 

60 is_object_dtype, 

61 is_scalar, 

62) 

63from pandas.core.dtypes.dtypes import IntervalDtype 

64from pandas.core.dtypes.missing import is_valid_na_for_dtype 

65 

66from pandas.core.algorithms import unique 

67from pandas.core.arrays.interval import ( 

68 IntervalArray, 

69 _interval_shared_docs, 

70) 

71import pandas.core.common as com 

72from pandas.core.indexers import is_valid_positional_slice 

73import pandas.core.indexes.base as ibase 

74from pandas.core.indexes.base import ( 

75 Index, 

76 _index_shared_docs, 

77 ensure_index, 

78 maybe_extract_name, 

79) 

80from pandas.core.indexes.datetimes import ( 

81 DatetimeIndex, 

82 date_range, 

83) 

84from pandas.core.indexes.extension import ( 

85 ExtensionIndex, 

86 inherit_names, 

87) 

88from pandas.core.indexes.multi import MultiIndex 

89from pandas.core.indexes.timedeltas import ( 

90 TimedeltaIndex, 

91 timedelta_range, 

92) 

93 

94_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

95 

96_index_doc_kwargs.update( 

97 { 

98 "klass": "IntervalIndex", 

99 "qualname": "IntervalIndex", 

100 "target_klass": "IntervalIndex or list of Intervals", 

101 "name": textwrap.dedent( 

102 """\ 

103 name : object, optional 

104 Name to be stored in the index. 

105 """ 

106 ), 

107 } 

108) 

109 

110 

111def _get_next_label(label): 

112 dtype = getattr(label, "dtype", type(label)) 

113 if isinstance(label, (Timestamp, Timedelta)): 

114 dtype = "datetime64" 

115 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

116 return label + np.timedelta64(1, "ns") 

117 elif is_integer_dtype(dtype): 

118 return label + 1 

119 elif is_float_dtype(dtype): 

120 return np.nextafter(label, np.infty) 

121 else: 

122 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

123 

124 

125def _get_prev_label(label): 

126 dtype = getattr(label, "dtype", type(label)) 

127 if isinstance(label, (Timestamp, Timedelta)): 

128 dtype = "datetime64" 

129 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

130 return label - np.timedelta64(1, "ns") 

131 elif is_integer_dtype(dtype): 

132 return label - 1 

133 elif is_float_dtype(dtype): 

134 return np.nextafter(label, -np.infty) 

135 else: 

136 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

137 

138 

139def _new_IntervalIndex(cls, d): 

140 """ 

141 This is called upon unpickling, rather than the default which doesn't have 

142 arguments and breaks __new__. 

143 """ 

144 return cls.from_arrays(**d) 

145 

146 

@Appender(
    _interval_shared_docs["class"]
    % {
        "klass": "IntervalIndex",
        "summary": "Immutable index of intervals that are closed on the same side.",
        "name": _index_doc_kwargs["name"],
        "versionadded": "0.20.0",
        "extra_attributes": "is_overlapping\nvalues\n",
        "extra_methods": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        A new ``IntervalIndex`` is typically constructed using
        :func:`interval_range`:

        >>> pd.interval_range(start=0, end=5)
        IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                      dtype='interval[int64, right]')

        It may also be constructed using one of the constructor
        methods: :meth:`IntervalIndex.from_arrays`,
        :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

        See further examples in the doc strings of ``interval_range`` and the
        mentioned constructor methods.
        """
        ),
    }
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
    [
        "__array__",
        "overlaps",
        "contains",
        "closed_left",
        "closed_right",
        "open_left",
        "open_right",
        "is_empty",
    ],
    IntervalArray,
)
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
class IntervalIndex(ExtensionIndex):
    _typ = "intervalindex"

    # annotate properties pinned via inherit_names
    closed: IntervalClosedType
    is_non_overlapping_monotonic: bool
    closed_left: bool
    closed_right: bool
    open_left: bool
    open_right: bool

    _data: IntervalArray
    _values: IntervalArray
    _can_hold_strings = False
    _data_cls = IntervalArray

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data,
        closed=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        verify_integrity: bool = True,
    ) -> IntervalIndex:

        name = maybe_extract_name(name, data, cls)

        # Validation and construction are delegated to IntervalArray;
        # exceptions raised there are re-labeled with this class's name.
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                closed=closed,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_breaks(
        cls,
        breaks,
        closed: IntervalClosedType | None = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_breaks(
                breaks, closed=closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_arrays(
        cls,
        left,
        right,
        closed: IntervalClosedType = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_arrays(
                left, right, closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
        IntervalIndex([(0, 1], (1, 2]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_tuples(
        cls,
        data,
        closed: str = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
        return cls._simple_new(arr, name=name)

    # --------------------------------------------------------------------
    # error: Return type "IntervalTree" of "_engine" incompatible with return type
    # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
    @cache_readonly
    def _engine(self) -> IntervalTree:  # type: ignore[override]
        # IntervalTree expects numeric endpoints, so datetime-like sides
        # are converted to their i8 (nanosecond integer) representation.
        left = self._maybe_convert_i8(self.left)
        right = self._maybe_convert_i8(self.right)
        return IntervalTree(left, right, closed=self.closed)

    def __contains__(self, key: Any) -> bool:
        """
        return a boolean if this key is IN the index
        We *only* accept an Interval

        Parameters
        ----------
        key : Interval

        Returns
        -------
        bool
        """
        # hash() raises TypeError for unhashable keys, matching Index behavior
        hash(key)
        if not isinstance(key, Interval):
            # NA-like keys are "contained" iff the index actually holds NAs
            if is_valid_na_for_dtype(key, self.dtype):
                return self.hasnans
            return False

        try:
            self.get_loc(key)
            return True
        except KeyError:
            return False

    @cache_readonly
    def _multiindex(self) -> MultiIndex:
        # 2-level MultiIndex view of the endpoints; used by some algorithms
        return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

    def __reduce__(self):
        # Pickle via _new_IntervalIndex so unpickling goes through the
        # from_arrays constructor instead of the argument-less default.
        d = {
            "left": self.left,
            "right": self.right,
            "closed": self.closed,
            "name": self.name,
        }
        return _new_IntervalIndex, (type(self), d), None

    @property
    def inferred_type(self) -> str:
        """Return a string of the type inferred from the values"""
        return "interval"

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we don't use an explicit engine
        # so return the bytes here
        return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)

    # IntervalTree doesn't have a is_monotonic_decreasing, so have to override
    # the Index implementation
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic decreasing (only equal or
        decreasing values), else False
        """
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return True if the IntervalIndex contains unique elements, else False.
        """
        left = self.left
        right = self.right

        # more than one NA means duplicated NAs
        if self.isna().sum() > 1:
            return False

        # if either side is unique, every (left, right) pair is unique
        if left.is_unique or right.is_unique:
            return True

        # both sides have duplicates: check only positions where left repeats
        seen_pairs = set()
        check_idx = np.where(left.duplicated(keep=False))[0]
        for idx in check_idx:
            pair = (left[idx], right[idx])
            if pair in seen_pairs:
                return False
            seen_pairs.add(pair)

        return True

    @property
    def is_overlapping(self) -> bool:
        """
        Return True if the IntervalIndex has overlapping intervals, else False.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Returns
        -------
        bool
            Boolean indicating if the IntervalIndex has overlapping intervals.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.
        IntervalIndex.overlaps : Check an IntervalIndex elementwise for
            overlaps.

        Examples
        --------
        >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
        >>> index
        IntervalIndex([(0, 2], (1, 3], (4, 5]],
              dtype='interval[int64, right]')
        >>> index.is_overlapping
        True

        Intervals that share closed endpoints overlap:

        >>> index = pd.interval_range(0, 3, closed='both')
        >>> index
        IntervalIndex([[0, 1], [1, 2], [2, 3]],
              dtype='interval[int64, both]')
        >>> index.is_overlapping
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> index = pd.interval_range(0, 3, closed='left')
        >>> index
        IntervalIndex([[0, 1), [1, 2), [2, 3)],
              dtype='interval[int64, left]')
        >>> index.is_overlapping
        False
        """
        # GH 23309
        return self._engine.is_overlapping

    def _needs_i8_conversion(self, key) -> bool:
        """
        Check if a given key needs i8 conversion. Conversion is necessary for
        Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
        Interval-like requires conversion if its endpoints are one of the
        aforementioned types.

        Assumes that any list-like data has already been cast to an Index.

        Parameters
        ----------
        key : scalar or Index-like
            The key that should be checked for i8 conversion

        Returns
        -------
        bool
        """
        if is_interval_dtype(key) or isinstance(key, Interval):
            # recurse on the left endpoint; both sides share the subtype
            return self._needs_i8_conversion(key.left)

        i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
        return isinstance(key, i8_types)

    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Int64Index if converted list-like.
        """
        original = key
        if is_list_like(key):
            key = ensure_index(key)

        if not self._needs_i8_conversion(key):
            return original

        scalar = is_scalar(key)
        if is_interval_dtype(key) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # error: "object" not callable
            return constructor(
                left, right, closed=self.closed
            )  # type: ignore[operator]

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
            if lib.is_period(key):
                key_i8 = key.ordinal
            elif isinstance(key_i8, Timestamp):
                key_i8 = key_i8.value
            elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
                key_i8 = key_i8.view("i8")
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        subtype = self.dtype.subtype  # type: ignore[union-attr]

        if not is_dtype_equal(subtype, key_dtype):
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8

    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
        # Only meaningful when intervals don't overlap and are sorted.
        if not self.is_non_overlapping_monotonic:
            raise KeyError(
                "can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing"
            )

        if isinstance(label, (IntervalMixin, IntervalIndex)):
            raise NotImplementedError("Interval objects are not currently supported")

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if (side == "left" and self.left.is_monotonic_increasing) or (
            side == "right" and not self.left.is_monotonic_increasing
        ):
            sub_idx = self.right
            if self.open_right:
                # open side: bump the label past the boundary value
                label = _get_next_label(label)
        else:
            sub_idx = self.left
            if self.open_left:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)

    # --------------------------------------------------------------------
    # Indexing Methods

    def get_loc(
        self, key, method: str | None = None, tolerance=None
    ) -> int | slice | np.ndarray:
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

            .. deprecated:: 1.4

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply a point inside an interval.

        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
        >>> overlapping_index.get_loc(0.5)
        array([ True, False,  True])

        Only exact matches will be returned if an interval is provided.

        >>> index.get_loc(pd.Interval(0, 1))
        0
        """
        self._check_indexing_method(method)
        self._check_indexing_error(key)

        if isinstance(key, Interval):
            # an Interval key must match an element exactly, including closed-ness
            if self.closed != key.closed:
                raise KeyError(key)
            mask = (self.left == key.left) & (self.right == key.right)
        elif is_valid_na_for_dtype(key, self.dtype):
            mask = self.isna()
        else:
            # assume scalar
            op_left = le if self.closed_left else lt
            op_right = le if self.closed_right else lt
            try:
                mask = op_left(self.left, key) & op_right(key, self.right)
            except TypeError as err:
                # scalar is not comparable to II subtype --> invalid label
                raise KeyError(key) from err

        matches = mask.sum()
        if matches == 0:
            raise KeyError(key)
        elif matches == 1:
            return mask.argmax()

        res = lib.maybe_booleans_to_slice(mask.view("u1"))
        if isinstance(res, slice) and res.stop is None:
            # TODO: DO this in maybe_booleans_to_slice?
            res = slice(res.start, len(self), res.step)
        return res

    def _get_indexer(
        self,
        target: Index,
        method: str | None = None,
        limit: int | None = None,
        tolerance: Any | None = None,
    ) -> npt.NDArray[np.intp]:

        if isinstance(target, IntervalIndex):
            # We only get here with not self.is_overlapping
            # -> at most one match per interval in target
            # want exact matches -> need both left/right to match, so defer to
            # left/right get_indexer, compare elementwise, equality -> match
            indexer = self._get_indexer_unique_sides(target)

        elif not is_object_dtype(target.dtype):
            # homogeneous scalar index: use IntervalTree
            # we should always have self._should_partial_index(target) here
            target = self._maybe_convert_i8(target)
            indexer = self._engine.get_indexer(target.values)
        else:
            # heterogeneous scalar index: defer elementwise to get_loc
            # we should always have self._should_partial_index(target) here
            return self._get_indexer_pointwise(target)[0]

        return ensure_platform_int(indexer)

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # e.g. IntervalIndex with different closed or incompatible subtype
            # -> no matches
            return self._get_indexer_non_comparable(target, None, unique=False)

        elif isinstance(target, IntervalIndex):
            if self.left.is_unique and self.right.is_unique:
                # fastpath available even if we don't have self._index_as_unique
                indexer = self._get_indexer_unique_sides(target)
                missing = (indexer == -1).nonzero()[0]
            else:
                return self._get_indexer_pointwise(target)

        elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
            # target might contain intervals: defer elementwise to get_loc
            return self._get_indexer_pointwise(target)

        else:
            # Note: this case behaves differently from other Index subclasses
            #  because IntervalIndex does partial-int indexing
            target = self._maybe_convert_i8(target)
            indexer, missing = self._engine.get_indexer_non_unique(target.values)

        return ensure_platform_int(indexer), ensure_platform_int(missing)

    def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
        """
        _get_indexer specialized to the case where both of our sides are unique.
        """
        # Caller is responsible for checking
        #  `self.left.is_unique and self.right.is_unique`

        left_indexer = self.left.get_indexer(target.left)
        right_indexer = self.right.get_indexer(target.right)
        # a target interval matches only when both endpoints land on the
        # same position in self
        indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
        return indexer

    def _get_indexer_pointwise(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        pointwise implementation for get_indexer and get_indexer_non_unique.
        """
        indexer, missing = [], []
        for i, key in enumerate(target):
            try:
                locs = self.get_loc(key)
                if isinstance(locs, slice):
                    # Only needed for get_indexer_non_unique
                    locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                elif lib.is_integer(locs):
                    locs = np.array(locs, ndmin=1)
                else:
                    # otherwise we have ndarray[bool]
                    locs = np.where(locs)[0]
            except KeyError:
                missing.append(i)
                locs = np.array([-1])
            except InvalidIndexError:
                # i.e. non-scalar key e.g. a tuple.
                # see test_append_different_columns_types_raises
                missing.append(i)
                locs = np.array([-1])

            indexer.append(locs)

        indexer = np.concatenate(indexer)
        return ensure_platform_int(indexer), ensure_platform_int(missing)

    @cache_readonly
    def _index_as_unique(self) -> bool:
        # unique for indexing purposes: no overlaps and at most one NA
        return not self.is_overlapping and self._engine._na_count < 2

    _requires_unique_msg = (
        "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
    )

    def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
        if not (key.step is None or key.step == 1):
            # GH#31658 if label-based, we require step == 1,
            #  if positional, we disallow float start/stop
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
            if kind == "loc":
                raise ValueError(msg)
            elif kind == "getitem":
                if not is_valid_positional_slice(key):
                    # i.e. this cannot be interpreted as a positional slice
                    raise ValueError(msg)

        return super()._convert_slice_indexer(key, kind, is_frame=is_frame)

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        # integer lookups in Series.__getitem__ are unambiguously
        #  positional in this case
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        return self.dtype.subtype.kind in ["m", "M"]  # type: ignore[union-attr]

    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        # delegate to the matching endpoint index (self.left / self.right)
        self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
        return getattr(self, side)._maybe_cast_slice_bound(label, side)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        if not isinstance(dtype, IntervalDtype):
            return False
        # comparable iff the subtypes have a non-object common type
        common_subtype = find_common_type([self.dtype, dtype])
        return not is_object_dtype(common_subtype)

    # --------------------------------------------------------------------

    @cache_readonly
    def left(self) -> Index:
        # Index view of the left endpoints
        return Index(self._data.left, copy=False)

    @cache_readonly
    def right(self) -> Index:
        # Index view of the right endpoints
        return Index(self._data.right, copy=False)

    @cache_readonly
    def mid(self) -> Index:
        # Index of interval midpoints
        return Index(self._data.mid, copy=False)

    @property
    def length(self) -> Index:
        # Index of interval lengths (right - left)
        return Index(self._data.length, copy=False)

    # --------------------------------------------------------------------
    # Rendering Methods
    # __repr__ associated methods are based on MultiIndex

    def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
        # matches base class except for whitespace padding
        return header + list(self._format_native_types(na_rep=na_rep))

    def _format_native_types(
        self, *, na_rep="NaN", quoting=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # GH 28210: use base method but with different default na_rep
        return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs)

    def _format_data(self, name=None) -> str:
        # TODO: integrate with categorical and make generic
        # name argument is unused here; just for compat with base / categorical
        return self._data._format_data() + "," + self._format_space()

    # --------------------------------------------------------------------
    # Set Operations

    def _intersection(self, other, sort):
        """
        intersection specialized to the case with matching dtypes.
        """
        # For IntervalIndex we also know other.closed == self.closed
        if self.left.is_unique and self.right.is_unique:
            taken = self._intersection_unique(other)
        elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
            # Swap other/self if other is unique and self does not have
            # multiple NaNs
            taken = other._intersection_unique(self)
        else:
            # duplicates
            taken = self._intersection_non_unique(other)

        if sort is None:
            taken = taken.sort_values()

        return taken

    def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does not have any common endpoint,
        no matter left or right.
        Return the intersection with another IntervalIndex.
        Parameters
        ----------
        other : IntervalIndex
        Returns
        -------
        IntervalIndex
        """
        # Note: this is much more performant than super()._intersection(other)
        lindexer = self.left.get_indexer(other.left)
        rindexer = self.right.get_indexer(other.right)

        match = (lindexer == rindexer) & (lindexer != -1)
        indexer = lindexer.take(match.nonzero()[0])
        indexer = unique(indexer)

        return self.take(indexer)

    def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does have some common endpoints,
        on either sides.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is about 3.25x faster than super()._intersection(other)
        #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
        mask = np.zeros(len(self), dtype=bool)

        if self.hasnans and other.hasnans:
            # NA appears in both sides: keep exactly one NA in the result
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

        other_tups = set(zip(other.left, other.right))
        for i, tup in enumerate(zip(self.left, self.right)):
            if tup in other_tups:
                mask[i] = True

        return self[mask]

    # --------------------------------------------------------------------

    def _get_engine_target(self) -> np.ndarray:
        # Note: we _could_ use libjoin functions by either casting to object
        #  dtype or constructing tuples (faster than constructing Intervals)
        #  but the libjoin fastpaths are no longer fast in these cases.
        raise NotImplementedError(
            "IntervalIndex does not use libjoin fastpaths or pass values to "
            "IndexEngine objects"
        )

    def _from_join_target(self, result):
        raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

    # TODO: arithmetic operations

925 

926 

927def _is_valid_endpoint(endpoint) -> bool: 

928 """ 

929 Helper for interval_range to check if start/end are valid types. 

930 """ 

931 return any( 

932 [ 

933 is_number(endpoint), 

934 isinstance(endpoint, Timestamp), 

935 isinstance(endpoint, Timedelta), 

936 endpoint is None, 

937 ] 

938 ) 

939 

940 

941def _is_type_compatible(a, b) -> bool: 

942 """ 

943 Helper for interval_range to check type compat of start/end/freq. 

944 """ 

945 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) 

946 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) 

947 return ( 

948 (is_number(a) and is_number(b)) 

949 or (is_ts_compat(a) and is_ts_compat(b)) 

950 or (is_td_compat(a) and is_td_compat(b)) 

951 or com.any_none(a, b) 

952 ) 

953 

954 

def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``. For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    # Convert stdlib datetime/timedelta scalars to Timestamp/Timedelta so the
    # type dispatch below (is_number / isinstance(Timestamp)) works uniformly.
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # Either bound may be None; whichever is given determines numeric vs
    # datetime-like handling for the whole call.
    endpoint = start if start is not None else end

    # Default freq only kicks in when fewer than all of periods/start/end are
    # given; otherwise the three fully determine the breaks (linear spacing).
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    # Accept float periods by truncating to int (matches date_range behavior);
    # anything else non-integer and non-None is rejected.
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    # Non-numeric freq (str / DateOffset) must resolve to a DateOffset.
    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
            # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
            # expected "ndarray[Any, Any]" [
            breaks = maybe_downcast_numeric(
                breaks,  # type: ignore[arg-type]
                np.dtype("int64"),
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    # Adjacent breaks become the left/right edges of the resulting intervals.
    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)