Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/interval.py: 21%
376 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1""" define the IntervalIndex """
2from __future__ import annotations
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 Any,
11 Hashable,
12 Literal,
13)
15import numpy as np
17from pandas._libs import lib
18from pandas._libs.interval import (
19 Interval,
20 IntervalMixin,
21 IntervalTree,
22)
23from pandas._libs.tslibs import (
24 BaseOffset,
25 Timedelta,
26 Timestamp,
27 to_offset,
28)
29from pandas._typing import (
30 Dtype,
31 DtypeObj,
32 IntervalClosedType,
33 npt,
34)
35from pandas.errors import InvalidIndexError
36from pandas.util._decorators import (
37 Appender,
38 cache_readonly,
39)
40from pandas.util._exceptions import rewrite_exception
42from pandas.core.dtypes.cast import (
43 find_common_type,
44 infer_dtype_from_scalar,
45 maybe_box_datetimelike,
46 maybe_downcast_numeric,
47)
48from pandas.core.dtypes.common import (
49 ensure_platform_int,
50 is_datetime64tz_dtype,
51 is_datetime_or_timedelta_dtype,
52 is_dtype_equal,
53 is_float,
54 is_float_dtype,
55 is_integer,
56 is_integer_dtype,
57 is_interval_dtype,
58 is_list_like,
59 is_number,
60 is_object_dtype,
61 is_scalar,
62)
63from pandas.core.dtypes.dtypes import IntervalDtype
64from pandas.core.dtypes.missing import is_valid_na_for_dtype
66from pandas.core.algorithms import unique
67from pandas.core.arrays.interval import (
68 IntervalArray,
69 _interval_shared_docs,
70)
71import pandas.core.common as com
72from pandas.core.indexers import is_valid_positional_slice
73import pandas.core.indexes.base as ibase
74from pandas.core.indexes.base import (
75 Index,
76 _index_shared_docs,
77 ensure_index,
78 maybe_extract_name,
79)
80from pandas.core.indexes.datetimes import (
81 DatetimeIndex,
82 date_range,
83)
84from pandas.core.indexes.extension import (
85 ExtensionIndex,
86 inherit_names,
87)
88from pandas.core.indexes.multi import MultiIndex
89from pandas.core.indexes.timedeltas import (
90 TimedeltaIndex,
91 timedelta_range,
92)
# Substitution values for the shared docstring templates used throughout this
# module (e.g. via @Appender(_index_shared_docs[...] % _index_doc_kwargs)).
_index_doc_kwargs = dict(ibase._index_doc_kwargs)

_index_doc_kwargs.update(
    {
        "klass": "IntervalIndex",
        "qualname": "IntervalIndex",
        "target_klass": "IntervalIndex or list of Intervals",
        "name": textwrap.dedent(
            """\
         name : object, optional
              Name to be stored in the index.
         """
        ),
    }
)
111def _get_next_label(label):
112 dtype = getattr(label, "dtype", type(label))
113 if isinstance(label, (Timestamp, Timedelta)):
114 dtype = "datetime64"
115 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
116 return label + np.timedelta64(1, "ns")
117 elif is_integer_dtype(dtype):
118 return label + 1
119 elif is_float_dtype(dtype):
120 return np.nextafter(label, np.infty)
121 else:
122 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
125def _get_prev_label(label):
126 dtype = getattr(label, "dtype", type(label))
127 if isinstance(label, (Timestamp, Timedelta)):
128 dtype = "datetime64"
129 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
130 return label - np.timedelta64(1, "ns")
131 elif is_integer_dtype(dtype):
132 return label - 1
133 elif is_float_dtype(dtype):
134 return np.nextafter(label, -np.infty)
135 else:
136 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
def _new_IntervalIndex(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't have
    arguments and breaks __new__.
    """
    # `d` is the dict produced by IntervalIndex.__reduce__:
    # {"left": ..., "right": ..., "closed": ..., "name": ...}
    return cls.from_arrays(**d)
@Appender(
    _interval_shared_docs["class"]
    % {
        "klass": "IntervalIndex",
        "summary": "Immutable index of intervals that are closed on the same side.",
        "name": _index_doc_kwargs["name"],
        "versionadded": "0.20.0",
        "extra_attributes": "is_overlapping\nvalues\n",
        "extra_methods": "",
        "examples": textwrap.dedent(
            """\
        Examples
        --------
        A new ``IntervalIndex`` is typically constructed using
        :func:`interval_range`:

        >>> pd.interval_range(start=0, end=5)
        IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                      dtype='interval[int64, right]')

        It may also be constructed using one of the constructor
        methods: :meth:`IntervalIndex.from_arrays`,
        :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

        See further examples in the doc strings of ``interval_range`` and the
        mentioned constructor methods.
        """
        ),
    }
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
    [
        "__array__",
        "overlaps",
        "contains",
        "closed_left",
        "closed_right",
        "open_left",
        "open_right",
        "is_empty",
    ],
    IntervalArray,
)
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
class IntervalIndex(ExtensionIndex):
    _typ = "intervalindex"

    # annotate properties pinned via inherit_names
    closed: IntervalClosedType
    is_non_overlapping_monotonic: bool
    closed_left: bool
    closed_right: bool
    open_left: bool
    open_right: bool

    _data: IntervalArray
    _values: IntervalArray
    _can_hold_strings = False
    _data_cls = IntervalArray

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data,
        closed=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        verify_integrity: bool = True,
    ) -> IntervalIndex:
        # All validation/coercion is delegated to the IntervalArray
        # constructor; errors it raises are re-labelled with this class name.
        name = maybe_extract_name(name, data, cls)

        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                closed=closed,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_breaks(
        cls,
        breaks,
        closed: IntervalClosedType | None = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_breaks(
                breaks, closed=closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_arrays(
        cls,
        left,
        right,
        closed: IntervalClosedType = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_arrays(
                left, right, closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % {
            "klass": "IntervalIndex",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
        IntervalIndex([(0, 1], (1, 2]],
                       dtype='interval[int64, right]')
        """
            ),
        }
    )
    def from_tuples(
        cls,
        data,
        closed: str = "right",
        name: Hashable = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalIndex:
        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
        return cls._simple_new(arr, name=name)

    # --------------------------------------------------------------------
    # error: Return type "IntervalTree" of "_engine" incompatible with return type
    # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
    @cache_readonly
    def _engine(self) -> IntervalTree:  # type: ignore[override]
        # IntervalTree expects numeric bounds, so datetimelike sides are
        # converted to their i8 representation first.
        left = self._maybe_convert_i8(self.left)
        right = self._maybe_convert_i8(self.right)
        return IntervalTree(left, right, closed=self.closed)

    def __contains__(self, key: Any) -> bool:
        """
        return a boolean if this key is IN the index
        We *only* accept an Interval

        Parameters
        ----------
        key : Interval

        Returns
        -------
        bool
        """
        # hash(key) raises TypeError for unhashable keys, matching the
        # base Index.__contains__ contract.
        hash(key)
        if not isinstance(key, Interval):
            if is_valid_na_for_dtype(key, self.dtype):
                return self.hasnans
            return False

        try:
            self.get_loc(key)
            return True
        except KeyError:
            return False

    @cache_readonly
    def _multiindex(self) -> MultiIndex:
        # MultiIndex view of the two endpoint arrays.
        return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

    def __reduce__(self):
        # Pickle support: round-trip through _new_IntervalIndex/from_arrays.
        d = {
            "left": self.left,
            "right": self.right,
            "closed": self.closed,
            "name": self.name,
        }
        return _new_IntervalIndex, (type(self), d), None

    @property
    def inferred_type(self) -> str:
        """Return a string of the type inferred from the values"""
        return "interval"

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we don't use an explicit engine
        # so return the bytes here
        return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)

    # IntervalTree doesn't have a is_monotonic_decreasing, so have to override
    # the Index implementation
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic decreasing (only equal or
        decreasing values), else False
        """
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return True if the IntervalIndex contains unique elements, else False.
        """
        left = self.left
        right = self.right

        # more than one NaN interval -> duplicated
        if self.isna().sum() > 1:
            return False

        # if either side has no duplicates, every (left, right) pair is unique
        if left.is_unique or right.is_unique:
            return True

        # only need to check positions where the left endpoint is duplicated
        seen_pairs = set()
        check_idx = np.where(left.duplicated(keep=False))[0]
        for idx in check_idx:
            pair = (left[idx], right[idx])
            if pair in seen_pairs:
                return False
            seen_pairs.add(pair)

        return True

    @property
    def is_overlapping(self) -> bool:
        """
        Return True if the IntervalIndex has overlapping intervals, else False.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Returns
        -------
        bool
            Boolean indicating if the IntervalIndex has overlapping intervals.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.
        IntervalIndex.overlaps : Check an IntervalIndex elementwise for
            overlaps.

        Examples
        --------
        >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
        >>> index
        IntervalIndex([(0, 2], (1, 3], (4, 5]],
              dtype='interval[int64, right]')
        >>> index.is_overlapping
        True

        Intervals that share closed endpoints overlap:

        >>> index = pd.interval_range(0, 3, closed='both')
        >>> index
        IntervalIndex([[0, 1], [1, 2], [2, 3]],
              dtype='interval[int64, both]')
        >>> index.is_overlapping
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> index = pd.interval_range(0, 3, closed='left')
        >>> index
        IntervalIndex([[0, 1), [1, 2), [2, 3)],
              dtype='interval[int64, left]')
        >>> index.is_overlapping
        False
        """
        # GH 23309
        return self._engine.is_overlapping

    def _needs_i8_conversion(self, key) -> bool:
        """
        Check if a given key needs i8 conversion. Conversion is necessary for
        Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
        Interval-like requires conversion if its endpoints are one of the
        aforementioned types.

        Assumes that any list-like data has already been cast to an Index.

        Parameters
        ----------
        key : scalar or Index-like
            The key that should be checked for i8 conversion

        Returns
        -------
        bool
        """
        if is_interval_dtype(key) or isinstance(key, Interval):
            return self._needs_i8_conversion(key.left)

        i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
        return isinstance(key, i8_types)

    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Int64Index if converted list-like.
        """
        original = key
        if is_list_like(key):
            key = ensure_index(key)

        if not self._needs_i8_conversion(key):
            return original

        scalar = is_scalar(key)
        if is_interval_dtype(key) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # error: "object" not callable
            return constructor(
                left, right, closed=self.closed
            )  # type: ignore[operator]

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
            if lib.is_period(key):
                key_i8 = key.ordinal
            elif isinstance(key_i8, Timestamp):
                key_i8 = key_i8.value
            elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
                key_i8 = key_i8.view("i8")
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        subtype = self.dtype.subtype  # type: ignore[union-attr]

        if not is_dtype_equal(subtype, key_dtype):
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8

    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
        # Slicing only makes sense when intervals don't overlap and the
        # bounds are monotonic; otherwise a label has no single position.
        if not self.is_non_overlapping_monotonic:
            raise KeyError(
                "can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing"
            )

        if isinstance(label, (IntervalMixin, IntervalIndex)):
            raise NotImplementedError("Interval objects are not currently supported")

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if (side == "left" and self.left.is_monotonic_increasing) or (
            side == "right" and not self.left.is_monotonic_increasing
        ):
            sub_idx = self.right
            if self.open_right:
                # open endpoint excluded: search from the next adjacent value
                label = _get_next_label(label)
        else:
            sub_idx = self.left
            if self.open_left:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)

    # --------------------------------------------------------------------
    # Indexing Methods

    def get_loc(
        self, key, method: str | None = None, tolerance=None
    ) -> int | slice | np.ndarray:
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

            .. deprecated:: 1.4

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply a point inside an interval.

        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
        >>> overlapping_index.get_loc(0.5)
        array([ True, False,  True])

        Only exact matches will be returned if an interval is provided.

        >>> index.get_loc(pd.Interval(0, 1))
        0
        """
        self._check_indexing_method(method)
        self._check_indexing_error(key)

        if isinstance(key, Interval):
            # Interval key: exact match on both endpoints and closed-ness
            if self.closed != key.closed:
                raise KeyError(key)
            mask = (self.left == key.left) & (self.right == key.right)
        elif is_valid_na_for_dtype(key, self.dtype):
            mask = self.isna()
        else:
            # assume scalar
            op_left = le if self.closed_left else lt
            op_right = le if self.closed_right else lt
            try:
                mask = op_left(self.left, key) & op_right(key, self.right)
            except TypeError as err:
                # scalar is not comparable to II subtype --> invalid label
                raise KeyError(key) from err

        matches = mask.sum()
        if matches == 0:
            raise KeyError(key)
        elif matches == 1:
            return mask.argmax()

        res = lib.maybe_booleans_to_slice(mask.view("u1"))
        if isinstance(res, slice) and res.stop is None:
            # TODO: DO this in maybe_booleans_to_slice?
            res = slice(res.start, len(self), res.step)
        return res

    def _get_indexer(
        self,
        target: Index,
        method: str | None = None,
        limit: int | None = None,
        tolerance: Any | None = None,
    ) -> npt.NDArray[np.intp]:

        if isinstance(target, IntervalIndex):
            # We only get here with not self.is_overlapping
            # -> at most one match per interval in target
            # want exact matches -> need both left/right to match, so defer to
            # left/right get_indexer, compare elementwise, equality -> match
            indexer = self._get_indexer_unique_sides(target)

        elif not is_object_dtype(target.dtype):
            # homogeneous scalar index: use IntervalTree
            # we should always have self._should_partial_index(target) here
            target = self._maybe_convert_i8(target)
            indexer = self._engine.get_indexer(target.values)
        else:
            # heterogeneous scalar index: defer elementwise to get_loc
            # we should always have self._should_partial_index(target) here
            return self._get_indexer_pointwise(target)[0]

        return ensure_platform_int(indexer)

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # e.g. IntervalIndex with different closed or incompatible subtype
            #  -> no matches
            return self._get_indexer_non_comparable(target, None, unique=False)

        elif isinstance(target, IntervalIndex):
            if self.left.is_unique and self.right.is_unique:
                # fastpath available even if we don't have self._index_as_unique
                indexer = self._get_indexer_unique_sides(target)
                missing = (indexer == -1).nonzero()[0]
            else:
                return self._get_indexer_pointwise(target)

        elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
            # target might contain intervals: defer elementwise to get_loc
            return self._get_indexer_pointwise(target)

        else:
            # Note: this case behaves differently from other Index subclasses
            #  because IntervalIndex does partial-int indexing
            target = self._maybe_convert_i8(target)
            indexer, missing = self._engine.get_indexer_non_unique(target.values)

        return ensure_platform_int(indexer), ensure_platform_int(missing)

    def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
        """
        _get_indexer specialized to the case where both of our sides are unique.
        """
        # Caller is responsible for checking
        #  `self.left.is_unique and self.right.is_unique`

        left_indexer = self.left.get_indexer(target.left)
        right_indexer = self.right.get_indexer(target.right)
        # a position matches only when both endpoints match at the same spot
        indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
        return indexer

    def _get_indexer_pointwise(
        self, target: Index
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        pointwise implementation for get_indexer and get_indexer_non_unique.
        """
        indexer, missing = [], []
        for i, key in enumerate(target):
            try:
                locs = self.get_loc(key)
                if isinstance(locs, slice):
                    # Only needed for get_indexer_non_unique
                    locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                elif lib.is_integer(locs):
                    locs = np.array(locs, ndmin=1)
                else:
                    # otherwise we have ndarray[bool]
                    locs = np.where(locs)[0]
            except KeyError:
                missing.append(i)
                locs = np.array([-1])
            except InvalidIndexError:
                # i.e. non-scalar key e.g. a tuple.
                # see test_append_different_columns_types_raises
                missing.append(i)
                locs = np.array([-1])

            indexer.append(locs)

        indexer = np.concatenate(indexer)
        return ensure_platform_int(indexer), ensure_platform_int(missing)

    @cache_readonly
    def _index_as_unique(self) -> bool:
        # treated as non-unique when overlapping or when holding >= 2 NaNs
        return not self.is_overlapping and self._engine._na_count < 2

    _requires_unique_msg = (
        "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
    )

    def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
        if not (key.step is None or key.step == 1):
            # GH#31658 if label-based, we require step == 1,
            #  if positional, we disallow float start/stop
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
            if kind == "loc":
                raise ValueError(msg)
            elif kind == "getitem":
                if not is_valid_positional_slice(key):
                    # i.e. this cannot be interpreted as a positional slice
                    raise ValueError(msg)

        return super()._convert_slice_indexer(key, kind, is_frame=is_frame)

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        # integer lookups in Series.__getitem__ are unambiguously
        #  positional in this case
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        return self.dtype.subtype.kind in ["m", "M"]  # type: ignore[union-attr]

    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        # delegate bound casting to the matching endpoint Index (left/right)
        self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
        return getattr(self, side)._maybe_cast_slice_bound(label, side)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        if not isinstance(dtype, IntervalDtype):
            return False
        common_subtype = find_common_type([self.dtype, dtype])
        return not is_object_dtype(common_subtype)

    # --------------------------------------------------------------------
    # Endpoint views of the underlying IntervalArray, exposed as Index objects

    @cache_readonly
    def left(self) -> Index:
        return Index(self._data.left, copy=False)

    @cache_readonly
    def right(self) -> Index:
        return Index(self._data.right, copy=False)

    @cache_readonly
    def mid(self) -> Index:
        return Index(self._data.mid, copy=False)

    @property
    def length(self) -> Index:
        return Index(self._data.length, copy=False)

    # --------------------------------------------------------------------
    # Rendering Methods
    # __repr__ associated methods are based on MultiIndex

    def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
        # matches base class except for whitespace padding
        return header + list(self._format_native_types(na_rep=na_rep))

    def _format_native_types(
        self, *, na_rep="NaN", quoting=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        # GH 28210: use base method but with different default na_rep
        return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs)

    def _format_data(self, name=None) -> str:
        # TODO: integrate with categorical and make generic
        # name argument is unused here; just for compat with base / categorical
        return self._data._format_data() + "," + self._format_space()

    # --------------------------------------------------------------------
    # Set Operations

    def _intersection(self, other, sort):
        """
        intersection specialized to the case with matching dtypes.
        """
        # For IntervalIndex we also know other.closed == self.closed
        if self.left.is_unique and self.right.is_unique:
            taken = self._intersection_unique(other)
        elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
            # Swap other/self if other is unique and self does not have
            # multiple NaNs
            taken = other._intersection_unique(self)
        else:
            # duplicates
            taken = self._intersection_non_unique(other)

        if sort is None:
            taken = taken.sort_values()

        return taken

    def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does not have any common endpoint,
        no matter left or right.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is much more performant than super()._intersection(other)
        lindexer = self.left.get_indexer(other.left)
        rindexer = self.right.get_indexer(other.right)

        match = (lindexer == rindexer) & (lindexer != -1)
        indexer = lindexer.take(match.nonzero()[0])
        indexer = unique(indexer)

        return self.take(indexer)

    def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
        """
        Used when the IntervalIndex does have some common endpoints,
        on either sides.
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        # Note: this is about 3.25x faster than super()._intersection(other)
        #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
        mask = np.zeros(len(self), dtype=bool)

        if self.hasnans and other.hasnans:
            # keep exactly one NaN interval in the result
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

        other_tups = set(zip(other.left, other.right))
        for i, tup in enumerate(zip(self.left, self.right)):
            if tup in other_tups:
                mask[i] = True

        return self[mask]

    # --------------------------------------------------------------------

    def _get_engine_target(self) -> np.ndarray:
        # Note: we _could_ use libjoin functions by either casting to object
        #  dtype or constructing tuples (faster than constructing Intervals)
        #  but the libjoin fastpaths are no longer fast in these cases.
        raise NotImplementedError(
            "IntervalIndex does not use libjoin fastpaths or pass values to "
            "IndexEngine objects"
        )

    def _from_join_target(self, result):
        raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

    # TODO: arithmetic operations
927def _is_valid_endpoint(endpoint) -> bool:
928 """
929 Helper for interval_range to check if start/end are valid types.
930 """
931 return any(
932 [
933 is_number(endpoint),
934 isinstance(endpoint, Timestamp),
935 isinstance(endpoint, Timedelta),
936 endpoint is None,
937 ]
938 )
941def _is_type_compatible(a, b) -> bool:
942 """
943 Helper for interval_range to check type compat of start/end/freq.
944 """
945 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))
946 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))
947 return (
948 (is_number(a) and is_number(b))
949 or (is_ts_compat(a) and is_ts_compat(b))
950 or (is_td_compat(a) and is_td_compat(b))
951 or com.any_none(a, b)
952 )
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``. For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # `endpoint` only determines whether we are numeric or datetime-like
    endpoint = start if start is not None else end

    # default freq (1 or 'D') may be filled in when only one of start/end
    # and periods are missing
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
            # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
            # expected "ndarray[Any, Any]"
            breaks = maybe_downcast_numeric(
                breaks,  # type: ignore[arg-type]
                np.dtype("int64"),
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
1127 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)