# pandas/core/indexes/base.py

from __future__ import annotations

from datetime import datetime
import functools
from itertools import zip_longest
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Hashable,
    Iterable,
    Literal,
    NoReturn,
    Sequence,
    TypeVar,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._config import get_option

from pandas._libs import (
    NaT,
    algos as libalgos,
    index as libindex,
    lib,
)
import pandas._libs.join as libjoin
from pandas._libs.lib import (
    is_datetime_array,
    no_default,
)
from pandas._libs.missing import is_float_nan
from pandas._libs.tslibs import (
    IncompatibleFrequency,
    OutOfBoundsDatetime,
    Timestamp,
    is_unitless,
    tz_compare,
)
from pandas._typing import (
    ArrayLike,
    Axes,
    Dtype,
    DtypeObj,
    F,
    IgnoreRaise,
    Level,
    Shape,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    DuplicateLabelError,
    IntCastingNaNError,
    InvalidIndexError,
)
from pandas.util._decorators import (
    Appender,
    cache_readonly,
    deprecate_nonkeyword_arguments,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_exception,
)

from pandas.core.dtypes.astype import astype_nansafe
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    common_dtype_categorical_compat,
    ensure_dtype_can_hold_na,
    find_common_type,
    infer_dtype_from,
    maybe_cast_pointwise_result,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_platform_int,
    is_bool_dtype,
    is_categorical_dtype,
    is_dtype_equal,
    is_ea_or_datetimelike_dtype,
    is_extension_array_dtype,
    is_float,
    is_float_dtype,
    is_hashable,
    is_integer,
    is_interval_dtype,
    is_iterator,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_scalar,
    is_signed_integer_dtype,
    is_string_dtype,
    is_unsigned_integer_dtype,
    needs_i8_conversion,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    PandasDtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCDatetimeIndex,
    ABCMultiIndex,
    ABCPeriodIndex,
    ABCRangeIndex,
    ABCSeries,
    ABCTimedeltaIndex,
)
from pandas.core.dtypes.inference import is_dict_like
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    arraylike,
    missing,
    ops,
)
from pandas.core.accessor import CachedAccessor
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.arrays import (
    Categorical,
    ExtensionArray,
)
from pandas.core.arrays.datetimes import (
    tz_to_dtype,
    validate_tz_from_dtype,
)
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.arrays.string_ import StringArray
from pandas.core.base import (
    IndexOpsMixin,
    PandasObject,
)
import pandas.core.common as com
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
    sanitize_array,
)
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.frozen import FrozenList
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import (
    ensure_key_mapped,
    get_group_index_sorter,
    nargsort,
)
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
    PrettyDict,
    default_pprint,
    format_object_summary,
    pprint_thing,
)

if TYPE_CHECKING:
    from pandas import (
        CategoricalIndex,
        DataFrame,
        MultiIndex,
        Series,
    )
    from pandas.core.arrays import PeriodArray


__all__ = ["Index"]

_unsortable_types = frozenset(("mixed", "mixed-integer"))

_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
_index_shared_docs: dict[str, str] = {}
str_t = str


_dtype_obj = np.dtype("object")


def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.
    """

    @functools.wraps(meth)
    def join(
        self,
        other,
        how: str_t = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
        if not return_indexers:
            return join_index

        if lidx is not None:
            lidx = ensure_platform_int(lidx)
        if ridx is not None:
            ridx = ensure_platform_int(ridx)
        return join_index, lidx, ridx

    return cast(F, join)

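# A hedged usage sketch of the decorator above: with return_indexers=False the
# wrapped Index.join returns only the joined index, while return_indexers=True
# yields (join_index, lidx, ridx) with the indexers coerced to platform int.
# The sample values and Int64Index reprs assume a pandas-1.4-era build.
#
#   >>> left, right = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])
#   >>> left.join(right, how="inner")
#   Int64Index([2, 3], dtype='int64')
#   >>> left.join(right, how="inner", return_indexers=True)
#   (Int64Index([2, 3], dtype='int64'), array([1, 2]), array([0, 1]))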


def disallow_kwargs(kwargs: dict[str, Any]) -> None:
    if kwargs:
        raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")


def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)

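# A hedged sketch of when _new_Index runs: it is the reconstructor that
# Index.__reduce__ hands to pickle, so a round trip re-enters this function
# rather than Index.__new__ directly. Example values are illustrative only.
#
#   >>> import pickle
#   >>> idx = pd.Index([1, 2, 3], name="x")
#   >>> pickle.loads(pickle.dumps(idx))
#   Int64Index([1, 2, 3], dtype='int64', name='x')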


_IndexT = TypeVar("_IndexT", bound="Index")


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable sequence used for indexing and alignment.

    The basic object storing axis labels for all pandas objects.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
        If dtype is None, we find the dtype that best fits the data.
        If an actual dtype is provided, we coerce to that dtype if it's safe.
        Otherwise, an error will be raised.
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible.

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex : Index of datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.
    NumericIndex : Index of numpy int/uint/float data.
    Int64Index : Index of purely int64 labels (deprecated).
    UInt64Index : Index of purely uint64 labels (deprecated).
    Float64Index : Index of purely float64 labels (deprecated).

    Notes
    -----
    An Index instance can **only** contain hashable objects.

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Int64Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')
    """

    # tolist is not actually deprecated, just suppressed in the __dir__
    _hidden_attrs: frozenset[str] = (
        PandasObject._hidden_attrs
        | IndexOpsMixin._hidden_attrs
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments

    @final
    def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_engine_target()
        ov = other._get_engine_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        return libjoin.left_join_indexer_unique(sv, ov)

    @final
    def _left_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_engine_target()
        ov = other._get_engine_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _inner_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_engine_target()
        ov = other._get_engine_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _outer_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_engine_target()
        ov = other._get_engine_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    _typ: str = "index"
    _data: ExtensionArray | np.ndarray
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]
    _is_numeric_dtype: bool = False
    _can_hold_strings: bool = True

    # Whether this index is a NumericIndex, but not an Int64Index, Float64Index,
    # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and
    # associated code in pandas 2.0.
    _is_backward_compat_public_numeric_index: bool = False

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
        return libindex.ObjectEngine

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
    ) -> Index:

        if kwargs:
            warnings.warn(
                "Passing keywords other than 'data', 'dtype', 'copy', 'name', "
                "'tupleize_cols' is deprecated and will raise TypeError in a "
                "future version. Use the specific Index subclass directly instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        from pandas.core.arrays import PandasArray
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)
        if "tz" in kwargs:
            tz = kwargs.pop("tz")
            validate_tz_from_dtype(dtype, tz)
            dtype = tz_to_dtype(tz)

        if type(data) is PandasArray:
            # ensure users don't accidentally put a PandasArray in an index,
            # but don't unpack StringArray
            data = data.to_numpy()
        if isinstance(dtype, PandasDtype):
            dtype = dtype.numpy_dtype

        data_dtype = getattr(data, "dtype", None)

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            klass = cls._dtype_to_subclass(dtype)
            if klass is not Index:
                return klass(data, dtype=dtype, copy=copy, name=name, **kwargs)

            ea_cls = dtype.construct_array_type()
            data = ea_cls._from_sequence(data, dtype=dtype, copy=copy)
            disallow_kwargs(kwargs)
            return Index._simple_new(data, name=name)

        elif is_ea_or_datetimelike_dtype(data_dtype):
            data_dtype = cast(DtypeObj, data_dtype)
            klass = cls._dtype_to_subclass(data_dtype)
            if klass is not Index:
                result = klass(data, copy=copy, name=name, **kwargs)
                if dtype is not None:
                    return result.astype(dtype, copy=False)
                return result
            elif dtype is not None:
                # GH#45206
                data = data.astype(dtype, copy=False)

            disallow_kwargs(kwargs)
            data = extract_array(data, extract_numpy=True)
            return Index._simple_new(data, name=name)

        # index-like
        elif (
            isinstance(data, Index)
            and data._is_backward_compat_public_numeric_index
            and dtype is None
        ):
            return data._constructor(data, name=name, copy=copy)
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):

            if isinstance(data, ABCMultiIndex):
                data = data._values

            if dtype is not None:
                # we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                # GH 11836
                data = sanitize_array(data, None, dtype=dtype, copy=copy)

                dtype = data.dtype

            if data.dtype.kind in ["i", "u", "f"]:
                # maybe coerce to a sub-class
                arr = data
            elif data.dtype.kind in ["b", "c"]:
                # No special subclass, and Index._ensure_array won't do this
                # for us.
                arr = np.asarray(data)
            else:
                arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)

                if dtype is None:
                    arr = _maybe_cast_data_without_dtype(
                        arr, cast_numeric_deprecated=True
                    )
                    dtype = arr.dtype

                    if kwargs:
                        return cls(arr, dtype, copy=copy, name=name, **kwargs)

            klass = cls._dtype_to_subclass(arr.dtype)
            arr = klass._ensure_array(arr, dtype, copy)
            disallow_kwargs(kwargs)
            return klass._simple_new(arr, name)

        elif is_scalar(data):
            raise cls._scalar_data_error(data)
        elif hasattr(data, "__array__"):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
        else:

            if tupleize_cols and is_list_like(data):
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(
                        data, names=name or kwargs.get("names")
                    )
            # other iterable of some kind

            subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
            if dtype is None:
                # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
                subarr = _maybe_cast_data_without_dtype(
                    subarr, cast_numeric_deprecated=False
                )
                dtype = subarr.dtype
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)

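    # A hedged sketch of the dispatch above (reprs assume a pandas-1.4-era
    # build, where numeric data still lands on the Int64Index subclass):
    #
    #   >>> pd.Index(range(3))               # range -> RangeIndex fast path
    #   RangeIndex(start=0, stop=3, step=1)
    #   >>> pd.Index([1, 2, 3])              # numeric data -> numeric subclass
    #   Int64Index([1, 2, 3], dtype='int64')
    #   >>> pd.Index([("a", 1), ("b", 2)])   # all-tuples + tupleize_cols
    #   MultiIndex([('a', 1),
    #               ('b', 2)],
    #              )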

    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.
        """
        if data.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        if copy:
            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            data = data.copy()
        return data

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex

                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex

                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex

                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex

                return PeriodIndex

            elif isinstance(dtype, SparseDtype):
                warnings.warn(
                    "In a future version, passing a SparseArray to pd.Index "
                    "will store that array directly instead of converting to a "
                    "dense numpy ndarray. To retain the old behavior, use "
                    "pd.Index(arr.to_numpy()) instead",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return cls._dtype_to_subclass(dtype.subtype)

            return Index

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        elif is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_signed_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index

        elif dtype == _dtype_obj:
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(
            dtype.type, (str, bool, np.bool_, complex, np.complex64, np.complex128)
        ):
            return Index

        raise NotImplementedError(dtype)

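    # A hedged sketch of the dtype -> subclass mapping above, assuming a
    # pandas-1.4-era build (Int64Index and friends not yet removed):
    #
    #   >>> Index._dtype_to_subclass(np.dtype("datetime64[ns]"))
    #   <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
    #   >>> Index._dtype_to_subclass(np.dtype("int64"))
    #   <class 'pandas.core.indexes.numeric.Int64Index'>
    #   >>> Index._dtype_to_subclass(np.dtype("object"))
    #   <class 'pandas.core.indexes.base.Index'>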

659 """ 

660 NOTE for new Index creation: 

661 

662 - _simple_new: It returns new Index with the same type as the caller. 

663 All metadata (such as name) must be provided by caller's responsibility. 

664 Using _shallow_copy is recommended because it fills these metadata 

665 otherwise specified. 

666 

667 - _shallow_copy: It returns new Index with the same type (using 

668 _simple_new), but fills caller's metadata otherwise specified. Passed 

669 kwargs will overwrite corresponding metadata. 

670 

671 See each method's docstring. 

672 """ 

673 

674 @property 

675 def asi8(self): 

676 """ 

677 Integer representation of the values. 

678 

679 Returns 

680 ------- 

681 ndarray 

682 An ndarray with int64 dtype. 

683 """ 

684 warnings.warn( 

685 "Index.asi8 is deprecated and will be removed in a future version.", 

686 FutureWarning, 

687 stacklevel=find_stack_level(), 

688 ) 

689 return None 

690 

691 @classmethod 

692 def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT: 

693 """ 

694 We require that we have a dtype compat for the values. If we are passed 

695 a non-dtype compat, then coerce using the constructor. 

696 

697 Must be careful not to recurse. 

698 """ 

699 assert isinstance(values, cls._data_cls), type(values) 

700 

701 result = object.__new__(cls) 

702 result._data = values 

703 result._name = name 

704 result._cache = {} 

705 result._reset_identity() 

706 

707 return result 

708 

709 @classmethod 

710 def _with_infer(cls, *args, **kwargs): 

711 """ 

712 Constructor that uses the 1.0.x behavior inferring numeric dtypes 

713 for ndarray[object] inputs. 

714 """ 

715 with warnings.catch_warnings(): 

716 warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning) 

717 result = cls(*args, **kwargs) 

718 

719 if result.dtype == _dtype_obj and not result._is_multi: 

720 # error: Argument 1 to "maybe_convert_objects" has incompatible type 

721 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected 

722 # "ndarray[Any, Any]" 

723 values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] 

724 if values.dtype.kind in ["i", "u", "f", "b"]: 

725 return Index(values, name=result.name) 

726 

727 return result 

728 
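    # A hedged sketch of _with_infer: it applies the 1.0.x numeric inference
    # to object-dtype input without the deprecation warning the public
    # constructor emits (repr assumes a pandas-1.4-era build):
    #
    #   >>> arr = np.array([1, 2, 3], dtype=object)
    #   >>> Index._with_infer(arr)
    #   Int64Index([1, 2, 3], dtype='int64')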

    @cache_readonly
    def _constructor(self: _IndexT) -> type[_IndexT]:
        return type(self)

    @final
    def _maybe_check_unique(self) -> None:
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)

    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
            positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

    # --------------------------------------------------------------------
    # Index Internals Methods

    @final
    def _get_attributes_dict(self) -> dict[str_t, Any]:
        """
        Return an attributes dict for my class.

        Temporarily added back for a compatibility issue in dask, see
        https://github.com/pandas-dev/pandas/pull/43895
        """
        warnings.warn(
            "The Index._get_attributes_dict method is deprecated, and will be "
            "removed in a future version",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        return {k: getattr(self, k, None) for k in self._attributes}

    def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
        """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self._name if name is no_default else name

        return self._simple_new(values, name=name)

    def _view(self: _IndexT) -> _IndexT:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name)

        result._cache = self._cache
        return result

    @final
    def _rename(self: _IndexT, name: Hashable) -> _IndexT:
        """
        fastpath for rename if new name is already validated.
        """
        result = self._view()
        result._name = name
        return result

    @final
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is`` but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            Other object to compare against.

        Returns
        -------
        bool
            True if both have same underlying data, False otherwise.

        See Also
        --------
        Index.identical : Works like ``Index.is_`` but also checks metadata.
        """
        if self is other:
            return True
        elif not hasattr(other, "_id"):
            return False
        elif self._id is None or other._id is None:
            return False
        else:
            return self._id is other._id

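    # A hedged identity sketch: views share ``_id`` while copies get a fresh
    # one via _reset_identity (illustrative, default Index behavior):
    #
    #   >>> idx = pd.Index(["a", "b"])
    #   >>> idx.is_(idx.view())
    #   True
    #   >>> idx.is_(idx.copy())
    #   False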

    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        self._id = object()

    @final
    def _cleanup(self) -> None:
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine:
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if (
            isinstance(target_values, ExtensionArray)
            and self._engine_type is libindex.ObjectEngine
        ):
            return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

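    # A hedged note on what the engine is for: it backs hash-table lookups
    # such as get_loc/get_indexer, and being a cache_readonly it is built
    # lazily on first use (illustrative):
    #
    #   >>> idx = pd.Index(["a", "b", "c"])
    #   >>> idx.get_loc("b")   # first lookup populates the engine's mapping
    #   1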

    @final
    @cache_readonly
    def _dir_additions_for_owner(self) -> set[str_t]:
        """
        Add the string-like labels to the owner dataframe/series dir output.

        If this is a MultiIndex, its first-level values are used.
        """
        return {
            c
            for c in self.unique(level=0)[: get_option("display.max_dir_items")]
            if isinstance(c, str) and c.isidentifier()
        }

    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        return len(self._data)

    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
        # operations and not logical operations, so don't dispatch
        # This is deprecated, so this full 'if' clause can be removed once
        # deprecation is enforced in 2.0
        if not (
            method == "__call__"
            and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
        ):
            result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        return self.__array_wrap__(result)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions e.g. np.split.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
            return result

        return Index(result, name=self.name)

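    # A hedged ufunc sketch: ufuncs are applied to the underlying values and
    # re-wrapped as an Index via __array_wrap__; two-output ufuncs yield a
    # tuple of Indexes (reprs assume a pandas-1.4-era build):
    #
    #   >>> np.add(pd.Index([1, 2, 3]), 1)
    #   Int64Index([2, 3, 4], dtype='int64')
    #   >>> np.divmod(pd.Index([5, 6]), 4)   # nout == 2 -> tuple of Indexes
    #   (Int64Index([1, 1], dtype='int64'), Int64Index([1, 2], dtype='int64'))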

    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    @final
    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        warnings.warn(
            "Index.ravel returning ndarray is deprecated; in a future version "
            "this will return a view on self.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        if needs_i8_conversion(self.dtype):
            # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]"
            # has no attribute "_ndarray"
            values = self._data._ndarray  # type: ignore[union-attr]
        elif is_interval_dtype(self.dtype):
            values = np.asarray(self._data)
        else:
            values = self._get_engine_target()
        return values.ravel(order=order)

    def view(self, cls=None):

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            result._id = self._id
        return result

    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            if isinstance(dtype, np.dtype) and dtype.kind == "M" and is_unitless(dtype):
                # TODO(2.0): remove this special-casing once this is enforced
                # in DTA.astype
                raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif is_float_dtype(self.dtype) and needs_i8_conversion(dtype):
            # NB: this must come before the ExtensionDtype check below
            # TODO: this differs from Series behavior; can/should we align them?
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            try:
                if dtype == str:
                    # GH#38607
                    new_values = values.astype(dtype, copy=copy)
                else:
                    # GH#13149 specifically use astype_nansafe instead of astype
                    new_values = astype_nansafe(values, dtype=dtype, copy=copy)
            except IntCastingNaNError:
                raise
            except (TypeError, ValueError) as err:
                if dtype.kind == "u" and "losslessly" in str(err):
                    # keep the message from _astype_float_to_int_nansafe
                    raise
                raise TypeError(
                    f"Cannot cast {type(self).__name__} to dtype {dtype}"
                ) from err

        # pass copy=False because any copying will be done in the astype above
        if self._is_backward_compat_public_numeric_index:
            # this block is needed so e.g. NumericIndex[int8].astype("int32") returns
            # NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, remove this also.
            if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
                return self._constructor(
                    new_values, name=self.name, dtype=dtype, copy=False
                )
        return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)

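    # A hedged astype sketch (reprs assume a pandas-1.4-era build):
    #
    #   >>> pd.Index([1, 2, 3]).astype("float64")
    #   Float64Index([1.0, 2.0, 3.0], dtype='float64')
    #   >>> pd.Index([1.5]).astype("datetime64[ns]")  # float -> i8 is refused
    #   Traceback (most recent call last):
    #       ...
    #   TypeError: Cannot convert Float64Index to dtype datetime64[ns]; ...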

    _index_shared_docs[
        "take"
    ] = """
    Return a new %(klass)s of the values selected by the indices.

    For internal compatibility with numpy arrays.

    Parameters
    ----------
    indices : array-like
        Indices to be taken.
    axis : int, optional
        The axis over which to select values, always 0.
    allow_fill : bool, default True
    fill_value : scalar, default None
        If allow_fill=True and fill_value is not None, indices specified by
        -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

    Returns
    -------
    Index
        An index formed of elements at the given indices. Will be the same
        type as self, except for RangeIndex.

    See Also
    --------
    numpy.ndarray.take: Return an array formed from the
        elements of a at the given indices.
    """

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs
    ):
        if kwargs:
            nv.validate_take((), kwargs)
        if is_scalar(indices):
            raise TypeError("Expected indices to be array-like")
        indices = ensure_platform_int(indices)
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # Note: we discard fill_value and use self._na_value, only relevant
        # in the case where allow_fill is True and fill_value is not None
        values = self._values
        if isinstance(values, np.ndarray):
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        else:
            # algos.take passes 'axis' keyword which not all EAs accept
            taken = values.take(
                indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        # _constructor so RangeIndex->Int64Index
        return self._constructor._simple_new(taken, name=self.name)

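    # A hedged take sketch: -1 means "last element" unless allow_fill=True and
    # a fill_value is passed, in which case -1 marks missing slots; note the
    # actual fill used is self._na_value, per the comment above (illustrative,
    # object-dtype Index can hold NA):
    #
    #   >>> idx = pd.Index(["a", "b", "c"])
    #   >>> idx.take([0, -1])
    #   Index(['a', 'c'], dtype='object')
    #   >>> idx.take([0, -1], allow_fill=True, fill_value="?")
    #   Index(['a', nan], dtype='object')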

    @final
    def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
        """
        We only use pandas-style take when allow_fill is True _and_
        fill_value is not None.
        """
        if allow_fill and fill_value is not None:
            # only fill if we are passing a non-None fill_value
            if self._can_hold_na:
                if (indices < -1).any():
                    raise ValueError(
                        "When allow_fill=True and fill_value is not None, "
                        "all indices must be >= -1"
                    )
            else:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
        else:
            allow_fill = False
        return allow_fill

    _index_shared_docs[
        "repeat"
    ] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    repeated_index : %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')
    >>> idx.repeat(2)
    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
    >>> idx.repeat([1, 2, 3])
    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
    """

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        repeats = ensure_platform_int(repeats)
        nv.validate_repeat((), {"axis": axis})
        res_values = self._values.repeat(repeats)

        # _constructor so RangeIndex->Int64Index
        return self._constructor._simple_new(res_values, name=self.name)

    # --------------------------------------------------------------------
    # Copying Methods

    def copy(
        self: _IndexT,
        name: Hashable | None = None,
        deep: bool = False,
        dtype: Dtype | None = None,
        names: Sequence[Hashable] | None = None,
    ) -> _IndexT:
        """
        Make a copy of this object.

        Name and dtype sets those attributes on the new object.

        Parameters
        ----------
        name : Label, optional
            Set name for new object.
        deep : bool, default False
        dtype : numpy dtype or pandas type, optional
            Set dtype for new object.

            .. deprecated:: 1.2.0
                use ``astype`` method instead.
        names : list-like, optional
            Kept for compatibility with MultiIndex. Should not be used.

            .. deprecated:: 1.4.0
                use ``name`` instead.

        Returns
        -------
        Index
            Index refers to a new object which is a copy of this object.

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """
        if names is not None:
            warnings.warn(
                "parameter names is deprecated and will be removed in a future "
                "version. Use the name parameter instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        name = self._validate_names(name=name, names=names, deep=deep)[0]
        if deep:
            new_data = self._data.copy()
            new_index = type(self)._simple_new(new_data, name=name)
        else:
            new_index = self._rename(name=name)

        if dtype:
            warnings.warn(
                "parameter dtype is deprecated and will be removed in a future "
                "version. Use the astype method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            new_index = new_index.astype(dtype)
        return new_index

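    # A hedged copy sketch: a shallow copy shares the data buffer but gets a
    # new identity, and ``name`` overrides the stored name (illustrative):
    #
    #   >>> idx = pd.Index(["a", "b"], name="old")
    #   >>> idx.copy(name="new")
    #   Index(['a', 'b'], dtype='object', name='new')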

    @final
    def __copy__(self: _IndexT, **kwargs) -> _IndexT:
        return self.copy(**kwargs)

    @final
    def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)

    # --------------------------------------------------------------------
    # Rendering Methods

    @final
    def __repr__(self) -> str_t:
        """
        Return a string representation for this object.
        """
        klass_name = type(self).__name__
        data = self._format_data()
        attrs = self._format_attrs()
        space = self._format_space()
        attrs_str = [f"{k}={v}" for k, v in attrs]
        prepr = f",{space}".join(attrs_str)

        # no data provided, just attributes
        if data is None:
            data = ""

        return f"{klass_name}({data}{prepr})"

    def _format_space(self) -> str_t:

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    def _format_data(self, name=None) -> str_t:
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = True

        if self.inferred_type == "string":
            is_justify = False
        elif self.inferred_type == "categorical":
            self = cast("CategoricalIndex", self)
            if is_object_dtype(self.categories):
                is_justify = False

        return format_object_summary(
            self,
            self._formatter_func,
            is_justify=is_justify,
            name=name,
            line_break_each_value=self._is_multi,
        )

    def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        attrs: list[tuple[str_t, str_t | int | bool | None]] = []

        if not self._is_multi:
            attrs.append(("dtype", f"'{self.dtype}'"))

        if self.name is not None:
            attrs.append(("name", default_pprint(self.name)))
        elif self._is_multi and any(x is not None for x in self.names):
            attrs.append(("names", default_pprint(self.names)))

        max_seq_items = get_option("display.max_seq_items") or len(self)
        if len(self) > max_seq_items:
            attrs.append(("length", len(self)))
        return attrs

    @final
    def _get_level_names(self) -> Hashable | Sequence[Hashable]:
        """
        Return a name or list of names with None replaced by the level number.
        """
        if self._is_multi:
            return [
                level if name is None else name for level, name in enumerate(self.names)
            ]
        else:
            return 0 if self.name is None else self.name

    @final
    def _mpl_repr(self) -> np.ndarray:
        # how to represent ourselves to matplotlib
        if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
            return cast(np.ndarray, self.values)
        return self.astype(object, copy=False)._values

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str_t = "NaN",
    ) -> list[str_t]:
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, na_rep=na_rep)

    def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
        from pandas.io.formats.format import format_array

        values = self._values

        if is_object_dtype(values.dtype):
            values = cast(np.ndarray, values)
            values = lib.maybe_convert_objects(values, safe=True)

            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

            # could have nans
            mask = is_float_nan(values)
            if mask.any():
                result_arr = np.array(result)
                result_arr[mask] = na_rep
                result = result_arr.tolist()
        else:
            result = trim_front(format_array(values, None, justify="left"))
        return header + result

    @final
    def to_native_types(self, slicer=None, **kwargs) -> np.ndarray:
        """
        Format specified values of `self` and return them.

        .. deprecated:: 1.2.0

        Parameters
        ----------
        slicer : int, array-like
            An indexer into `self` that specifies which values
            are used in the formatting process.
        kwargs : dict
            Options for specifying how the values should be formatted.
            These options include the following:

            1) na_rep : str
                The value that serves as a placeholder for NULL values
            2) quoting : bool or None
                Whether or not there are quoted values in `self`
            3) date_format : str
                The format used to represent date-like values.

        Returns
        -------
        numpy.ndarray
            Formatted values.
        """
        warnings.warn(
            "The 'to_native_types' method is deprecated and will be removed in "
            "a future version. Use 'astype(str)' instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        values = self
        if slicer is not None:
            values = values[slicer]
        return values._format_native_types(**kwargs)

    def _format_native_types(
        self, *, na_rep="", quoting=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Actually format specific types of the index.
        """
        mask = isna(self)
        if not self.is_object() and not quoting:
            values = np.asarray(self).astype(str)
        else:
            values = np.array(self, dtype=object, copy=True)

        values[mask] = na_rep
        return values

    def _summary(self, name=None) -> str_t:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        if len(self) > 0:
            head = self[0]
            if hasattr(head, "format") and not isinstance(head, str):
                head = head.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                head = self._formatter_func(head).replace("'", "")
            tail = self[-1]
            if hasattr(tail, "format") and not isinstance(tail, str):
                tail = tail.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                tail = self._formatter_func(tail).replace("'", "")

            index_summary = f", {head} to {tail}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        return f"{name}: {len(self)} entries{index_summary}"

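    # A hedged _summary sketch (repr assumes a pandas-1.4-era build):
    #
    #   >>> pd.Index([1, 2, 3])._summary()
    #   'Int64Index: 3 entries, 1 to 3'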

1576 # -------------------------------------------------------------------- 

1577 # Conversion Methods 

1578 

1579 def to_flat_index(self: _IndexT) -> _IndexT: 

1580 """ 

1581 Identity method. 

1582 

1583 This is implemented for compatibility with subclass implementations 

1584 when chaining. 

1585 

1586 Returns 

1587 ------- 

1588 pd.Index 

1589 Caller. 

1590 

1591 See Also 

1592 -------- 

1593 MultiIndex.to_flat_index : Subclass implementation. 

1594 """ 

1595 return self 

1596 

1597 def to_series(self, index=None, name: Hashable = None) -> Series: 

1598 """ 

1599 Create a Series with both index and values equal to the index keys. 

1600 

1601 Useful with map for returning an indexer based on an index. 

1602 

1603 Parameters 

1604 ---------- 

1605 index : Index, optional 

1606 Index of resulting Series. If None, defaults to original index. 

1607 name : str, optional 

1608 Name of resulting Series. If None, defaults to name of original 

1609 index. 

1610 

1611 Returns 

1612 ------- 

1613 Series 

1614 The dtype will be based on the type of the Index values. 

1615 

1616 See Also 

1617 -------- 

1618 Index.to_frame : Convert an Index to a DataFrame. 

1619 Series.to_frame : Convert Series to DataFrame. 

1620 

1621 Examples 

1622 -------- 

1623 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') 

1624 

1625 By default, the original Index and original name is reused. 

1626 

1627 >>> idx.to_series() 

1628 animal 

1629 Ant Ant 

1630 Bear Bear 

1631 Cow Cow 

1632 Name: animal, dtype: object 

1633 

1634 To enforce a new Index, specify new labels to ``index``: 

1635 

1636 >>> idx.to_series(index=[0, 1, 2]) 

1637 0 Ant 

1638 1 Bear 

1639 2 Cow 

1640 Name: animal, dtype: object 

1641 

1642 To override the name of the resulting column, specify `name`: 

1643 

1644 >>> idx.to_series(name='zoo') 

1645 animal 

1646 Ant Ant 

1647 Bear Bear 

1648 Cow Cow 

1649 Name: zoo, dtype: object 

1650 """ 

1651 from pandas import Series 

1652 

1653 if index is None: 

1654 index = self._view() 

1655 if name is None: 

1656 name = self.name 

1657 

1658 return Series(self._values.copy(), index=index, name=name) 

1659 

1660 def to_frame( 

1661 self, index: bool = True, name: Hashable = lib.no_default 

1662 ) -> DataFrame: 

1663 """ 

1664 Create a DataFrame with a column containing the Index. 

1665 

1666 Parameters 

1667 ---------- 

1668 index : bool, default True 

1669 Set the index of the returned DataFrame as the original Index. 

1670 

1671 name : object, default None 

1672 The passed name should substitute for the index name (if it has 

1673 one). 

1674 

1675 Returns 

1676 ------- 

1677 DataFrame 

1678 DataFrame containing the original Index data. 

1679 

1680 See Also 

1681 -------- 

1682 Index.to_series : Convert an Index to a Series. 

1683 Series.to_frame : Convert Series to DataFrame. 

1684 

1685 Examples 

1686 -------- 

1687 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') 

1688 >>> idx.to_frame() 

1689 animal 

1690 animal 

1691 Ant Ant 

1692 Bear Bear 

1693 Cow Cow 

1694 

1695 By default, the original Index is reused. To enforce a new Index: 

1696 

1697 >>> idx.to_frame(index=False) 

1698 animal 

1699 0 Ant 

1700 1 Bear 

1701 2 Cow 

1702 

1703 To override the name of the resulting column, specify `name`: 

1704 

1705 >>> idx.to_frame(index=False, name='zoo') 

1706 zoo 

1707 0 Ant 

1708 1 Bear 

1709 2 Cow 

1710 """ 

1711 from pandas import DataFrame 

1712 

1713 if name is None: 

1714 warnings.warn( 

1715 "Explicitly passing `name=None` currently preserves the Index's name " 

1716 "or uses a default name of 0. This behaviour is deprecated, and in " 

1717 "the future `None` will be used as the name of the resulting " 

1718 "DataFrame column.", 

1719 FutureWarning, 

1720 stacklevel=find_stack_level(), 

1721 ) 

1722 name = lib.no_default 

1723 

1724 if name is lib.no_default: 

1725 name = self._get_level_names() 

1726 result = DataFrame({name: self._values.copy()}) 

1727 

1728 if index: 

1729 result.index = self 

1730 return result 

1731 

1732 # -------------------------------------------------------------------- 

1733 # Name-Centric Methods 

1734 

1735 @property 

1736 def name(self) -> Hashable: 

1737 """ 

1738 Return Index or MultiIndex name. 

1739 """ 

1740 return self._name 

1741 

1742 @name.setter 

1743 def name(self, value: Hashable) -> None: 

1744 if self._no_setting_name: 

1745 # Used in MultiIndex.levels to avoid silently ignoring name updates. 

1746 raise RuntimeError( 

1747 "Cannot set name on a level of a MultiIndex. Use " 

1748 "'MultiIndex.set_names' instead." 

1749 ) 

1750 maybe_extract_name(value, None, type(self)) 

1751 self._name = value 

1752 

1753 @final 

1754 def _validate_names( 

1755 self, name=None, names=None, deep: bool = False 

1756 ) -> list[Hashable]: 

1757 """ 

1758 Handles the quirks of having a singular 'name' parameter for general 

1759 Index and plural 'names' parameter for MultiIndex. 

1760 """ 

1761 from copy import deepcopy 

1762 

1763 if names is not None and name is not None: 

1764 raise TypeError("Can only provide one of `names` and `name`") 

1765 elif names is None and name is None: 

1766 new_names = deepcopy(self.names) if deep else self.names 

1767 elif names is not None: 

1768 if not is_list_like(names): 

1769 raise TypeError("Must pass list-like as `names`.") 

1770 new_names = names 

1771 elif not is_list_like(name): 

1772 new_names = [name] 

1773 else: 

1774 new_names = name 

1775 

1776 if len(new_names) != len(self.names): 

1777 raise ValueError( 

1778 f"Length of new names must be {len(self.names)}, got {len(new_names)}" 

1779 ) 

1780 

1781 # All items in 'new_names' need to be hashable 

1782 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name") 

1783 

1784 return new_names 

1785 

1786 def _get_default_index_names( 

1787 self, names: Hashable | Sequence[Hashable] | None = None, default=None 

1788 ) -> list[Hashable]: 

1789 """ 

1790 Get names of index. 

1791 

1792 Parameters 

1793 ---------- 

1794 names : int, str or 1-dimensional list, default None 

1795 Index names to set. 

1796 default : str 

1797 Default name of index. 

1798 

1799 Raises 

1800 ------ 

1801 TypeError 

1802 if names not str or list-like 

1803 """ 

1804 from pandas.core.indexes.multi import MultiIndex 

1805 

1806 if names is not None: 

1807 if isinstance(names, str) or isinstance(names, int): 

1808 names = [names] 

1809 

1810 if not isinstance(names, list) and names is not None: 

1811 raise ValueError("Index names must be str or 1-dimensional list") 

1812 

1813 if not names: 

1814 if isinstance(self, MultiIndex): 

1815 names = com.fill_missing_names(self.names) 

1816 else: 

1817 names = [default] if self.name is None else [self.name] 

1818 

1819 return names 
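
A small sketch of the defaulting behaviour (private helper; the inputs are hypothetical):

>>> import pandas as pd
>>> pd.Index([1, 2], name='x')._get_default_index_names()
['x']
>>> pd.Index([1, 2])._get_default_index_names(default='index')
['index']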

1820 

1821 def _get_names(self) -> FrozenList: 

1822 return FrozenList((self.name,)) 

1823 

1824 def _set_names(self, values, *, level=None) -> None: 

1825 """ 

1826 Set new names on index. Each name has to be a hashable type. 

1827 

1828 Parameters 

1829 ---------- 

1830 values : str or sequence 

1831 name(s) to set 

1832 level : int, level name, or sequence of int/level names (default None) 

1833 If the index is a MultiIndex (hierarchical), level(s) to set (None 

1834 for all levels). Otherwise level must be None 

1835 

1836 Raises 

1837 ------ 

1838 TypeError if each name is not hashable. 

1839 """ 

1840 if not is_list_like(values): 

1841 raise ValueError("Names must be a list-like") 

1842 if len(values) != 1: 

1843 raise ValueError(f"Length of new names must be 1, got {len(values)}") 

1844 

1845 # GH 20527 

1846 # All items in 'name' need to be hashable: 

1847 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name") 

1848 

1849 self._name = values[0] 

1850 

1851 names = property(fset=_set_names, fget=_get_names) 

1852 

1853 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) 

1854 def set_names(self, names, level=None, inplace: bool = False): 

1855 """ 

1856 Set Index or MultiIndex name. 

1857 

1858 Able to set new names partially and by level. 

1859 

1860 Parameters 

1861 ---------- 

1862 

1863 names : label or list of label or dict-like for MultiIndex 

1864 Name(s) to set. 

1865 

1866 .. versionchanged:: 1.3.0 

1867 

1868 level : int, label or list of int or label, optional 

1869 If the index is a MultiIndex and names is not dict-like, level(s) to set 

1870 (None for all levels). Otherwise level must be None. 

1871 

1872 .. versionchanged:: 1.3.0 

1873 

1874 inplace : bool, default False 

1875 Modifies the object directly, instead of creating a new Index or 

1876 MultiIndex. 

1877 

1878 Returns 

1879 ------- 

1880 Index or None 

1881 The same type as the caller or None if ``inplace=True``. 

1882 

1883 See Also 

1884 -------- 

1885 Index.rename : Able to set new names without level. 

1886 

1887 Examples 

1888 -------- 

1889 >>> idx = pd.Index([1, 2, 3, 4]) 

1890 >>> idx 

1891 Int64Index([1, 2, 3, 4], dtype='int64') 

1892 >>> idx.set_names('quarter') 

1893 Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') 

1894 

1895 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], 

1896 ... [2018, 2019]]) 

1897 >>> idx 

1898 MultiIndex([('python', 2018), 

1899 ('python', 2019), 

1900 ( 'cobra', 2018), 

1901 ( 'cobra', 2019)], 

1902 ) 

1903 >>> idx.set_names(['kind', 'year'], inplace=True) 

1904 >>> idx 

1905 MultiIndex([('python', 2018), 

1906 ('python', 2019), 

1907 ( 'cobra', 2018), 

1908 ( 'cobra', 2019)], 

1909 names=['kind', 'year']) 

1910 >>> idx.set_names('species', level=0) 

1911 MultiIndex([('python', 2018), 

1912 ('python', 2019), 

1913 ( 'cobra', 2018), 

1914 ( 'cobra', 2019)], 

1915 names=['species', 'year']) 

1916 

1917 When renaming levels with a dict, the ``level`` argument cannot be passed. 

1918 

1919 >>> idx.set_names({'kind': 'snake'}) 

1920 MultiIndex([('python', 2018), 

1921 ('python', 2019), 

1922 ( 'cobra', 2018), 

1923 ( 'cobra', 2019)], 

1924 names=['snake', 'year']) 

1925 """ 

1926 if level is not None and not isinstance(self, ABCMultiIndex): 

1927 raise ValueError("Level must be None for non-MultiIndex") 

1928 

1929 elif level is not None and not is_list_like(level) and is_list_like(names): 

1930 raise TypeError("Names must be a string when a single level is provided.") 

1931 

1932 elif not is_list_like(names) and level is None and self.nlevels > 1: 

1933 raise TypeError("Must pass list-like as `names`.") 

1934 

1935 elif is_dict_like(names) and not isinstance(self, ABCMultiIndex): 

1936 raise TypeError("Can only pass dict-like as `names` for MultiIndex.") 

1937 

1938 elif is_dict_like(names) and level is not None: 

1939 raise TypeError("Can not pass level for dictlike `names`.") 

1940 

1941 if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None: 

1942 # Transform dict to list of new names and corresponding levels 

1943 level, names_adjusted = [], [] 

1944 for i, name in enumerate(self.names): 

1945 if name in names.keys(): 

1946 level.append(i) 

1947 names_adjusted.append(names[name]) 

1948 names = names_adjusted 

1949 

1950 if not is_list_like(names): 

1951 names = [names] 

1952 if level is not None and not is_list_like(level): 

1953 level = [level] 

1954 

1955 if inplace: 

1956 idx = self 

1957 else: 

1958 idx = self._view() 

1959 

1960 idx._set_names(names, level=level) 

1961 if not inplace: 

1962 return idx 

1963 

1964 def rename(self, name, inplace=False): 

1965 """ 

1966 Alter Index or MultiIndex name. 

1967 

1968 Able to set new names without level. Defaults to returning new index. 

1969 Length of names must match number of levels in MultiIndex. 

1970 

1971 Parameters 

1972 ---------- 

1973 name : label or list of labels 

1974 Name(s) to set. 

1975 inplace : bool, default False 

1976 Modifies the object directly, instead of creating a new Index or 

1977 MultiIndex. 

1978 

1979 Returns 

1980 ------- 

1981 Index or None 

1982 The same type as the caller or None if ``inplace=True``. 

1983 

1984 See Also 

1985 -------- 

1986 Index.set_names : Able to set new names partially and by level. 

1987 

1988 Examples 

1989 -------- 

1990 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score') 

1991 >>> idx.rename('grade') 

1992 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') 

1993 

1994 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], 

1995 ... [2018, 2019]], 

1996 ... names=['kind', 'year']) 

1997 >>> idx 

1998 MultiIndex([('python', 2018), 

1999 ('python', 2019), 

2000 ( 'cobra', 2018), 

2001 ( 'cobra', 2019)], 

2002 names=['kind', 'year']) 

2003 >>> idx.rename(['species', 'year']) 

2004 MultiIndex([('python', 2018), 

2005 ('python', 2019), 

2006 ( 'cobra', 2018), 

2007 ( 'cobra', 2019)], 

2008 names=['species', 'year']) 

2009 >>> idx.rename('species') 

2010 Traceback (most recent call last): 

2011 TypeError: Must pass list-like as `names`. 

2012 """ 

2013 return self.set_names([name], inplace=inplace) 

2014 

2015 # -------------------------------------------------------------------- 

2016 # Level-Centric Methods 

2017 

2018 @property 

2019 def nlevels(self) -> int: 

2020 """ 

2021 Number of levels. 

2022 """ 

2023 return 1 

2024 

2025 def _sort_levels_monotonic(self: _IndexT) -> _IndexT: 

2026 """ 

2027 Compat with MultiIndex. 

2028 """ 

2029 return self 

2030 

2031 @final 

2032 def _validate_index_level(self, level) -> None: 

2033 """ 

2034 Validate index level. 

2035 

2036 For single-level Index getting level number is a no-op, but some 

2037 verification must be done like in MultiIndex. 

2038 

2039 """ 

2040 if isinstance(level, int): 

2041 if level < 0 and level != -1: 

2042 raise IndexError( 

2043 "Too many levels: Index has only 1 level, " 

2044 f"{level} is not a valid level number" 

2045 ) 

2046 elif level > 0: 

2047 raise IndexError( 

2048 f"Too many levels: Index has only 1 level, not {level + 1}" 

2049 ) 

2050 elif level != self.name: 

2051 raise KeyError( 

2052 f"Requested level ({level}) does not match index name ({self.name})" 

2053 ) 
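
The validation above in action (a sketch; error text taken from the raises above):

>>> import pandas as pd
>>> idx = pd.Index([1, 2], name='x')
>>> idx._validate_index_level(0)      # valid position: silently accepted
>>> idx._validate_index_level('x')    # the index's own name also passes
>>> idx._validate_index_level(1)
Traceback (most recent call last):
IndexError: Too many levels: Index has only 1 level, not 2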

2054 

2055 def _get_level_number(self, level) -> int: 

2056 self._validate_index_level(level) 

2057 return 0 

2058 

2059 def sortlevel(self, level=None, ascending=True, sort_remaining=None): 

2060 """ 

2061 For internal compatibility with the Index API. 

2062 

2063 Sort the Index. This is for compat with MultiIndex 

2064 

2065 Parameters 

2066 ---------- 

2067 ascending : bool, default True 

2068 False to sort in descending order. 

2069 

2070 ``level`` and ``sort_remaining`` are accepted only for compatibility and are ignored. 

2071 

2072 Returns 

2073 ------- 

2074 Index 

2075 """ 

2076 if not isinstance(ascending, (list, bool)): 

2077 raise TypeError( 

2078 "ascending must be a single bool value or" 

2079 "a list of bool values of length 1" 

2080 ) 

2081 

2082 if isinstance(ascending, list): 

2083 if len(ascending) != 1: 

2084 raise TypeError("ascending must be a list of bool values of length 1") 

2085 ascending = ascending[0] 

2086 

2087 if not isinstance(ascending, bool): 

2088 raise TypeError("ascending must be a bool value") 

2089 

2090 return self.sort_values(return_indexer=True, ascending=ascending) 
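
Usage sketch; the (index, indexer) reprs assume the pandas 1.x (Int64Index-era) build this module ships in:

>>> import pandas as pd
>>> idx = pd.Index([3, 1, 2])
>>> idx.sortlevel()
(Int64Index([1, 2, 3], dtype='int64'), array([1, 2, 0]))
>>> idx.sortlevel(ascending=False)
(Int64Index([3, 2, 1], dtype='int64'), array([0, 2, 1]))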

2091 

2092 def _get_level_values(self, level) -> Index: 

2093 """ 

2094 Return an Index of values for requested level. 

2095 

2096 This is primarily useful to get an individual level of values from a 

2097 MultiIndex, but is provided on Index as well for compatibility. 

2098 

2099 Parameters 

2100 ---------- 

2101 level : int or str 

2102 It is either the integer position or the name of the level. 

2103 

2104 Returns 

2105 ------- 

2106 Index 

2107 Calling object, as there is only one level in the Index. 

2108 

2109 See Also 

2110 -------- 

2111 MultiIndex.get_level_values : Get values for a level of a MultiIndex. 

2112 

2113 Notes 

2114 ----- 

2115 For Index, `level` should be 0, since there is only one level. 

2116 

2117 Examples 

2118 -------- 

2119 >>> idx = pd.Index(list('abc')) 

2120 >>> idx 

2121 Index(['a', 'b', 'c'], dtype='object') 

2122 

2123 Get level values by supplying `level` as integer: 

2124 

2125 >>> idx.get_level_values(0) 

2126 Index(['a', 'b', 'c'], dtype='object') 

2127 """ 

2128 self._validate_index_level(level) 

2129 return self 

2130 

2131 get_level_values = _get_level_values 

2132 

2133 @final 

2134 def droplevel(self, level=0): 

2135 """ 

2136 Return index with requested level(s) removed. 

2137 

2138 If resulting index has only 1 level left, the result will be 

2139 of Index type, not MultiIndex. 

2140 

2141 Parameters 

2142 ---------- 

2143 level : int, str, or list-like, default 0 

2144 If a string is given, must be the name of a level 

2145 If list-like, elements must be names or indexes of levels. 

2146 

2147 Returns 

2148 ------- 

2149 Index or MultiIndex 

2150 

2151 Examples 

2152 -------- 

2153 >>> mi = pd.MultiIndex.from_arrays( 

2154 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) 

2155 >>> mi 

2156 MultiIndex([(1, 3, 5), 

2157 (2, 4, 6)], 

2158 names=['x', 'y', 'z']) 

2159 

2160 >>> mi.droplevel() 

2161 MultiIndex([(3, 5), 

2162 (4, 6)], 

2163 names=['y', 'z']) 

2164 

2165 >>> mi.droplevel(2) 

2166 MultiIndex([(1, 3), 

2167 (2, 4)], 

2168 names=['x', 'y']) 

2169 

2170 >>> mi.droplevel('z') 

2171 MultiIndex([(1, 3), 

2172 (2, 4)], 

2173 names=['x', 'y']) 

2174 

2175 >>> mi.droplevel(['x', 'y']) 

2176 Int64Index([5, 6], dtype='int64', name='z') 

2177 """ 

2178 if not isinstance(level, (tuple, list)): 

2179 level = [level] 

2180 

2181 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] 

2182 

2183 return self._drop_level_numbers(levnums) 

2184 

2185 @final 

2186 def _drop_level_numbers(self, levnums: list[int]): 

2187 """ 

2188 Drop MultiIndex levels by level _number_, not name. 

2189 """ 

2190 

2191 if not levnums and not isinstance(self, ABCMultiIndex): 

2192 return self 

2193 if len(levnums) >= self.nlevels: 

2194 raise ValueError( 

2195 f"Cannot remove {len(levnums)} levels from an index with " 

2196 f"{self.nlevels} levels: at least one level must be left." 

2197 ) 

2198 # The two checks above guarantee that here self is a MultiIndex 

2199 self = cast("MultiIndex", self) 

2200 

2201 new_levels = list(self.levels) 

2202 new_codes = list(self.codes) 

2203 new_names = list(self.names) 

2204 

2205 for i in levnums: 

2206 new_levels.pop(i) 

2207 new_codes.pop(i) 

2208 new_names.pop(i) 

2209 

2210 if len(new_levels) == 1: 

2211 lev = new_levels[0] 

2212 

2213 if len(lev) == 0: 

2214 # If lev is empty, lev.take will fail GH#42055 

2215 if len(new_codes[0]) == 0: 

2216 # GH#45230 preserve RangeIndex here 

2217 # see test_reset_index_empty_rangeindex 

2218 result = lev[:0] 

2219 else: 

2220 res_values = algos.take(lev._values, new_codes[0], allow_fill=True) 

2221 # _constructor instead of type(lev) for RangeIndex compat GH#35230 

2222 result = lev._constructor._simple_new(res_values, name=new_names[0]) 

2223 else: 

2224 # set nan if needed 

2225 mask = new_codes[0] == -1 

2226 result = new_levels[0].take(new_codes[0]) 

2227 if mask.any(): 

2228 result = result.putmask(mask, np.nan) 

2229 

2230 result._name = new_names[0] 

2231 

2232 return result 

2233 else: 

2234 from pandas.core.indexes.multi import MultiIndex 

2235 

2236 return MultiIndex( 

2237 levels=new_levels, 

2238 codes=new_codes, 

2239 names=new_names, 

2240 verify_integrity=False, 

2241 ) 

2242 

2243 def _get_grouper_for_level( 

2244 self, 

2245 mapper, 

2246 *, 

2247 level=None, 

2248 dropna: bool = True, 

2249 ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]: 

2250 """ 

2251 Get index grouper corresponding to an index level 

2252 

2253 Parameters 

2254 ---------- 

2255 mapper : Group mapping function or None 

2256 Function mapping index values to groups 

2257 level : int or None 

2258 Index level, positional 

2259 dropna : bool 

2260 Whether to drop NA groups, passed through from groupby. 

2261 

2262 Returns 

2263 ------- 

2264 grouper : Index 

2265 Index of values to group on. 

2266 labels : ndarray of int or None 

2267 Array of locations in level_index. 

2268 uniques : Index or None 

2269 Index of unique values for level. 

2270 """ 

2271 assert level is None or level == 0 

2272 if mapper is None: 

2273 grouper = self 

2274 else: 

2275 grouper = self.map(mapper) 

2276 

2277 return grouper, None, None 
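
A sketch of the two branches above (private API; the mapper is a hypothetical example):

>>> import pandas as pd
>>> idx = pd.Index([1, 2, 3])
>>> grouper, labels, uniques = idx._get_grouper_for_level(lambda x: x % 2)
>>> grouper
Int64Index([1, 0, 1], dtype='int64')
>>> labels is None and uniques is None    # only populated by MultiIndex
True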

2278 

2279 # -------------------------------------------------------------------- 

2280 # Introspection Methods 

2281 

2282 @cache_readonly 

2283 @final 

2284 def _can_hold_na(self) -> bool: 

2285 if isinstance(self.dtype, ExtensionDtype): 

2286 if isinstance(self.dtype, IntervalDtype): 

2287 # FIXME(GH#45720): this is inaccurate for integer-backed 

2288 # IntervalArray, but without it other.categories.take raises 

2289 # in IntervalArray._cmp_method 

2290 return True 

2291 return self.dtype._can_hold_na 

2292 if self.dtype.kind in ["i", "u", "b"]: 

2293 return False 

2294 return True 
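
Concrete examples of the dtype cases above (a sketch; private attribute):

>>> import pandas as pd
>>> pd.Index([1, 2])._can_hold_na        # 'i'/'u'/'b' kinds cannot hold NA
False
>>> pd.Index([1.0, 2.0])._can_hold_na    # floats can hold NaN
True
>>> pd.Index(['a', 'b'])._can_hold_na    # object dtype can hold any NA
True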

2295 

2296 @final 

2297 @property 

2298 def is_monotonic(self) -> bool: 

2299 """ 

2300 Alias for is_monotonic_increasing. 

2301 

2302 .. deprecated:: 1.5.0 

2303 is_monotonic is deprecated and will be removed in a future version. 

2304 Use is_monotonic_increasing instead. 

2305 """ 

2306 warnings.warn( 

2307 "is_monotonic is deprecated and will be removed in a future version. " 

2308 "Use is_monotonic_increasing instead.", 

2309 FutureWarning, 

2310 stacklevel=find_stack_level(), 

2311 ) 

2312 return self.is_monotonic_increasing 

2313 

2314 @property 

2315 def is_monotonic_increasing(self) -> bool: 

2316 """ 

2317 Return a boolean indicating whether the values are equal or increasing. 

2318 

2319 Examples 

2320 -------- 

2321 >>> Index([1, 2, 3]).is_monotonic_increasing 

2322 True 

2323 >>> Index([1, 2, 2]).is_monotonic_increasing 

2324 True 

2325 >>> Index([1, 3, 2]).is_monotonic_increasing 

2326 False 

2327 """ 

2328 return self._engine.is_monotonic_increasing 

2329 

2330 @property 

2331 def is_monotonic_decreasing(self) -> bool: 

2332 """ 

2333 Return a boolean indicating whether the values are equal or decreasing. 

2334 

2335 Examples 

2336 -------- 

2337 >>> Index([3, 2, 1]).is_monotonic_decreasing 

2338 True 

2339 >>> Index([3, 2, 2]).is_monotonic_decreasing 

2340 True 

2341 >>> Index([3, 1, 2]).is_monotonic_decreasing 

2342 False 

2343 """ 

2344 return self._engine.is_monotonic_decreasing 

2345 

2346 @final 

2347 @property 

2348 def _is_strictly_monotonic_increasing(self) -> bool: 

2349 """ 

2350 Return whether the index holds strictly monotonically increasing 

2351 (only increasing) values. 

2352 

2353 Examples 

2354 -------- 

2355 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing 

2356 True 

2357 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing 

2358 False 

2359 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing 

2360 False 

2361 """ 

2362 return self.is_unique and self.is_monotonic_increasing 

2363 

2364 @final 

2365 @property 

2366 def _is_strictly_monotonic_decreasing(self) -> bool: 

2367 """ 

2368 Return whether the index holds strictly monotonically decreasing 

2369 (only decreasing) values. 

2370 

2371 Examples 

2372 -------- 

2373 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing 

2374 True 

2375 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing 

2376 False 

2377 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing 

2378 False 

2379 """ 

2380 return self.is_unique and self.is_monotonic_decreasing 

2381 

2382 @cache_readonly 

2383 def is_unique(self) -> bool: 

2384 """ 

2385 Return if the index has unique values. 

2386 """ 

2387 return self._engine.is_unique 

2388 

2389 @final 

2390 @property 

2391 def has_duplicates(self) -> bool: 

2392 """ 

2393 Check if the Index has duplicate values. 

2394 

2395 Returns 

2396 ------- 

2397 bool 

2398 Whether or not the Index has duplicate values. 

2399 

2400 Examples 

2401 -------- 

2402 >>> idx = pd.Index([1, 5, 7, 7]) 

2403 >>> idx.has_duplicates 

2404 True 

2405 

2406 >>> idx = pd.Index([1, 5, 7]) 

2407 >>> idx.has_duplicates 

2408 False 

2409 

2410 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2411 ... "Watermelon"]).astype("category") 

2412 >>> idx.has_duplicates 

2413 True 

2414 

2415 >>> idx = pd.Index(["Orange", "Apple", 

2416 ... "Watermelon"]).astype("category") 

2417 >>> idx.has_duplicates 

2418 False 

2419 """ 

2420 return not self.is_unique 

2421 

2422 @final 

2423 def is_boolean(self) -> bool: 

2424 """ 

2425 Check if the Index only consists of booleans. 

2426 

2427 Returns 

2428 ------- 

2429 bool 

2430 Whether or not the Index only consists of booleans. 

2431 

2432 See Also 

2433 -------- 

2434 is_integer : Check if the Index only consists of integers. 

2435 is_floating : Check if the Index is a floating type. 

2436 is_numeric : Check if the Index only consists of numeric data. 

2437 is_object : Check if the Index is of the object dtype. 

2438 is_categorical : Check if the Index holds categorical data. 

2439 is_interval : Check if the Index holds Interval objects. 

2440 is_mixed : Check if the Index holds data with mixed data types. 

2441 

2442 Examples 

2443 -------- 

2444 >>> idx = pd.Index([True, False, True]) 

2445 >>> idx.is_boolean() 

2446 True 

2447 

2448 >>> idx = pd.Index(["True", "False", "True"]) 

2449 >>> idx.is_boolean() 

2450 False 

2451 

2452 >>> idx = pd.Index([True, False, "True"]) 

2453 >>> idx.is_boolean() 

2454 False 

2455 """ 

2456 return self.inferred_type in ["boolean"] 

2457 

2458 @final 

2459 def is_integer(self) -> bool: 

2460 """ 

2461 Check if the Index only consists of integers. 

2462 

2463 Returns 

2464 ------- 

2465 bool 

2466 Whether or not the Index only consists of integers. 

2467 

2468 See Also 

2469 -------- 

2470 is_boolean : Check if the Index only consists of booleans. 

2471 is_floating : Check if the Index is a floating type. 

2472 is_numeric : Check if the Index only consists of numeric data. 

2473 is_object : Check if the Index is of the object dtype. 

2474 is_categorical : Check if the Index holds categorical data. 

2475 is_interval : Check if the Index holds Interval objects. 

2476 is_mixed : Check if the Index holds data with mixed data types. 

2477 

2478 Examples 

2479 -------- 

2480 >>> idx = pd.Index([1, 2, 3, 4]) 

2481 >>> idx.is_integer() 

2482 True 

2483 

2484 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2485 >>> idx.is_integer() 

2486 False 

2487 

2488 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) 

2489 >>> idx.is_integer() 

2490 False 

2491 """ 

2492 return self.inferred_type in ["integer"] 

2493 

2494 @final 

2495 def is_floating(self) -> bool: 

2496 """ 

2497 Check if the Index is a floating type. 

2498 

2499 The Index may consist of only floats, NaNs, or a mix of floats, 

2500 integers, or NaNs. 

2501 

2502 Returns 

2503 ------- 

2504 bool 

2505 Whether or not the Index consists only of floats, NaNs, or 

2506 a mix of floats, integers, or NaNs. 

2507 

2508 See Also 

2509 -------- 

2510 is_boolean : Check if the Index only consists of booleans. 

2511 is_integer : Check if the Index only consists of integers. 

2512 is_numeric : Check if the Index only consists of numeric data. 

2513 is_object : Check if the Index is of the object dtype. 

2514 is_categorical : Check if the Index holds categorical data. 

2515 is_interval : Check if the Index holds Interval objects. 

2516 is_mixed : Check if the Index holds data with mixed data types. 

2517 

2518 Examples 

2519 -------- 

2520 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2521 >>> idx.is_floating() 

2522 True 

2523 

2524 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0]) 

2525 >>> idx.is_floating() 

2526 True 

2527 

2528 >>> idx = pd.Index([1, 2, 3, 4, np.nan]) 

2529 >>> idx.is_floating() 

2530 True 

2531 

2532 >>> idx = pd.Index([1, 2, 3, 4]) 

2533 >>> idx.is_floating() 

2534 False 

2535 """ 

2536 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] 

2537 

2538 @final 

2539 def is_numeric(self) -> bool: 

2540 """ 

2541 Check if the Index only consists of numeric data. 

2542 

2543 Returns 

2544 ------- 

2545 bool 

2546 Whether or not the Index only consists of numeric data. 

2547 

2548 See Also 

2549 -------- 

2550 is_boolean : Check if the Index only consists of booleans. 

2551 is_integer : Check if the Index only consists of integers. 

2552 is_floating : Check if the Index is a floating type. 

2553 is_object : Check if the Index is of the object dtype. 

2554 is_categorical : Check if the Index holds categorical data. 

2555 is_interval : Check if the Index holds Interval objects. 

2556 is_mixed : Check if the Index holds data with mixed data types. 

2557 

2558 Examples 

2559 -------- 

2560 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2561 >>> idx.is_numeric() 

2562 True 

2563 

2564 >>> idx = pd.Index([1, 2, 3, 4.0]) 

2565 >>> idx.is_numeric() 

2566 True 

2567 

2568 >>> idx = pd.Index([1, 2, 3, 4]) 

2569 >>> idx.is_numeric() 

2570 True 

2571 

2572 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan]) 

2573 >>> idx.is_numeric() 

2574 True 

2575 

2576 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"]) 

2577 >>> idx.is_numeric() 

2578 False 

2579 """ 

2580 return self.inferred_type in ["integer", "floating"] 

2581 

2582 @final 

2583 def is_object(self) -> bool: 

2584 """ 

2585 Check if the Index is of the object dtype. 

2586 

2587 Returns 

2588 ------- 

2589 bool 

2590 Whether or not the Index is of the object dtype. 

2591 

2592 See Also 

2593 -------- 

2594 is_boolean : Check if the Index only consists of booleans. 

2595 is_integer : Check if the Index only consists of integers. 

2596 is_floating : Check if the Index is a floating type. 

2597 is_numeric : Check if the Index only consists of numeric data. 

2598 is_categorical : Check if the Index holds categorical data. 

2599 is_interval : Check if the Index holds Interval objects. 

2600 is_mixed : Check if the Index holds data with mixed data types. 

2601 

2602 Examples 

2603 -------- 

2604 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) 

2605 >>> idx.is_object() 

2606 True 

2607 

2608 >>> idx = pd.Index(["Apple", "Mango", 2.0]) 

2609 >>> idx.is_object() 

2610 True 

2611 

2612 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2613 ... "Watermelon"]).astype("category") 

2614 >>> idx.is_object() 

2615 False 

2616 

2617 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2618 >>> idx.is_object() 

2619 False 

2620 """ 

2621 return is_object_dtype(self.dtype) 

2622 

2623 @final 

2624 def is_categorical(self) -> bool: 

2625 """ 

2626 Check if the Index holds categorical data. 

2627 

2628 Returns 

2629 ------- 

2630 bool 

2631 True if the Index is categorical. 

2632 

2633 See Also 

2634 -------- 

2635 CategoricalIndex : Index for categorical data. 

2636 is_boolean : Check if the Index only consists of booleans. 

2637 is_integer : Check if the Index only consists of integers. 

2638 is_floating : Check if the Index is a floating type. 

2639 is_numeric : Check if the Index only consists of numeric data. 

2640 is_object : Check if the Index is of the object dtype. 

2641 is_interval : Check if the Index holds Interval objects. 

2642 is_mixed : Check if the Index holds data with mixed data types. 

2643 

2644 Examples 

2645 -------- 

2646 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2647 ... "Watermelon"]).astype("category") 

2648 >>> idx.is_categorical() 

2649 True 

2650 

2651 >>> idx = pd.Index([1, 3, 5, 7]) 

2652 >>> idx.is_categorical() 

2653 False 

2654 

2655 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"]) 

2656 >>> s 

2657 0 Peter 

2658 1 Victor 

2659 2 Elisabeth 

2660 3 Mar 

2661 dtype: object 

2662 >>> s.index.is_categorical() 

2663 False 

2664 """ 

2665 return self.inferred_type in ["categorical"] 

2666 

2667 @final 

2668 def is_interval(self) -> bool: 

2669 """ 

2670 Check if the Index holds Interval objects. 

2671 

2672 Returns 

2673 ------- 

2674 bool 

2675 Whether or not the Index holds Interval objects. 

2676 

2677 See Also 

2678 -------- 

2679 IntervalIndex : Index for Interval objects. 

2680 is_boolean : Check if the Index only consists of booleans. 

2681 is_integer : Check if the Index only consists of integers. 

2682 is_floating : Check if the Index is a floating type. 

2683 is_numeric : Check if the Index only consists of numeric data. 

2684 is_object : Check if the Index is of the object dtype. 

2685 is_categorical : Check if the Index holds categorical data. 

2686 is_mixed : Check if the Index holds data with mixed data types. 

2687 

2688 Examples 

2689 -------- 

2690 >>> idx = pd.Index([pd.Interval(left=0, right=5), 

2691 ... pd.Interval(left=5, right=10)]) 

2692 >>> idx.is_interval() 

2693 True 

2694 

2695 >>> idx = pd.Index([1, 3, 5, 7]) 

2696 >>> idx.is_interval() 

2697 False 

2698 """ 

2699 return self.inferred_type in ["interval"] 

2700 

2701 @final 

2702 def is_mixed(self) -> bool: 

2703 """ 

2704 Check if the Index holds data with mixed data types. 

2705 

2706 Returns 

2707 ------- 

2708 bool 

2709 Whether or not the Index holds data with mixed data types. 

2710 

2711 See Also 

2712 -------- 

2713 is_boolean : Check if the Index only consists of booleans. 

2714 is_integer : Check if the Index only consists of integers. 

2715 is_floating : Check if the Index is a floating type. 

2716 is_numeric : Check if the Index only consists of numeric data. 

2717 is_object : Check if the Index is of the object dtype. 

2718 is_categorical : Check if the Index holds categorical data. 

2719 is_interval : Check if the Index holds Interval objects. 

2720 

2721 Examples 

2722 -------- 

2723 >>> idx = pd.Index(['a', np.nan, 'b']) 

2724 >>> idx.is_mixed() 

2725 True 

2726 

2727 >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0]) 

2728 >>> idx.is_mixed() 

2729 False 

2730 """ 

2731 warnings.warn( 

2732 "Index.is_mixed is deprecated and will be removed in a future version. " 

2733 "Check index.inferred_type directly instead.", 

2734 FutureWarning, 

2735 stacklevel=find_stack_level(), 

2736 ) 

2737 return self.inferred_type in ["mixed"] 

2738 

2739 @final 

2740 def holds_integer(self) -> bool: 

2741 """ 

2742 Whether the type is an integer type. 

2743 """ 

2744 return self.inferred_type in ["integer", "mixed-integer"] 

2745 

2746 @cache_readonly 

2747 def inferred_type(self) -> str_t: 

2748 """ 

2749 Return a string of the type inferred from the values. 

2750 """ 

2751 return lib.infer_dtype(self._values, skipna=False) 
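
A few representative inferences (a sketch; the strings come from lib.infer_dtype):

>>> import pandas as pd
>>> pd.Index([1, 2, 3]).inferred_type
'integer'
>>> pd.Index(['a', 'b']).inferred_type
'string'
>>> pd.Index([1, 'a']).inferred_type
'mixed-integer'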

2752 

2753 @cache_readonly 

2754 @final 

2755 def _is_all_dates(self) -> bool: 

2756 """ 

2757 Whether or not the index values only consist of dates. 

2758 """ 

2759 if needs_i8_conversion(self.dtype): 

2760 return True 

2761 elif self.dtype != _dtype_obj: 

2762 # TODO(ExtensionIndex): 3rd party EA might override? 

2763 # Note: this includes IntervalIndex, even when the left/right 

2764 # contain datetime-like objects. 

2765 return False 

2766 elif self._is_multi: 

2767 return False 

2768 return is_datetime_array(ensure_object(self._values)) 

2769 

2770 @cache_readonly 

2771 @final 

2772 def is_all_dates(self) -> bool: 

2773 """ 

2774 Whether or not the index values only consist of dates. 

2775 """ 

2776 warnings.warn( 

2777 "Index.is_all_dates is deprecated, will be removed in a future version. " 

2778 "check index.inferred_type instead.", 

2779 FutureWarning, 

2780 stacklevel=find_stack_level(), 

2781 ) 

2782 return self._is_all_dates 

2783 

2784 @final 

2785 @cache_readonly 

2786 def _is_multi(self) -> bool: 

2787 """ 

2788 Cached check equivalent to isinstance(self, MultiIndex) 

2789 """ 

2790 return isinstance(self, ABCMultiIndex) 

2791 

2792 # -------------------------------------------------------------------- 

2793 # Pickle Methods 

2794 

2795 def __reduce__(self): 

2796 d = {"data": self._data, "name": self.name} 

2797 return _new_Index, (type(self), d), None 
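
The reduce protocol above lets Index objects round-trip through pickle; a minimal check:

>>> import pickle
>>> import pandas as pd
>>> idx = pd.Index([1, 2], name='n')
>>> restored = pickle.loads(pickle.dumps(idx))
>>> restored.equals(idx) and restored.name == 'n'
True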

2798 

2799 # -------------------------------------------------------------------- 

2800 # Null Handling Methods 

2801 

2802 @cache_readonly 

2803 def _na_value(self): 

2804 """The expected NA value to use with this index.""" 

2805 dtype = self.dtype 

2806 if isinstance(dtype, np.dtype): 

2807 if dtype.kind in ["m", "M"]: 

2808 return NaT 

2809 return np.nan 

2810 return dtype.na_value 
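
Sketch of the NA sentinel per backing dtype (private attribute):

>>> import pandas as pd
>>> pd.Index([1.5, 2.5])._na_value            # numpy float dtype -> nan
nan
>>> pd.to_datetime(['2020-01-01'])._na_value  # 'M'/'m' kinds -> NaT
NaT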

2811 

2812 @cache_readonly 

2813 def _isnan(self) -> npt.NDArray[np.bool_]: 

2814 """ 

2815 Return if each value is NaN. 

2816 """ 

2817 if self._can_hold_na: 

2818 return isna(self) 

2819 else: 

2820 # shouldn't reach this condition; callers check hasnans beforehand 

2821 values = np.empty(len(self), dtype=np.bool_) 

2822 values.fill(False) 

2823 return values 

2824 

2825 @cache_readonly 

2826 def hasnans(self) -> bool: 

2827 """ 

2828 Return True if there are any NaNs. 

2829 

2830 Enables various performance speedups. 

2831 """ 

2832 if self._can_hold_na: 

2833 return bool(self._isnan.any()) 

2834 else: 

2835 return False 
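
Usage sketch of the fast NA check:

>>> import numpy as np
>>> import pandas as pd
>>> pd.Index([1.0, np.nan]).hasnans
True
>>> pd.Index([1, 2]).hasnans    # int64 cannot hold NaN, so this is always False
False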

2836 

2837 @final 

2838 def isna(self) -> npt.NDArray[np.bool_]: 

2839 """ 

2840 Detect missing values. 

2841 

2842 Return a boolean same-sized object indicating if the values are NA. 

2843 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get 

2844 mapped to ``True`` values. 

2845 Everything else gets mapped to ``False`` values. Characters such as 

2846 empty strings `''` or :attr:`numpy.inf` are not considered NA values 

2847 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

2848 

2849 Returns 

2850 ------- 

2851 numpy.ndarray[bool] 

2852 A boolean array of whether my values are NA. 

2853 

2854 See Also 

2855 -------- 

2856 Index.notna : Boolean inverse of isna. 

2857 Index.dropna : Omit entries with missing values. 

2858 isna : Top-level isna. 

2859 Series.isna : Detect missing values in Series object. 

2860 

2861 Examples 

2862 -------- 

2863 Show which entries in a pandas.Index are NA. The result is an 

2864 array. 

2865 

2866 >>> idx = pd.Index([5.2, 6.0, np.NaN]) 

2867 >>> idx 

2868 Float64Index([5.2, 6.0, nan], dtype='float64') 

2869 >>> idx.isna() 

2870 array([False, False, True]) 

2871 

2872 Empty strings are not considered NA values. None is considered an NA 

2873 value. 

2874 

2875 >>> idx = pd.Index(['black', '', 'red', None]) 

2876 >>> idx 

2877 Index(['black', '', 'red', None], dtype='object') 

2878 >>> idx.isna() 

2879 array([False, False, False, True]) 

2880 

2881 For datetimes, `NaT` (Not a Time) is considered an NA value. 

2882 

2883 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), 

2884 ... pd.Timestamp(''), None, pd.NaT]) 

2885 >>> idx 

2886 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], 

2887 dtype='datetime64[ns]', freq=None) 

2888 >>> idx.isna() 

2889 array([False, True, True, True]) 

2890 """ 

2891 return self._isnan 

2892 

2893 isnull = isna 

2894 

2895 @final 

2896 def notna(self) -> npt.NDArray[np.bool_]: 

2897 """ 

2898 Detect existing (non-missing) values. 

2899 

2900 Return a boolean same-sized object indicating if the values are not NA. 

2901 Non-missing values get mapped to ``True``. Characters such as empty 

2902 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

2903 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

2904 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` 

2905 values. 

2906 

2907 Returns 

2908 ------- 

2909 numpy.ndarray[bool] 

2910 Boolean array to indicate which entries are not NA. 

2911 

2912 See Also 

2913 -------- 

2914 Index.notnull : Alias of notna. 

2915 Index.isna: Inverse of notna. 

2916 notna : Top-level notna. 

2917 

2918 Examples 

2919 -------- 

2920 Show which entries in an Index are not NA. The result is an 

2921 array. 

2922 

2923 >>> idx = pd.Index([5.2, 6.0, np.NaN]) 

2924 >>> idx 

2925 Float64Index([5.2, 6.0, nan], dtype='float64') 

2926 >>> idx.notna() 

2927 array([ True, True, False]) 

2928 

2929 Empty strings are not considered NA values. None is considered an NA 

2930 value. 

2931 

2932 >>> idx = pd.Index(['black', '', 'red', None]) 

2933 >>> idx 

2934 Index(['black', '', 'red', None], dtype='object') 

2935 >>> idx.notna() 

2936 array([ True, True, True, False]) 

2937 """ 

2938 return ~self.isna() 

2939 

2940 notnull = notna 

2941 

2942 def fillna(self, value=None, downcast=None): 

2943 """ 

2944 Fill NA/NaN values with the specified value. 

2945 

2946 Parameters 

2947 ---------- 

2948 value : scalar 

2949 Scalar value to use to fill holes (e.g. 0). 

2950 This value cannot be a list-like. 

2951 downcast : dict, default is None 

2952 A dict of item->dtype of what to downcast if possible, 

2953 or the string 'infer' which will try to downcast to an appropriate 

2954 equal type (e.g. float64 to int64 if possible). 

2955 

2956 Returns 

2957 ------- 

2958 Index 

2959 

2960 See Also 

2961 -------- 

2962 DataFrame.fillna : Fill NaN values of a DataFrame. 

2963 Series.fillna : Fill NaN Values of a Series. 

2964 """ 

2965 

2966 value = self._require_scalar(value) 

2967 if self.hasnans: 

2968 result = self.putmask(self._isnan, value) 

2969 if downcast is None: 

2970 # no need to care metadata other than name 

2971 # because it can't have freq if it has NaTs 

2972 return Index._with_infer(result, name=self.name) 

2973 raise NotImplementedError( 

2974 f"{type(self).__name__}.fillna does not support 'downcast' " 

2975 "argument values other than 'None'." 

2976 ) 

2977 return self._view() 
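
Usage sketch; the repr assumes the pandas 1.x (Float64Index-era) build this module ships in:

>>> import numpy as np
>>> import pandas as pd
>>> pd.Index([1.0, np.nan, 3.0]).fillna(2.0)
Float64Index([1.0, 2.0, 3.0], dtype='float64')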

2978 

2979 def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: 

2980 """ 

2981 Return Index without NA/NaN values. 

2982 

2983 Parameters 

2984 ---------- 

2985 how : {'any', 'all'}, default 'any' 

2986 If the Index is a MultiIndex, drop the value when any or all levels 

2987 are NaN. 

2988 

2989 Returns 

2990 ------- 

2991 Index 

2992 """ 

2993 if how not in ("any", "all"): 

2994 raise ValueError(f"invalid how option: {how}") 

2995 

2996 if self.hasnans: 

2997 res_values = self._values[~self._isnan] 

2998 return type(self)._simple_new(res_values, name=self.name) 

2999 return self._view() 
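
Usage sketch (same repr caveat as fillna above):

>>> import numpy as np
>>> import pandas as pd
>>> pd.Index([1.0, np.nan, 3.0]).dropna()
Float64Index([1.0, 3.0], dtype='float64')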

3000 

3001 # -------------------------------------------------------------------- 

3002 # Uniqueness Methods 

3003 

3004 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: 

3005 """ 

3006 Return unique values in the index. 

3007 

3008 Unique values are returned in order of appearance; this does NOT sort. 

3009 

3010 Parameters 

3011 ---------- 

3012 level : int or hashable, optional 

3013 Only return values from specified level (for MultiIndex). 

3014 If int, gets the level by integer position, else by level name. 

3015 

3016 Returns 

3017 ------- 

3018 Index 

3019 

3020 See Also 

3021 -------- 

3022 unique : Top-level unique function for 1-dimensional array-likes. 

3023 Series.unique : Return unique values of Series object. 

3024 """ 

3025 if level is not None: 

3026 self._validate_index_level(level) 

3027 

3028 if self.is_unique: 

3029 return self._view() 

3030 

3031 result = super().unique() 

3032 return self._shallow_copy(result) 
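
Usage sketch showing order of appearance (Int64Index repr assumes pandas 1.x):

>>> import pandas as pd
>>> pd.Index([3, 1, 3, 2]).unique()
Int64Index([3, 1, 2], dtype='int64')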

3033 

3034 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

3035 def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: 

3036 """ 

3037 Return Index with duplicate values removed. 

3038 

3039 Parameters 

3040 ---------- 

3041 keep : {'first', 'last', ``False``}, default 'first' 

3042 - 'first' : Drop duplicates except for the first occurrence. 

3043 - 'last' : Drop duplicates except for the last occurrence. 

3044 - ``False`` : Drop all duplicates. 

3045 

3046 Returns 

3047 ------- 

3048 deduplicated : Index 

3049 

3050 See Also 

3051 -------- 

3052 Series.drop_duplicates : Equivalent method on Series. 

3053 DataFrame.drop_duplicates : Equivalent method on DataFrame. 

3054 Index.duplicated : Related method on Index, indicating duplicate 

3055 Index values. 

3056 

3057 Examples 

3058 -------- 

3059 Generate a pandas.Index with duplicate values. 

3060 

3061 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) 

3062 

3063 The `keep` parameter controls which duplicate values are removed. 

3064 The value 'first' keeps the first occurrence for each 

3065 set of duplicated entries. The default value of keep is 'first'. 

3066 

3067 >>> idx.drop_duplicates(keep='first') 

3068 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') 

3069 

3070 The value 'last' keeps the last occurrence for each set of duplicated 

3071 entries. 

3072 

3073 >>> idx.drop_duplicates(keep='last') 

3074 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') 

3075 

3076 The value ``False`` discards all sets of duplicated entries. 

3077 

3078 >>> idx.drop_duplicates(keep=False) 

3079 Index(['cow', 'beetle', 'hippo'], dtype='object') 

3080 """ 

3081 if self.is_unique: 

3082 return self._view() 

3083 

3084 return super().drop_duplicates(keep=keep) 

3085 

3086 def duplicated( 

3087 self, keep: Literal["first", "last", False] = "first" 

3088 ) -> npt.NDArray[np.bool_]: 

3089 """ 

3090 Indicate duplicate index values. 

3091 

3092 Duplicated values are indicated as ``True`` values in the resulting 

3093 array. Either all duplicates, all except the first, or all except the 

3094 last occurrence of duplicates can be indicated. 

3095 

3096 Parameters 

3097 ---------- 

3098 keep : {'first', 'last', False}, default 'first' 

3099 The value or values in a set of duplicates to mark as missing. 

3100 

3101 - 'first' : Mark duplicates as ``True`` except for the first 

3102 occurrence. 

3103 - 'last' : Mark duplicates as ``True`` except for the last 

3104 occurrence. 

3105 - ``False`` : Mark all duplicates as ``True``. 

3106 

3107 Returns 

3108 ------- 

3109 np.ndarray[bool] 

3110 

3111 See Also 

3112 -------- 

3113 Series.duplicated : Equivalent method on pandas.Series. 

3114 DataFrame.duplicated : Equivalent method on pandas.DataFrame. 

3115 Index.drop_duplicates : Remove duplicate values from Index. 

3116 

3117 Examples 

3118 -------- 

3119 By default, for each set of duplicated values, the first occurrence is 

3120 set to False and all others to True: 

3121 

3122 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) 

3123 >>> idx.duplicated() 

3124 array([False, False, True, False, True]) 

3125 

3126 which is equivalent to 

3127 

3128 >>> idx.duplicated(keep='first') 

3129 array([False, False, True, False, True]) 

3130 

3131 By using 'last', the last occurrence of each set of duplicated values 

3132 is set to False and all others to True: 

3133 

3134 >>> idx.duplicated(keep='last') 

3135 array([ True, False, True, False, False]) 

3136 

3137 By setting keep to ``False``, all duplicates are True: 

3138 

3139 >>> idx.duplicated(keep=False) 

3140 array([ True, False, True, False, True]) 

3141 """ 

3142 if self.is_unique: 

3143 # fastpath available bc we are immutable 

3144 return np.zeros(len(self), dtype=bool) 

3145 return self._duplicated(keep=keep) 

3146 

3147 # -------------------------------------------------------------------- 

3148 # Arithmetic & Logical Methods 

3149 

3150 def __iadd__(self, other): 

3151 # alias for __add__ 

3152 return self + other 

3153 

3154 @final 

3155 def __and__(self, other): 

3156 warnings.warn( 

3157 "Index.__and__ operating as a set operation is deprecated, " 

3158 "in the future this will be a logical operation matching " 

3159 "Series.__and__. Use index.intersection(other) instead.", 

3160 FutureWarning, 

3161 stacklevel=find_stack_level(), 

3162 ) 

3163 return self.intersection(other) 

3164 

3165 @final 

3166 def __or__(self, other): 

3167 warnings.warn( 

3168 "Index.__or__ operating as a set operation is deprecated, " 

3169 "in the future this will be a logical operation matching " 

3170 "Series.__or__. Use index.union(other) instead.", 

3171 FutureWarning, 

3172 stacklevel=find_stack_level(), 

3173 ) 

3174 return self.union(other) 

3175 

3176 @final 

3177 def __xor__(self, other): 

3178 warnings.warn( 

3179 "Index.__xor__ operating as a set operation is deprecated, " 

3180 "in the future this will be a logical operation matching " 

3181 "Series.__xor__. Use index.symmetric_difference(other) instead.", 

3182 FutureWarning, 

3183 stacklevel=find_stack_level(), 

3184 ) 

3185 return self.symmetric_difference(other) 

3186 

3187 @final 

3188 def __nonzero__(self) -> NoReturn: 

3189 raise ValueError( 

3190 f"The truth value of a {type(self).__name__} is ambiguous. " 

3191 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

3192 ) 

3193 

3194 __bool__ = __nonzero__ 
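
The guard above means an Index never coerces to bool (message as raised above; the class name varies with the index type):

>>> import pandas as pd
>>> bool(pd.Index([1]))
Traceback (most recent call last):
ValueError: The truth value of a Int64Index is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().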

3195 

3196 # -------------------------------------------------------------------- 

3197 # Set Operation Methods 

3198 

3199 def _get_reconciled_name_object(self, other): 

3200 """ 

3201 If the result of a set operation will be self, 

3202 return self, unless the name changes, in which 

3203 case make a shallow copy of self. 

3204 """ 

3205 name = get_op_result_name(self, other) 

3206 if self.name is not name: 

3207 return self.rename(name) 

3208 return self 

3209 

3210 @final 

3211 def _validate_sort_keyword(self, sort): 

3212 if sort not in [None, False]: 

3213 raise ValueError( 

3214 "The 'sort' keyword only takes the values of " 

3215 f"None or False; {sort} was passed." 

3216 ) 
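
Any value other than None/False is rejected up front (error text from the raise above):

>>> import pandas as pd
>>> pd.Index([1]).union(pd.Index([2]), sort=True)
Traceback (most recent call last):
ValueError: The 'sort' keyword only takes the values of None or False; True was passed.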

3217 

3218 @final 

3219 def _deprecate_dti_setop(self, other: Index, setop: str_t): 

3220 """ 

3221 Deprecate setop behavior between timezone-aware DatetimeIndexes with 

3222 mismatched timezones. 

3223 """ 

3224 # Caller is responsible for checking 

3225 # `not is_dtype_equal(self.dtype, other.dtype)` 

3226 if ( 

3227 isinstance(self, ABCDatetimeIndex) 

3228 and isinstance(other, ABCDatetimeIndex) 

3229 and self.tz is not None 

3230 and other.tz is not None 

3231 ): 

3232 # GH#39328, GH#45357 

3233 warnings.warn( 

3234 f"In a future version, the {setop} of DatetimeIndex objects " 

3235 "with mismatched timezones will cast both to UTC instead of " 

3236 "object dtype. To retain the old behavior, " 

3237 f"use `index.astype(object).{setop}(other)`", 

3238 FutureWarning, 

3239 stacklevel=find_stack_level(), 

3240 ) 

3241 

3242 @final 

3243 def union(self, other, sort=None): 

3244 """ 

3245 Form the union of two Index objects. 

3246 

3247 If the Index objects are incompatible, both Index objects will be 

3248 cast to dtype('object') first. 

3249 

3250 .. versionchanged:: 0.25.0 

3251 

3252 Parameters 

3253 ---------- 

3254 other : Index or array-like 

3255 sort : bool or None, default None 

3256 Whether to sort the resulting Index. 

3257 

3258 * None : Sort the result, except when 

3259 

3260 1. `self` and `other` are equal. 

3261 2. `self` or `other` has length 0. 

3262 3. Some values in `self` or `other` cannot be compared. 

3263 A RuntimeWarning is issued in this case. 

3264 

3265 * False : do not sort the result. 

3266 

3267 Returns 

3268 ------- 

3269 union : Index 

3270 

3271 Examples 

3272 -------- 

3273 Union matching dtypes 

3274 

3275 >>> idx1 = pd.Index([1, 2, 3, 4]) 

3276 >>> idx2 = pd.Index([3, 4, 5, 6]) 

3277 >>> idx1.union(idx2) 

3278 Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') 

3279 

3280 Union mismatched dtypes 

3281 

3282 >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) 

3283 >>> idx2 = pd.Index([1, 2, 3, 4]) 

3284 >>> idx1.union(idx2) 

3285 Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') 

3286 

3287 MultiIndex case 

3288 

3289 >>> idx1 = pd.MultiIndex.from_arrays( 

3290 ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] 

3291 ... ) 

3292 >>> idx1 

3293 MultiIndex([(1, 'Red'), 

3294 (1, 'Blue'), 

3295 (2, 'Red'), 

3296 (2, 'Blue')], 

3297 ) 

3298 >>> idx2 = pd.MultiIndex.from_arrays( 

3299 ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] 

3300 ... ) 

3301 >>> idx2 

3302 MultiIndex([(3, 'Red'), 

3303 (3, 'Green'), 

3304 (2, 'Red'), 

3305 (2, 'Green')], 

3306 ) 

3307 >>> idx1.union(idx2) 

3308 MultiIndex([(1, 'Blue'), 

3309 (1, 'Red'), 

3310 (2, 'Blue'), 

3311 (2, 'Green'), 

3312 (2, 'Red'), 

3313 (3, 'Green'), 

3314 (3, 'Red')], 

3315 ) 

3316 >>> idx1.union(idx2, sort=False) 

3317 MultiIndex([(1, 'Red'), 

3318 (1, 'Blue'), 

3319 (2, 'Red'), 

3320 (2, 'Blue'), 

3321 (3, 'Red'), 

3322 (3, 'Green'), 

3323 (2, 'Green')], 

3324 ) 

3325 """ 

3326 self._validate_sort_keyword(sort) 

3327 self._assert_can_do_setop(other) 

3328 other, result_name = self._convert_can_do_setop(other) 

3329 

3330 if not is_dtype_equal(self.dtype, other.dtype): 

3331 if ( 

3332 isinstance(self, ABCMultiIndex) 

3333 and not is_object_dtype(unpack_nested_dtype(other)) 

3334 and len(other) > 0 

3335 ): 

3336 raise NotImplementedError( 

3337 "Can only union MultiIndex with MultiIndex or Index of tuples, " 

3338 "try mi.to_flat_index().union(other) instead." 

3339 ) 

3340 self._deprecate_dti_setop(other, "union") 

3341 

3342 dtype = self._find_common_type_compat(other) 

3343 left = self.astype(dtype, copy=False) 

3344 right = other.astype(dtype, copy=False) 

3345 return left.union(right, sort=sort) 

3346 

3347 elif not len(other) or self.equals(other): 

3348 # NB: whether this (and the `if not len(self)` check below) come before 

3349 # or after the is_dtype_equal check above affects the returned dtype 

3350 return self._get_reconciled_name_object(other) 

3351 

3352 elif not len(self): 

3353 return other._get_reconciled_name_object(self) 

3354 

3355 result = self._union(other, sort=sort) 

3356 

3357 return self._wrap_setop_result(other, result) 

3358 

3359 def _union(self, other: Index, sort): 

3360 """ 

3361 Specific union logic should go here. In subclasses, union behavior 

3362 should be overwritten here rather than in `self.union`. 

3363 

3364 Parameters 

3365 ---------- 

3366 other : Index or array-like 

3367 sort : False or None, default False 

3368 Whether to sort the resulting index. 

3369 

3370 * False : do not sort the result. 

3371 * None : sort the result, except when `self` and `other` are equal 

3372 or when the values cannot be compared. 

3373 

3374 Returns 

3375 ------- 

3376 Index 

3377 """ 

3378 lvals = self._values 

3379 rvals = other._values 

3380 

3381 if ( 

3382 sort is None 

3383 and self.is_monotonic_increasing 

3384 and other.is_monotonic_increasing 

3385 and not (self.has_duplicates and other.has_duplicates) 

3386 and self._can_use_libjoin 

3387 ): 

3388 # Both are monotonic and at least one is unique, so can use outer join 

3389 # (actually don't need either unique, but without this restriction 

3390 # test_union_same_value_duplicated_in_both fails) 

3391 try: 

3392 return self._outer_indexer(other)[0] 

3393 except (TypeError, IncompatibleFrequency): 

3394 # incomparable objects; should only be for object dtype 

3395 value_list = list(lvals) 

3396 

3397 # worth making this faster? a very unusual case 

3398 value_set = set(lvals) 

3399 value_list.extend([x for x in rvals if x not in value_set]) 

3400 # If objects are unorderable, we must have object dtype. 

3401 return np.array(value_list, dtype=object) 

3402 

3403 elif not other.is_unique: 

3404 # other has duplicates 

3405 result = algos.union_with_duplicates(lvals, rvals) 

3406 return _maybe_try_sort(result, sort) 

3407 

3408 # Self may have duplicates; other already checked as unique 

3409 # find indexes of things in "other" that are not in "self" 

3410 if self._index_as_unique: 

3411 indexer = self.get_indexer(other) 

3412 missing = (indexer == -1).nonzero()[0] 

3413 else: 

3414 missing = algos.unique1d(self.get_indexer_non_unique(other)[1]) 

3415 

3416 if len(missing) > 0: 

3417 other_diff = rvals.take(missing) 

3418 result = concat_compat((lvals, other_diff)) 

3419 else: 

3420 result = lvals 

3421 

3422 if not self.is_monotonic_increasing or not other.is_monotonic_increasing: 

3423 # if both are monotonic then result should already be sorted 

3424 result = _maybe_try_sort(result, sort) 

3425 

3426 return result 
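
A sketch contrasting the two sort modes handled above (Int64Index reprs assume pandas 1.x):

>>> import pandas as pd
>>> a, b = pd.Index([2, 1]), pd.Index([3, 1])
>>> a.union(b)                # sort=None: result is sorted
Int64Index([1, 2, 3], dtype='int64')
>>> a.union(b, sort=False)    # self's values first, then other's unmatched values
Int64Index([2, 1, 3], dtype='int64')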

3427 

3428 @final 

3429 def _wrap_setop_result(self, other: Index, result) -> Index: 

3430 name = get_op_result_name(self, other) 

3431 if isinstance(result, Index): 

3432 if result.name != name: 

3433 result = result.rename(name) 

3434 else: 

3435 result = self._shallow_copy(result, name=name) 

3436 return result 

3437 

3438 @final 

3439 def intersection(self, other, sort=False): 

3440 """ 

3441 Form the intersection of two Index objects. 

3442 

3443 This returns a new Index with elements common to the index and `other`. 

3444 

3445 Parameters 

3446 ---------- 

3447 other : Index or array-like 

3448 sort : False or None, default False 

3449 Whether to sort the resulting index. 

3450 

3451 * False : do not sort the result. 

3452 * None : sort the result, except when `self` and `other` are equal 

3453 or when the values cannot be compared. 

3454 

3455 Returns 

3456 ------- 

3457 intersection : Index 

3458 

3459 Examples 

3460 -------- 

3461 >>> idx1 = pd.Index([1, 2, 3, 4]) 

3462 >>> idx2 = pd.Index([3, 4, 5, 6]) 

3463 >>> idx1.intersection(idx2) 

3464 Int64Index([3, 4], dtype='int64') 

3465 """ 

3466 self._validate_sort_keyword(sort) 

3467 self._assert_can_do_setop(other) 

3468 other, result_name = self._convert_can_do_setop(other) 

3469 

3470 if not is_dtype_equal(self.dtype, other.dtype): 

3471 self._deprecate_dti_setop(other, "intersection") 

3472 

3473 if self.equals(other): 

3474 if self.has_duplicates: 

3475 return self.unique()._get_reconciled_name_object(other) 

3476 return self._get_reconciled_name_object(other) 

3477 

3478 if len(self) == 0 or len(other) == 0: 

3479 # fastpath; we need to be careful about having commutativity 

3480 

3481 if self._is_multi or other._is_multi: 

3482 # _convert_can_do_setop ensures that we have both or neither 

3483 # We retain self.levels 

3484 return self[:0].rename(result_name) 

3485 

3486 dtype = self._find_common_type_compat(other) 

3487 if is_dtype_equal(self.dtype, dtype): 

3488 # Slicing allows us to retain DTI/TDI.freq, RangeIndex 

3489 

3490 # Note: self[:0] vs other[:0] affects 

3491 # 1) which index's `freq` we get in DTI/TDI cases 

3492 # This may be a historical artifact, i.e. no documented 

3493 # reason for this choice. 

3494 # 2) The `step` we get in RangeIndex cases 

3495 if len(self) == 0: 

3496 return self[:0].rename(result_name) 

3497 else: 

3498 return other[:0].rename(result_name) 

3499 

3500 return Index([], dtype=dtype, name=result_name) 

3501 

3502 elif not self._should_compare(other): 

3503 # We can infer that the intersection is empty. 

3504 if isinstance(self, ABCMultiIndex): 

3505 return self[:0].rename(result_name) 

3506 return Index([], name=result_name) 

3507 

3508 elif not is_dtype_equal(self.dtype, other.dtype): 

3509 dtype = self._find_common_type_compat(other) 

3510 this = self.astype(dtype, copy=False) 

3511 other = other.astype(dtype, copy=False) 

3512 return this.intersection(other, sort=sort) 

3513 

3514 result = self._intersection(other, sort=sort) 

3515 return self._wrap_intersection_result(other, result) 

3516 

3517 def _intersection(self, other: Index, sort=False): 

3518 """ 

3519 intersection specialized to the case with matching dtypes. 

3520 """ 

3521 if ( 

3522 self.is_monotonic_increasing 

3523 and other.is_monotonic_increasing 

3524 and self._can_use_libjoin 

3525 ): 

3526 try: 

3527 result = self._inner_indexer(other)[0] 

3528 except TypeError: 

3529 # non-comparable; should only be for object dtype 

3530 pass 

3531 else: 

3532 # TODO: algos.unique1d should preserve DTA/TDA 

3533 res = algos.unique1d(result) 

3534 return ensure_wrapped_if_datetimelike(res) 

3535 

3536 res_values = self._intersection_via_get_indexer(other, sort=sort) 

3537 res_values = _maybe_try_sort(res_values, sort) 

3538 return res_values 

3539 

3540 def _wrap_intersection_result(self, other, result): 

3541 # We will override for MultiIndex to handle empty results 

3542 return self._wrap_setop_result(other, result) 

3543 

3544 @final 

3545 def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike: 

3546 """ 

3547 Find the intersection of two Indexes using get_indexer. 

3548 

3549 Returns 

3550 ------- 

3551 np.ndarray or ExtensionArray 

3552 The returned array will be unique. 

3553 """ 

3554 left_unique = self.unique() 

3555 right_unique = other.unique() 

3556 

3557 # even though we are unique, we need get_indexer_for for IntervalIndex 

3558 indexer = left_unique.get_indexer_for(right_unique) 

3559 

3560 mask = indexer != -1 

3561 

3562 taker = indexer.take(mask.nonzero()[0]) 

3563 if sort is False: 

3564 # sort bc we want the elements in the same order they are in self 

3565 # unnecessary in the case with sort=None bc we will sort later 

3566 taker = np.sort(taker) 

3567 

3568 result = left_unique.take(taker)._values 

3569 return result 
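
The np.sort on `taker` above is what preserves self's ordering under sort=False; a sketch:

>>> import pandas as pd
>>> pd.Index([3, 1, 2]).intersection(pd.Index([2, 3]))
Int64Index([3, 2], dtype='int64')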

3570 

3571 @final 

3572 def difference(self, other, sort=None): 

3573 """ 

3574 Return a new Index with elements of index not in `other`. 

3575 

3576 This is the set difference of two Index objects. 

3577 

3578 Parameters 

3579 ---------- 

3580 other : Index or array-like 

3581 sort : False or None, default None 

3582 Whether to sort the resulting index. By default, pandas 

3583 attempts to sort the values, but any TypeError raised when 

3584 comparing incomparable elements is caught. 

3585 

3586 * None : Attempt to sort the result, but catch any TypeErrors 

3587 from comparing incomparable elements. 

3588 * False : Do not sort the result. 

3589 

3590 Returns 

3591 ------- 

3592 difference : Index 

3593 

3594 Examples 

3595 -------- 

3596 >>> idx1 = pd.Index([2, 1, 3, 4]) 

3597 >>> idx2 = pd.Index([3, 4, 5, 6]) 

3598 >>> idx1.difference(idx2) 

3599 Int64Index([1, 2], dtype='int64') 

3600 >>> idx1.difference(idx2, sort=False) 

3601 Int64Index([2, 1], dtype='int64') 

3602 """ 

3603 self._validate_sort_keyword(sort) 

3604 self._assert_can_do_setop(other) 

3605 other, result_name = self._convert_can_do_setop(other) 

3606 

3607 # Note: we do NOT call _deprecate_dti_setop here, as there 

3608 # is no requirement that .difference be commutative, so it does 

3609 # not cast to object. 

3610 

3611 if self.equals(other): 

3612 # Note: we do not (yet) sort even if sort=None GH#24959 

3613 return self[:0].rename(result_name) 

3614 

3615 if len(other) == 0: 

3616 # Note: we do not (yet) sort even if sort=None GH#24959 

3617 return self.rename(result_name) 

3618 

3619 if not self._should_compare(other): 

3620 # Nothing matches -> difference is everything 

3621 return self.rename(result_name) 

3622 

3623 result = self._difference(other, sort=sort) 

3624 return self._wrap_difference_result(other, result) 

3625 

3626 def _difference(self, other, sort): 

3627 # overridden by RangeIndex 

3628 

3629 this = self.unique() 

3630 

3631 indexer = this.get_indexer_for(other) 

3632 indexer = indexer.take((indexer != -1).nonzero()[0]) 

3633 

3634 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) 

3635 the_diff = this._values.take(label_diff) 

3636 the_diff = _maybe_try_sort(the_diff, sort) 

3637 

3638 return the_diff 
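
A doctest-style illustration of the `this = self.unique()` step above: duplicates in the calling index do not survive `difference` (repr shown as of the pandas version this report covers):

>>> pd.Index([1, 1, 2, 3]).difference([2])
Int64Index([1, 3], dtype='int64')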

3639 

3640 def _wrap_difference_result(self, other, result): 

3641 # We will override for MultiIndex to handle empty results 

3642 return self._wrap_setop_result(other, result) 

3643 

3644 def symmetric_difference(self, other, result_name=None, sort=None): 

3645 """ 

3646 Compute the symmetric difference of two Index objects. 

3647 

3648 Parameters 

3649 ---------- 

3650 other : Index or array-like 

3651 result_name : str 

3652 sort : False or None, default None 

3653 Whether to sort the resulting index. By default, pandas 

3654 attempts to sort the values, but any TypeError raised when 

3655 comparing incomparable elements is caught. 

3656 

3657 * None : Attempt to sort the result, but catch any TypeErrors 

3658 from comparing incomparable elements. 

3659 * False : Do not sort the result. 

3660 

3661 Returns 

3662 ------- 

3663 symmetric_difference : Index 

3664 

3665 Notes 

3666 ----- 

3667 ``symmetric_difference`` contains elements that appear in either 

3668 ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by 

3669 ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates 

3670 dropped. 

3671 

3672 Examples 

3673 -------- 

3674 >>> idx1 = pd.Index([1, 2, 3, 4]) 

3675 >>> idx2 = pd.Index([2, 3, 4, 5]) 

3676 >>> idx1.symmetric_difference(idx2) 

3677 Int64Index([1, 5], dtype='int64') 

3678 """ 

3679 self._validate_sort_keyword(sort) 

3680 self._assert_can_do_setop(other) 

3681 other, result_name_update = self._convert_can_do_setop(other) 

3682 if result_name is None: 

3683 result_name = result_name_update 

3684 

3685 if not is_dtype_equal(self.dtype, other.dtype): 

3686 self._deprecate_dti_setop(other, "symmetric_difference") 

3687 

3688 if not self._should_compare(other): 

3689 return self.union(other, sort=sort).rename(result_name) 

3690 

3691 elif not is_dtype_equal(self.dtype, other.dtype): 

3692 dtype = self._find_common_type_compat(other) 

3693 this = self.astype(dtype, copy=False) 

3694 that = other.astype(dtype, copy=False) 

3695 return this.symmetric_difference(that, sort=sort).rename(result_name) 

3696 

3697 this = self.unique() 

3698 other = other.unique() 

3699 indexer = this.get_indexer_for(other) 

3700 

3701 # {this} minus {other} 

3702 common_indexer = indexer.take((indexer != -1).nonzero()[0]) 

3703 left_indexer = np.setdiff1d( 

3704 np.arange(this.size), common_indexer, assume_unique=True 

3705 ) 

3706 left_diff = this._values.take(left_indexer) 

3707 

3708 # {other} minus {this} 

3709 right_indexer = (indexer == -1).nonzero()[0] 

3710 right_diff = other._values.take(right_indexer) 

3711 

3712 res_values = concat_compat([left_diff, right_diff]) 

3713 res_values = _maybe_try_sort(res_values, sort) 

3714 

3715 # pass dtype so we retain object dtype 

3716 result = Index(res_values, name=result_name, dtype=res_values.dtype) 

3717 

3718 if self._is_multi: 

3719 self = cast("MultiIndex", self) 

3720 if len(result) == 0: 

3721 # If the two MultiIndexes are equal, the symmetric difference is empty. 

3722 # Therefore, an empty MultiIndex is returned GH#13490 

3723 return type(self)( 

3724 levels=[[] for _ in range(self.nlevels)], 

3725 codes=[[] for _ in range(self.nlevels)], 

3726 names=result.name, 

3727 ) 

3728 return type(self).from_tuples(result, names=result.name) 

3729 

3730 return result 
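
A doctest-style sketch of the `result_name` override above: when the two names differ, the result would otherwise be unnamed, but an explicit `result_name` wins (repr shown as of the pandas version this report covers):

>>> idx1 = pd.Index([1, 2, 3, 4], name="a")
>>> idx2 = pd.Index([2, 3, 4, 5], name="b")
>>> idx1.symmetric_difference(idx2, result_name="ab")
Int64Index([1, 5], dtype='int64', name='ab')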

3731 

3732 @final 

3733 def _assert_can_do_setop(self, other) -> bool: 

3734 if not is_list_like(other): 

3735 raise TypeError("Input must be Index or array-like") 

3736 return True 

3737 

3738 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: 

3739 if not isinstance(other, Index): 

3740 # TODO(2.0): no need to special-case here once _with_infer 

3741 # deprecation is enforced 

3742 if hasattr(other, "dtype"): 

3743 other = Index(other, name=self.name, dtype=other.dtype) 

3744 else: 

3745 # e.g. list 

3746 other = Index(other, name=self.name) 

3747 result_name = self.name 

3748 else: 

3749 result_name = get_op_result_name(self, other) 

3750 return other, result_name 

3751 

3752 # -------------------------------------------------------------------- 

3753 # Indexing Methods 

3754 

3755 def get_loc(self, key, method=None, tolerance=None): 

3756 """ 

3757 Get integer location, slice or boolean mask for requested label. 

3758 

3759 Parameters 

3760 ---------- 

3761 key : label 

3762 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional 

3763 * default: exact matches only. 

3764 * pad / ffill: find the PREVIOUS index value if no exact match. 

3765 * backfill / bfill: use NEXT index value if no exact match 

3766 * nearest: use the NEAREST index value if no exact match. Tied 

3767 distances are broken by preferring the larger index value. 

3768 

3769 .. deprecated:: 1.4 

3770 Use index.get_indexer([item], method=...) instead. 

3771 

3772 tolerance : int or float, optional 

3773 Maximum distance from index value for inexact matches. The value of 

3774 the index at the matching location must satisfy the equation 

3775 ``abs(index[loc] - key) <= tolerance``. 

3776 

3777 Returns 

3778 ------- 

3779 loc : int if unique index, slice if monotonic index, else mask 

3780 

3781 Examples 

3782 -------- 

3783 >>> unique_index = pd.Index(list('abc')) 

3784 >>> unique_index.get_loc('b') 

3785 1 

3786 

3787 >>> monotonic_index = pd.Index(list('abbc')) 

3788 >>> monotonic_index.get_loc('b') 

3789 slice(1, 3, None) 

3790 

3791 >>> non_monotonic_index = pd.Index(list('abcb')) 

3792 >>> non_monotonic_index.get_loc('b') 

3793 array([False, True, False, True]) 

3794 """ 

3795 if method is None: 

3796 if tolerance is not None: 

3797 raise ValueError( 

3798 "tolerance argument only valid if using pad, " 

3799 "backfill or nearest lookups" 

3800 ) 

3801 casted_key = self._maybe_cast_indexer(key) 

3802 try: 

3803 return self._engine.get_loc(casted_key) 

3804 except KeyError as err: 

3805 raise KeyError(key) from err 

3806 except TypeError: 

3807 # If we have a listlike key, _check_indexing_error will raise 

3808 # InvalidIndexError. Otherwise we fall through and re-raise 

3809 # the TypeError. 

3810 self._check_indexing_error(key) 

3811 raise 

3812 

3813 # GH#42269 

3814 warnings.warn( 

3815 f"Passing method to {type(self).__name__}.get_loc is deprecated " 

3816 "and will raise in a future version. Use " 

3817 "index.get_indexer([item], method=...) instead.", 

3818 FutureWarning, 

3819 stacklevel=find_stack_level(), 

3820 ) 

3821 

3822 if is_scalar(key) and isna(key) and not self.hasnans: 

3823 raise KeyError(key) 

3824 

3825 if tolerance is not None: 

3826 tolerance = self._convert_tolerance(tolerance, np.asarray(key)) 

3827 

3828 indexer = self.get_indexer([key], method=method, tolerance=tolerance) 

3829 if indexer.ndim > 1 or indexer.size > 1: 

3830 raise TypeError("get_loc requires scalar valued input") 

3831 loc = indexer.item() 

3832 if loc == -1: 

3833 raise KeyError(key) 

3834 return loc 
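
Since the `method` argument is deprecated (GH#42269), the supported spelling goes through get_indexer, as the warning text says; a minimal doctest-style sketch:

>>> idx = pd.Index([10, 20, 30])
>>> idx.get_loc(20)
1
>>> idx.get_indexer([23], method="nearest")  # replaces get_loc(23, method="nearest")
array([1])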

3835 

3836 _index_shared_docs[ 

3837 "get_indexer" 

3838 ] = """ 

3839 Compute indexer and mask for new index given the current index. 

3840 

3841 The indexer should then be used as an input to ndarray.take to align the 

3842 current data to the new index. 

3843 

3844 Parameters 

3845 ---------- 

3846 target : %(target_klass)s 

3847 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional 

3848 * default: exact matches only. 

3849 * pad / ffill: find the PREVIOUS index value if no exact match. 

3850 * backfill / bfill: use NEXT index value if no exact match 

3851 * nearest: use the NEAREST index value if no exact match. Tied 

3852 distances are broken by preferring the larger index value. 

3853 limit : int, optional 

3854 Maximum number of consecutive labels in ``target`` to match for 

3855 inexact matches. 

3856 tolerance : optional 

3857 Maximum distance between original and new labels for inexact 

3858 matches. The values of the index at the matching locations must 

3859 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

3860 

3861 Tolerance may be a scalar value, which applies the same tolerance 

3862 to all values, or list-like, which applies variable tolerance per 

3863 element. List-like includes list, tuple, array, Series, and must be 

3864 the same size as the index and its dtype must exactly match the 

3865 index's type. 

3866 

3867 Returns 

3868 ------- 

3869 indexer : np.ndarray[np.intp] 

3870 Integers from 0 to n - 1 indicating that the index at these 

3871 positions matches the corresponding target values. Missing values 

3872 in the target are marked by -1. 

3873 %(raises_section)s 

3874 Notes 

3875 ----- 

3876 Returns -1 for unmatched values, for further explanation see the 

3877 example below. 

3878 

3879 Examples 

3880 -------- 

3881 >>> index = pd.Index(['c', 'a', 'b']) 

3882 >>> index.get_indexer(['a', 'b', 'x']) 

3883 array([ 1, 2, -1]) 

3884 

3885 Notice that the return value is an array of locations in ``index`` 

3886 and ``x`` is marked by -1, as it is not in ``index``. 

3887 """ 

3888 

3889 @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) 

3890 @final 

3891 def get_indexer( 

3892 self, 

3893 target, 

3894 method: str_t | None = None, 

3895 limit: int | None = None, 

3896 tolerance=None, 

3897 ) -> npt.NDArray[np.intp]: 

3898 method = missing.clean_reindex_fill_method(method) 

3899 orig_target = target 

3900 target = self._maybe_cast_listlike_indexer(target) 

3901 

3902 self._check_indexing_method(method, limit, tolerance) 

3903 

3904 if not self._index_as_unique: 

3905 raise InvalidIndexError(self._requires_unique_msg) 

3906 

3907 if len(target) == 0: 

3908 return np.array([], dtype=np.intp) 

3909 

3910 if not self._should_compare(target) and not self._should_partial_index(target): 

3911 # IntervalIndex get special treatment bc numeric scalars can be 

3912 # matched to Interval scalars 

3913 return self._get_indexer_non_comparable(target, method=method, unique=True) 

3914 

3915 if is_categorical_dtype(self.dtype): 

3916 # _maybe_cast_listlike_indexer ensures target has our dtype 

3917 # (could improve perf by doing _should_compare check earlier?) 

3918 assert is_dtype_equal(self.dtype, target.dtype) 

3919 

3920 indexer = self._engine.get_indexer(target.codes) 

3921 if self.hasnans and target.hasnans: 

3922 # After _maybe_cast_listlike_indexer, target elements which do not 

3923 # belong to some category are changed to NaNs 

3924 # Mask to track actual NaN values compared to inserted NaN values 

3925 # GH#45361 

3926 target_nans = isna(orig_target) 

3927 loc = self.get_loc(np.nan) 

3928 mask = target.isna() 

3929 indexer[target_nans] = loc 

3930 indexer[mask & ~target_nans] = -1 

3931 return indexer 

3932 

3933 if is_categorical_dtype(target.dtype): 

3934 # potential fastpath 

3935 # get an indexer for unique categories then propagate to codes via take_nd 

3936 # get_indexer instead of _get_indexer needed for MultiIndex cases 

3937 # e.g. test_append_different_columns_types 

3938 categories_indexer = self.get_indexer(target.categories) 

3939 

3940 indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1) 

3941 

3942 if (not self._is_multi and self.hasnans) and target.hasnans: 

3943 # Exclude MultiIndex because hasnans raises NotImplementedError 

3944 # we should only get here if we are unique, so loc is an integer 

3945 # GH#41934 

3946 loc = self.get_loc(np.nan) 

3947 mask = target.isna() 

3948 indexer[mask] = loc 

3949 

3950 return ensure_platform_int(indexer) 

3951 

3952 pself, ptarget = self._maybe_promote(target) 

3953 if pself is not self or ptarget is not target: 

3954 return pself.get_indexer( 

3955 ptarget, method=method, limit=limit, tolerance=tolerance 

3956 ) 

3957 

3958 if is_dtype_equal(self.dtype, target.dtype) and self.equals(target): 

3959 # Only call equals if we have same dtype to avoid inference/casting 

3960 return np.arange(len(target), dtype=np.intp) 

3961 

3962 if not is_dtype_equal(self.dtype, target.dtype) and not is_interval_dtype( 

3963 self.dtype 

3964 ): 

3965 # IntervalIndex gets special treatment for partial-indexing 

3966 dtype = self._find_common_type_compat(target) 

3967 

3968 this = self.astype(dtype, copy=False) 

3969 target = target.astype(dtype, copy=False) 

3970 return this._get_indexer( 

3971 target, method=method, limit=limit, tolerance=tolerance 

3972 ) 

3973 

3974 return self._get_indexer(target, method, limit, tolerance) 
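
A short doctest-style sketch of the fill/tolerance interplay documented above, assuming a monotonic integer index (output shown as of the pandas version this report covers). The target 25 pads back to 20, but its distance of 5 exceeds the tolerance, so it is masked to -1:

>>> idx = pd.Index([0, 10, 20])
>>> idx.get_indexer([4, 11, 25], method="pad", tolerance=4)
array([ 0,  1, -1])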

3975 

3976 def _get_indexer( 

3977 self, 

3978 target: Index, 

3979 method: str_t | None = None, 

3980 limit: int | None = None, 

3981 tolerance=None, 

3982 ) -> npt.NDArray[np.intp]: 

3983 if tolerance is not None: 

3984 tolerance = self._convert_tolerance(tolerance, target) 

3985 

3986 if method in ["pad", "backfill"]: 

3987 indexer = self._get_fill_indexer(target, method, limit, tolerance) 

3988 elif method == "nearest": 

3989 indexer = self._get_nearest_indexer(target, limit, tolerance) 

3990 else: 

3991 if target._is_multi and self._is_multi: 

3992 engine = self._engine 

3993 # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" 

3994 # has no attribute "_extract_level_codes" 

3995 tgt_values = engine._extract_level_codes( # type: ignore[union-attr] 

3996 target 

3997 ) 

3998 else: 

3999 tgt_values = target._get_engine_target() 

4000 

4001 indexer = self._engine.get_indexer(tgt_values) 

4002 

4003 return ensure_platform_int(indexer) 

4004 

4005 @final 

4006 def _should_partial_index(self, target: Index) -> bool: 

4007 """ 

4008 Should we attempt partial-matching indexing? 

4009 """ 

4010 if is_interval_dtype(self.dtype): 

4011 if is_interval_dtype(target.dtype): 

4012 return False 

4013 # See https://github.com/pandas-dev/pandas/issues/47772 the commented 

4014 # out code can be restored (instead of hardcoding `return True`) 

4015 # once that issue is fixed 

4016 # "Index" has no attribute "left" 

4017 # return self.left._should_compare(target) # type: ignore[attr-defined] 

4018 return True 

4019 return False 

4020 

4021 @final 

4022 def _check_indexing_method( 

4023 self, 

4024 method: str_t | None, 

4025 limit: int | None = None, 

4026 tolerance=None, 

4027 ) -> None: 

4028 """ 

4029 Raise if we have a get_indexer `method` that is not supported or valid. 

4030 """ 

4031 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: 

4032 # in practice the clean_reindex_fill_method call would raise 

4033 # before we get here 

4034 raise ValueError("Invalid fill method") # pragma: no cover 

4035 

4036 if self._is_multi: 

4037 if method == "nearest": 

4038 raise NotImplementedError( 

4039 "method='nearest' not implemented yet " 

4040 "for MultiIndex; see GitHub issue 9365" 

4041 ) 

4042 elif method == "pad" or method == "backfill": 

4043 if tolerance is not None: 

4044 raise NotImplementedError( 

4045 "tolerance not implemented yet for MultiIndex" 

4046 ) 

4047 

4048 if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype): 

4049 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex 

4050 if method is not None: 

4051 raise NotImplementedError( 

4052 f"method {method} not yet implemented for {type(self).__name__}" 

4053 ) 

4054 

4055 if method is None: 

4056 if tolerance is not None: 

4057 raise ValueError( 

4058 "tolerance argument only valid if doing pad, " 

4059 "backfill or nearest reindexing" 

4060 ) 

4061 if limit is not None: 

4062 raise ValueError( 

4063 "limit argument only valid if doing pad, " 

4064 "backfill or nearest reindexing" 

4065 ) 

4066 

4067 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: 

4068 # override this method on subclasses 

4069 tolerance = np.asarray(tolerance) 

4070 if target.size != tolerance.size and tolerance.size > 1: 

4071 raise ValueError("list-like tolerance size must match target index size") 

4072 return tolerance 

4073 

4074 @final 

4075 def _get_fill_indexer( 

4076 self, target: Index, method: str_t, limit: int | None = None, tolerance=None 

4077 ) -> npt.NDArray[np.intp]: 

4078 

4079 if self._is_multi: 

4080 # TODO: get_indexer_with_fill docstring says values must be _sorted_ 

4081 # but that doesn't appear to be enforced 

4082 # error: "IndexEngine" has no attribute "get_indexer_with_fill" 

4083 engine = self._engine 

4084 return engine.get_indexer_with_fill( # type: ignore[union-attr] 

4085 target=target._values, values=self._values, method=method, limit=limit 

4086 ) 

4087 

4088 if self.is_monotonic_increasing and target.is_monotonic_increasing: 

4089 target_values = target._get_engine_target() 

4090 own_values = self._get_engine_target() 

4091 if not isinstance(target_values, np.ndarray) or not isinstance( 

4092 own_values, np.ndarray 

4093 ): 

4094 raise NotImplementedError 

4095 

4096 if method == "pad": 

4097 indexer = libalgos.pad(own_values, target_values, limit=limit) 

4098 else: 

4099 # i.e. "backfill" 

4100 indexer = libalgos.backfill(own_values, target_values, limit=limit) 

4101 else: 

4102 indexer = self._get_fill_indexer_searchsorted(target, method, limit) 

4103 if tolerance is not None and len(self): 

4104 indexer = self._filter_indexer_tolerance(target, indexer, tolerance) 

4105 return indexer 

4106 

4107 @final 

4108 def _get_fill_indexer_searchsorted( 

4109 self, target: Index, method: str_t, limit: int | None = None 

4110 ) -> npt.NDArray[np.intp]: 

4111 """ 

4112 Fallback pad/backfill get_indexer that works for monotonic decreasing 

4113 indexes and non-monotonic targets. 

4114 """ 

4115 if limit is not None: 

4116 raise ValueError( 

4117 f"limit argument for {repr(method)} method only well-defined " 

4118 "if index and target are monotonic" 

4119 ) 

4120 

4121 side: Literal["left", "right"] = "left" if method == "pad" else "right" 

4122 

4123 # find exact matches first (this simplifies the algorithm) 

4124 indexer = self.get_indexer(target) 

4125 nonexact = indexer == -1 

4126 indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) 

4127 if side == "left": 

4128 # searchsorted returns "indices into a sorted array such that, 

4129 # if the corresponding elements in v were inserted before the 

4130 # indices, the order of a would be preserved". 

4131 # Thus, we need to subtract 1 to find values to the left. 

4132 indexer[nonexact] -= 1 

4133 # This also maps not-found values (values of 0 from 

4134 # np.searchsorted) to -1, which conveniently is also our 

4135 # sentinel for missing values 

4136 else: 

4137 # Mark indices to the right of the largest value as not found 

4138 indexer[indexer == len(self)] = -1 

4139 return indexer 
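
A NumPy-only sketch (not part of the source) of the side="left" / pad arithmetic above, ignoring the exact-match pre-pass:

import numpy as np

own = np.array([10, 20, 30])
target = np.array([5, 25])
# searchsorted gives insertion points; subtracting 1 yields the value to the
# left, and positions with no such value become the -1 missing sentinel
indexer = np.searchsorted(own, target, side="left") - 1
print(indexer)  # [-1  1]: nothing <= 5, while 25 pads back to own[1] == 20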

4140 

4141 @final 

4142 def _get_nearest_indexer( 

4143 self, target: Index, limit: int | None, tolerance 

4144 ) -> npt.NDArray[np.intp]: 

4145 """ 

4146 Get the indexer for the nearest index labels; requires an index with 

4147 values that can be subtracted from each other (e.g., not strings or 

4148 tuples). 

4149 """ 

4150 if not len(self): 

4151 return self._get_fill_indexer(target, "pad") 

4152 

4153 left_indexer = self.get_indexer(target, "pad", limit=limit) 

4154 right_indexer = self.get_indexer(target, "backfill", limit=limit) 

4155 

4156 left_distances = self._difference_compat(target, left_indexer) 

4157 right_distances = self._difference_compat(target, right_indexer) 

4158 

4159 op = operator.lt if self.is_monotonic_increasing else operator.le 

4160 indexer = np.where( 

4161 # error: Argument 1&2 has incompatible type "Union[ExtensionArray, 

4162 # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE, 

4163 # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]" 

4164 op(left_distances, right_distances) # type: ignore[arg-type] 

4165 | (right_indexer == -1), 

4166 left_indexer, 

4167 right_indexer, 

4168 ) 

4169 if tolerance is not None: 

4170 indexer = self._filter_indexer_tolerance(target, indexer, tolerance) 

4171 return indexer 
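
A NumPy-only sketch (not part of the source) of the pad/backfill combination above, assuming all targets fall strictly inside the index range so the -1 handling can be ignored:

import numpy as np

idx = np.array([0, 10, 20])
target = np.array([4, 16])
left = np.searchsorted(idx, target, side="right") - 1   # pad candidate
right = np.searchsorted(idx, target, side="left")       # backfill candidate
# strict < mirrors operator.lt: ties go to the larger index value
nearest = np.where(np.abs(idx[left] - target) < np.abs(idx[right] - target), left, right)
print(nearest)  # [0 2]: 4 is nearer to 0, 16 is nearer to 20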

4172 

4173 @final 

4174 def _filter_indexer_tolerance( 

4175 self, 

4176 target: Index, 

4177 indexer: npt.NDArray[np.intp], 

4178 tolerance, 

4179 ) -> npt.NDArray[np.intp]: 

4180 

4181 distance = self._difference_compat(target, indexer) 

4182 

4183 return np.where(distance <= tolerance, indexer, -1) 

4184 

4185 @final 

4186 def _difference_compat( 

4187 self, target: Index, indexer: npt.NDArray[np.intp] 

4188 ) -> ArrayLike: 

4189 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object] 

4190 # of DateOffset objects, which do not support __abs__ (and would be slow 

4191 # if they did) 

4192 

4193 if isinstance(self.dtype, PeriodDtype): 

4194 # Note: we only get here with matching dtypes 

4195 own_values = cast("PeriodArray", self._data)._ndarray 

4196 target_values = cast("PeriodArray", target._data)._ndarray 

4197 diff = own_values[indexer] - target_values 

4198 else: 

4199 # error: Unsupported left operand type for - ("ExtensionArray") 

4200 diff = self._values[indexer] - target._values # type: ignore[operator] 

4201 return abs(diff) 

4202 

4203 # -------------------------------------------------------------------- 

4204 # Indexer Conversion Methods 

4205 

4206 @final 

4207 def _validate_positional_slice(self, key: slice) -> None: 

4208 """ 

4209 For positional indexing, a slice must have either int or None 

4210 for each of start, stop, and step. 

4211 """ 

4212 self._validate_indexer("positional", key.start, "iloc") 

4213 self._validate_indexer("positional", key.stop, "iloc") 

4214 self._validate_indexer("positional", key.step, "iloc") 

4215 

4216 def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False): 

4217 """ 

4218 Convert a slice indexer. 

4219 

4220 By definition, these are labels unless 'iloc' is passed in. 

4221 Floats are not allowed as the start, step, or stop of the slice. 

4222 

4223 Parameters 

4224 ---------- 

4225 key : label of the slice bound 

4226 kind : {'loc', 'getitem'} 

4227 is_frame : bool, default False 

4228 Whether this is a slice called on DataFrame.__getitem__ 

4229 as opposed to Series.__getitem__ 

4230 """ 

4231 assert kind in ["loc", "getitem"], kind 

4232 

4233 # potentially cast the bounds to integers 

4234 start, stop, step = key.start, key.stop, key.step 

4235 

4236 # figure out if this is a positional indexer 

4237 def is_int(v): 

4238 return v is None or is_integer(v) 

4239 

4240 is_index_slice = is_int(start) and is_int(stop) and is_int(step) 

4241 

4242 # special case for interval_dtype bc we do not do partial-indexing 

4243 # on integer Intervals when slicing 

4244 # TODO: write this in terms of e.g. should_partial_index? 

4245 ints_are_positional = self._should_fallback_to_positional or is_interval_dtype( 

4246 self.dtype 

4247 ) 

4248 is_positional = is_index_slice and ints_are_positional 

4249 

4250 if kind == "getitem": 

4251 """ 

4252 called from the getitem slicers, validate that we are in fact 

4253 integers 

4254 """ 

4255 if self.is_integer(): 

4256 if is_frame: 

4257 # unambiguously positional, no deprecation 

4258 pass 

4259 elif start is None and stop is None: 

4260 # label-based vs positional is irrelevant 

4261 pass 

4262 elif isinstance(self, ABCRangeIndex) and self._range == range( 

4263 len(self) 

4264 ): 

4265 # In this case there is no difference between label-based 

4266 # and positional, so nothing will change. 

4267 pass 

4268 elif ( 

4269 self.dtype.kind in ["i", "u"] 

4270 and self._is_strictly_monotonic_increasing 

4271 and len(self) > 0 

4272 and self[0] == 0 

4273 and self[-1] == len(self) - 1 

4274 ): 

4275 # We are range-like, e.g. created with Index(np.arange(N)) 

4276 pass 

4277 elif not is_index_slice: 

4278 # we're going to raise, so don't bother warning, e.g. 

4279 # test_integer_positional_indexing 

4280 pass 

4281 else: 

4282 warnings.warn( 

4283 "The behavior of `series[i:j]` with an integer-dtype index " 

4284 "is deprecated. In a future version, this will be treated " 

4285 "as *label-based* indexing, consistent with e.g. `series[i]` " 

4286 "lookups. To retain the old behavior, use `series.iloc[i:j]`. " 

4287 "To get the future behavior, use `series.loc[i:j]`.", 

4288 FutureWarning, 

4289 stacklevel=find_stack_level(), 

4290 ) 

4291 if self.is_integer() or is_index_slice: 

4292 # Note: these checks are redundant if we know is_index_slice 

4293 self._validate_indexer("slice", key.start, "getitem") 

4294 self._validate_indexer("slice", key.stop, "getitem") 

4295 self._validate_indexer("slice", key.step, "getitem") 

4296 return key 

4297 

4298 # convert the slice to an indexer here 

4299 

4300 # if we are mixed and have integers 

4301 if is_positional: 

4302 try: 

4303 # Validate start & stop 

4304 if start is not None: 

4305 self.get_loc(start) 

4306 if stop is not None: 

4307 self.get_loc(stop) 

4308 is_positional = False 

4309 except KeyError: 

4310 pass 

4311 

4312 if com.is_null_slice(key): 

4313 # It doesn't matter if we are positional or label based 

4314 indexer = key 

4315 elif is_positional: 

4316 if kind == "loc": 

4317 # GH#16121, GH#24612, GH#31810 

4318 warnings.warn( 

4319 "Slicing a positional slice with .loc is not supported, " 

4320 "and will raise TypeError in a future version. " 

4321 "Use .loc with labels or .iloc with positions instead.", 

4322 FutureWarning, 

4323 stacklevel=find_stack_level(), 

4324 ) 

4325 indexer = key 

4326 else: 

4327 indexer = self.slice_indexer(start, stop, step) 

4328 

4329 return indexer 
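
The label/positional distinction above in doctest form: on a non-integer index, integer slices are positional (stop excluded), while label slices include both endpoints:

>>> s = pd.Series([1, 2, 3], index=["a", "b", "c"])
>>> s["a":"b"]   # label-based, both endpoints included
a    1
b    2
dtype: int64
>>> s[0:2]       # positional, stop excluded
a    1
b    2
dtype: int64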

4330 

4331 @final 

4332 def _invalid_indexer(self, form: str_t, key) -> TypeError: 

4333 """ 

4334 Consistent invalid indexer message. 

4335 """ 

4336 return TypeError( 

4337 f"cannot do {form} indexing on {type(self).__name__} with these " 

4338 f"indexers [{key}] of type {type(key).__name__}" 

4339 ) 

4340 

4341 # -------------------------------------------------------------------- 

4342 # Reindex Methods 

4343 

4344 @final 

4345 def _validate_can_reindex(self, indexer: np.ndarray) -> None: 

4346 """ 

4347 Check if we are allowing reindexing with this particular indexer. 

4348 

4349 Parameters 

4350 ---------- 

4351 indexer : an integer ndarray 

4352 

4353 Raises 

4354 ------ 

4355 ValueError if its a duplicate axis 

4356 """ 

4357 # trying to reindex on an axis with duplicates 

4358 if not self._index_as_unique and len(indexer): 

4359 raise ValueError("cannot reindex on an axis with duplicate labels") 

4360 

4361 def reindex( 

4362 self, target, method=None, level=None, limit=None, tolerance=None 

4363 ) -> tuple[Index, npt.NDArray[np.intp] | None]: 

4364 """ 

4365 Create index with target's values. 

4366 

4367 Parameters 

4368 ---------- 

4369 target : an iterable 

4370 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional 

4371 * default: exact matches only. 

4372 * pad / ffill: find the PREVIOUS index value if no exact match. 

4373 * backfill / bfill: use NEXT index value if no exact match 

4374 * nearest: use the NEAREST index value if no exact match. Tied 

4375 distances are broken by preferring the larger index value. 

4376 level : int, optional 

4377 Level of multiindex. 

4378 limit : int, optional 

4379 Maximum number of consecutive labels in ``target`` to match for 

4380 inexact matches. 

4381 tolerance : int or float, optional 

4382 Maximum distance between original and new labels for inexact 

4383 matches. The values of the index at the matching locations must 

4384 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

4385 

4386 Tolerance may be a scalar value, which applies the same tolerance 

4387 to all values, or list-like, which applies variable tolerance per 

4388 element. List-like includes list, tuple, array, Series, and must be 

4389 the same size as the index and its dtype must exactly match the 

4390 index's type. 

4391 

4392 Returns 

4393 ------- 

4394 new_index : pd.Index 

4395 Resulting index. 

4396 indexer : np.ndarray[np.intp] or None 

4397 Indices of output values in original index. 

4398 

4399 Raises 

4400 ------ 

4401 TypeError 

4402 If ``method`` passed along with ``level``. 

4403 ValueError 

4404 If non-unique multi-index 

4405 ValueError 

4406 If non-unique index and ``method`` or ``limit`` passed. 

4407 

4408 See Also 

4409 -------- 

4410 Series.reindex : Conform Series to new index with optional filling logic. 

4411 DataFrame.reindex : Conform DataFrame to new index with optional filling logic. 

4412 

4413 Examples 

4414 -------- 

4415 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) 

4416 >>> idx 

4417 Index(['car', 'bike', 'train', 'tractor'], dtype='object') 

4418 >>> idx.reindex(['car', 'bike']) 

4419 (Index(['car', 'bike'], dtype='object'), array([0, 1])) 

4420 """ 

4421 # GH6552: preserve names when reindexing to non-named target 

4422 # (i.e. neither Index nor Series). 

4423 preserve_names = not hasattr(target, "name") 

4424 

4425 # GH7774: preserve dtype/tz if target is empty and not an Index. 

4426 target = ensure_has_len(target) # target may be an iterator 

4427 

4428 if not isinstance(target, Index) and len(target) == 0: 

4429 if level is not None and self._is_multi: 

4430 # "Index" has no attribute "levels"; maybe "nlevels"? 

4431 idx = self.levels[level] # type: ignore[attr-defined] 

4432 else: 

4433 idx = self 

4434 target = idx[:0] 

4435 else: 

4436 target = ensure_index(target) 

4437 

4438 if level is not None and ( 

4439 isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex) 

4440 ): 

4441 if method is not None: 

4442 raise TypeError("Fill method not supported if level passed") 

4443 

4444 # TODO: tests where passing `keep_order=not self._is_multi` 

4445 # makes a difference for non-MultiIndex case 

4446 target, indexer, _ = self._join_level( 

4447 target, level, how="right", keep_order=not self._is_multi 

4448 ) 

4449 

4450 else: 

4451 if self.equals(target): 

4452 indexer = None 

4453 else: 

4454 if self._index_as_unique: 

4455 indexer = self.get_indexer( 

4456 target, method=method, limit=limit, tolerance=tolerance 

4457 ) 

4458 elif self._is_multi: 

4459 raise ValueError("cannot handle a non-unique multi-index!") 

4460 else: 

4461 if method is not None or limit is not None: 

4462 raise ValueError( 

4463 "cannot reindex a non-unique index " 

4464 "with a method or limit" 

4465 ) 

4466 indexer, _ = self.get_indexer_non_unique(target) 

4467 

4468 if not self.is_unique: 

4469 # GH#42568 

4470 warnings.warn( 

4471 "reindexing with a non-unique Index is deprecated and " 

4472 "will raise in a future version.", 

4473 FutureWarning, 

4474 stacklevel=find_stack_level(), 

4475 ) 

4476 

4477 target = self._wrap_reindex_result(target, indexer, preserve_names) 

4478 return target, indexer 
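
A doctest-style sketch of the unique-index path above: the target's values become the new index and the indexer maps them back into self (tuple repr shown as of the pandas version this report covers):

>>> idx = pd.Index([10, 20, 30])
>>> idx.reindex([15, 30], method="pad")
(Int64Index([15, 30], dtype='int64'), array([0, 2]))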

4479 

4480 def _wrap_reindex_result(self, target, indexer, preserve_names: bool): 

4481 target = self._maybe_preserve_names(target, preserve_names) 

4482 return target 

4483 

4484 def _maybe_preserve_names(self, target: Index, preserve_names: bool): 

4485 if preserve_names and target.nlevels == 1 and target.name != self.name: 

4486 target = target.copy(deep=False) 

4487 target.name = self.name 

4488 return target 

4489 

4490 @final 

4491 def _reindex_non_unique( 

4492 self, target: Index 

4493 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]: 

4494 """ 

4495 Create a new index with target's values (move/add/delete values as 

4496 necessary) use with non-unique Index and a possibly non-unique target. 

4497 

4498 Parameters 

4499 ---------- 

4500 target : an iterable 

4501 

4502 Returns 

4503 ------- 

4504 new_index : pd.Index 

4505 Resulting index. 

4506 indexer : np.ndarray[np.intp] 

4507 Indices of output values in original index. 

4508 new_indexer : np.ndarray[np.intp] or None 

4509 

4510 """ 

4511 target = ensure_index(target) 

4512 if len(target) == 0: 

4513 # GH#13691 

4514 return self[:0], np.array([], dtype=np.intp), None 

4515 

4516 indexer, missing = self.get_indexer_non_unique(target) 

4517 check = indexer != -1 

4518 new_labels = self.take(indexer[check]) 

4519 new_indexer = None 

4520 

4521 if len(missing): 

4522 length = np.arange(len(indexer), dtype=np.intp) 

4523 

4524 missing = ensure_platform_int(missing) 

4525 missing_labels = target.take(missing) 

4526 missing_indexer = length[~check] 

4527 cur_labels = self.take(indexer[check]).values 

4528 cur_indexer = length[check] 

4529 

4530 # Index constructor below will do inference 

4531 new_labels = np.empty((len(indexer),), dtype=object) 

4532 new_labels[cur_indexer] = cur_labels 

4533 new_labels[missing_indexer] = missing_labels 

4534 

4535 # GH#38906 

4536 if not len(self): 

4537 

4538 new_indexer = np.arange(0, dtype=np.intp) 

4539 

4540 # a unique indexer 

4541 elif target.is_unique: 

4542 

4543 # see GH5553, make sure we use the right indexer 

4544 new_indexer = np.arange(len(indexer), dtype=np.intp) 

4545 new_indexer[cur_indexer] = np.arange(len(cur_labels)) 

4546 new_indexer[missing_indexer] = -1 

4547 

4548 # we have a non_unique selector, need to use the original 

4549 # indexer here 

4550 else: 

4551 

4552 # need to retake to have the same size as the indexer 

4553 indexer[~check] = -1 

4554 

4555 # reset the new indexer to account for the new size 

4556 new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) 

4557 new_indexer[~check] = -1 

4558 

4559 if isinstance(self, ABCMultiIndex): 

4560 new_index = type(self).from_tuples(new_labels, names=self.names) 

4561 else: 

4562 new_index = Index._with_infer(new_labels, name=self.name) 

4563 return new_index, indexer, new_indexer 
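
A NumPy-only sketch (not part of the source) of the label-stitching step above, with a hypothetical `get_indexer_non_unique` result: matched positions take labels from self, while unmatched positions keep the target's own labels:

import numpy as np

indexer = np.array([0, 2, -1, 1])  # hypothetical: -1 marks a target label not in self
check = indexer != -1
self_labels = np.array(list("abc"), dtype=object)
new_labels = np.empty(len(indexer), dtype=object)
new_labels[check.nonzero()[0]] = self_labels[indexer[check]]
new_labels[(~check).nonzero()[0]] = "z"  # hypothetical missing target label
print(new_labels)  # ['a' 'c' 'z' 'b']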

4564 

4565 # -------------------------------------------------------------------- 

4566 # Join Methods 

4567 

4568 @overload 

4569 def join( 

4570 self, 

4571 other: Index, 

4572 *, 

4573 how: str_t = ..., 

4574 level: Level = ..., 

4575 return_indexers: Literal[True], 

4576 sort: bool = ..., 

4577 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4578 ... 

4579 

4580 @overload 

4581 def join( 

4582 self, 

4583 other: Index, 

4584 *, 

4585 how: str_t = ..., 

4586 level: Level = ..., 

4587 return_indexers: Literal[False] = ..., 

4588 sort: bool = ..., 

4589 ) -> Index: 

4590 ... 

4591 

4592 @overload 

4593 def join( 

4594 self, 

4595 other: Index, 

4596 *, 

4597 how: str_t = ..., 

4598 level: Level = ..., 

4599 return_indexers: bool = ..., 

4600 sort: bool = ..., 

4601 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4602 ... 

4603 

4604 @final 

4605 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "other"]) 

4606 @_maybe_return_indexers 

4607 def join( 

4608 self, 

4609 other: Index, 

4610 how: str_t = "left", 

4611 level: Level = None, 

4612 return_indexers: bool = False, 

4613 sort: bool = False, 

4614 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4615 """ 

4616 Compute join_index and indexers to conform data structures to the new index. 

4617 

4618 Parameters 

4619 ---------- 

4620 other : Index 

4621 how : {'left', 'right', 'inner', 'outer'} 

4622 level : int or level name, default None 

4623 return_indexers : bool, default False 

4624 sort : bool, default False 

4625 Sort the join keys lexicographically in the result Index. If False, 

4626 the order of the join keys depends on the join type (how keyword). 

4627 

4628 Returns 

4629 ------- 

4630 join_index, (left_indexer, right_indexer) 

4631 """ 

4632 other = ensure_index(other) 

4633 

4634 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): 

4635 if (self.tz is None) ^ (other.tz is None): 

4636 # Raise instead of casting to object below. 

4637 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") 

4638 

4639 if not self._is_multi and not other._is_multi: 

4640 # We have specific handling for MultiIndex below 

4641 pself, pother = self._maybe_promote(other) 

4642 if pself is not self or pother is not other: 

4643 return pself.join( 

4644 pother, how=how, level=level, return_indexers=True, sort=sort 

4645 ) 

4646 

4647 lindexer: np.ndarray | None 

4648 rindexer: np.ndarray | None 

4649 

4650 # try to figure out the join level 

4651 # GH3662 

4652 if level is None and (self._is_multi or other._is_multi): 

4653 

4654 # have the same levels/names so a simple join 

4655 if self.names == other.names: 

4656 pass 

4657 else: 

4658 return self._join_multi(other, how=how) 

4659 

4660 # join on the level 

4661 if level is not None and (self._is_multi or other._is_multi): 

4662 return self._join_level(other, level, how=how) 

4663 

4664 if len(other) == 0: 

4665 if how in ("left", "outer"): 

4666 join_index = self._view() 

4667 rindexer = np.broadcast_to(np.intp(-1), len(join_index)) 

4668 return join_index, None, rindexer 

4669 elif how in ("right", "inner", "cross"): 

4670 join_index = other._view() 

4671 lindexer = np.array([]) 

4672 return join_index, lindexer, None 

4673 

4674 if len(self) == 0: 

4675 if how in ("right", "outer"): 

4676 join_index = other._view() 

4677 lindexer = np.broadcast_to(np.intp(-1), len(join_index)) 

4678 return join_index, lindexer, None 

4679 elif how in ("left", "inner", "cross"): 

4680 join_index = self._view() 

4681 rindexer = np.array([]) 

4682 return join_index, None, rindexer 

4683 

4684 if self._join_precedence < other._join_precedence: 

4685 how = {"right": "left", "left": "right"}.get(how, how) 

4686 join_index, lidx, ridx = other.join( 

4687 self, how=how, level=level, return_indexers=True 

4688 ) 

4689 lidx, ridx = ridx, lidx 

4690 return join_index, lidx, ridx 

4691 

4692 if not is_dtype_equal(self.dtype, other.dtype): 

4693 dtype = self._find_common_type_compat(other) 

4694 this = self.astype(dtype, copy=False) 

4695 other = other.astype(dtype, copy=False) 

4696 return this.join(other, how=how, return_indexers=True) 

4697 

4698 _validate_join_method(how) 

4699 

4700 if not self.is_unique and not other.is_unique: 

4701 return self._join_non_unique(other, how=how) 

4702 elif not self.is_unique or not other.is_unique: 

4703 if self.is_monotonic_increasing and other.is_monotonic_increasing: 

4704 if not is_interval_dtype(self.dtype): 

4705 # otherwise we will fall through to _join_via_get_indexer 

4706 # GH#39133 

4707 # go through object dtype for ea till engine is supported properly 

4708 return self._join_monotonic(other, how=how) 

4709 else: 

4710 return self._join_non_unique(other, how=how) 

4711 elif ( 

4712 self.is_monotonic_increasing 

4713 and other.is_monotonic_increasing 

4714 and self._can_use_libjoin 

4715 and ( 

4716 not isinstance(self, ABCMultiIndex) 

4717 or not any(is_categorical_dtype(dtype) for dtype in self.dtypes) 

4718 ) 

4719 and not is_categorical_dtype(self.dtype) 

4720 ): 

4721 # Categorical is monotonic if data are ordered as categories, but join 

4722 # cannot handle the case where it is not lexicographically monotonic GH#38502 

4723 try: 

4724 return self._join_monotonic(other, how=how) 

4725 except TypeError: 

4726 # object dtype; non-comparable objects 

4727 pass 

4728 

4729 return self._join_via_get_indexer(other, how, sort) 
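
A doctest-style sketch of the monotonic-unique fastpath above (reprs shown as of the pandas version this report covers); the indexers give each result label's position in the left and right operands:

>>> left = pd.Index([1, 2, 3])
>>> right = pd.Index([2, 3, 4])
>>> left.join(right, how="inner", return_indexers=True)
(Int64Index([2, 3], dtype='int64'), array([1, 2]), array([0, 1]))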

4730 

4731 @final 

4732 def _join_via_get_indexer( 

4733 self, other: Index, how: str_t, sort: bool 

4734 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4735 # Fallback if we do not have any fastpaths available based on 

4736 # uniqueness/monotonicity 

4737 

4738 # Note: at this point we have checked matching dtypes 

4739 

4740 if how == "left": 

4741 join_index = self 

4742 elif how == "right": 

4743 join_index = other 

4744 elif how == "inner": 

4745 # TODO: sort=False here for backwards compat. It may 

4746 # be better to use the sort parameter passed into join 

4747 join_index = self.intersection(other, sort=False) 

4748 elif how == "outer": 

4749 # TODO: sort=True here for backwards compat. It may 

4750 # be better to use the sort parameter passed into join 

4751 join_index = self.union(other) 

4752 

4753 if sort: 

4754 join_index = join_index.sort_values() 

4755 

4756 if join_index is self: 

4757 lindexer = None 

4758 else: 

4759 lindexer = self.get_indexer_for(join_index) 

4760 if join_index is other: 

4761 rindexer = None 

4762 else: 

4763 rindexer = other.get_indexer_for(join_index) 

4764 return join_index, lindexer, rindexer 

4765 

4766 @final 

4767 def _join_multi(self, other: Index, how: str_t): 

4768 from pandas.core.indexes.multi import MultiIndex 

4769 from pandas.core.reshape.merge import restore_dropped_levels_multijoin 

4770 

4771 # figure out join names 

4772 self_names_list = list(com.not_none(*self.names)) 

4773 other_names_list = list(com.not_none(*other.names)) 

4774 self_names_order = self_names_list.index 

4775 other_names_order = other_names_list.index 

4776 self_names = set(self_names_list) 

4777 other_names = set(other_names_list) 

4778 overlap = self_names & other_names 

4779 

4780 # need at least 1 in common 

4781 if not overlap: 

4782 raise ValueError("cannot join with no overlapping index names") 

4783 

4784 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): 

4785 

4786 # Drop the non-matching levels from left and right respectively 

4787 ldrop_names = sorted(self_names - overlap, key=self_names_order) 

4788 rdrop_names = sorted(other_names - overlap, key=other_names_order) 

4789 

4790 # if only the order differs 

4791 if not len(ldrop_names + rdrop_names): 

4792 self_jnlevels = self 

4793 other_jnlevels = other.reorder_levels(self.names) 

4794 else: 

4795 self_jnlevels = self.droplevel(ldrop_names) 

4796 other_jnlevels = other.droplevel(rdrop_names) 

4797 

4798 # Join left and right 

4799 # Joining same-leveled MultiIndexes is supported 

4800 join_idx, lidx, ridx = self_jnlevels.join( 

4801 other_jnlevels, how=how, return_indexers=True 

4802 ) 

4803 

4804 # Restore the dropped levels 

4805 # Returned index level order is 

4806 # common levels, ldrop_names, rdrop_names 

4807 dropped_names = ldrop_names + rdrop_names 

4808 

4809 # error: Argument 5/6 to "restore_dropped_levels_multijoin" has 

4810 # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any 

4811 # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]" 

4812 levels, codes, names = restore_dropped_levels_multijoin( 

4813 self, 

4814 other, 

4815 dropped_names, 

4816 join_idx, 

4817 lidx, # type: ignore[arg-type] 

4818 ridx, # type: ignore[arg-type] 

4819 ) 

4820 

4821 # Re-create the multi-index 

4822 multi_join_idx = MultiIndex( 

4823 levels=levels, codes=codes, names=names, verify_integrity=False 

4824 ) 

4825 

4826 multi_join_idx = multi_join_idx.remove_unused_levels() 

4827 

4828 return multi_join_idx, lidx, ridx 

4829 

4830 jl = list(overlap)[0] 

4831 

4832 # Case where only one index is multi 

4833 # make the indices into mi's that match 

4834 flip_order = False 

4835 if isinstance(self, MultiIndex): 

4836 self, other = other, self 

4837 flip_order = True 

4838 # flip if join method is right or left 

4839 how = {"right": "left", "left": "right"}.get(how, how) 

4840 

4841 level = other.names.index(jl) 

4842 result = self._join_level(other, level, how=how) 

4843 

4844 if flip_order: 

4845 return result[0], result[2], result[1] 

4846 return result 

4847 

4848 @final 

4849 def _join_non_unique( 

4850 self, other: Index, how: str_t = "left" 

4851 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

4852 from pandas.core.reshape.merge import get_join_indexers 

4853 

4854 # We only get here if dtypes match 

4855 assert self.dtype == other.dtype 

4856 

4857 left_idx, right_idx = get_join_indexers( 

4858 [self._values], [other._values], how=how, sort=True 

4859 ) 

4860 mask = left_idx == -1 

4861 

4862 join_array = self._values.take(left_idx) 

4863 right = other._values.take(right_idx) 

4864 

4865 if isinstance(join_array, np.ndarray): 

4866 # error: Argument 3 to "putmask" has incompatible type 

4867 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected 

4868 # "Union[_SupportsArray[dtype[Any]], _NestedSequence[ 

4869 # _SupportsArray[dtype[Any]]], bool, int, float, complex, 

4870 # str, bytes, _NestedSequence[Union[bool, int, float, 

4871 # complex, str, bytes]]]" 

4872 np.putmask(join_array, mask, right) # type: ignore[arg-type] 

4873 else: 

4874 join_array._putmask(mask, right) 

4875 

4876 join_index = self._wrap_joined_index(join_array, other) 

4877 

4878 return join_index, left_idx, right_idx 

4879 

4880 @final 

4881 def _join_level( 

4882 self, other: Index, level, how: str_t = "left", keep_order: bool = True 

4883 ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4884 """ 

4885 The join method *only* affects the level of the resulting 

4886 MultiIndex. Otherwise it just exactly aligns the Index data to the 

4887 labels of the level in the MultiIndex. 

4888 

4889 If ``keep_order == True``, the order of the data indexed by the 

4890 MultiIndex will not be changed; otherwise, it will tie out 

4891 with `other`. 

4892 """ 

4893 from pandas.core.indexes.multi import MultiIndex 

4894 

4895 def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: 

4896 """ 

4897 Returns sorter for the innermost level while preserving the 

4898 order of higher levels. 

4899 

4900 Parameters 

4901 ---------- 

4902 labels : list[np.ndarray] 

4903 Each ndarray has signed integer dtype, not necessarily identical. 

4904 

4905 Returns 

4906 ------- 

4907 np.ndarray[np.intp] 

4908 """ 

4909 if labels[0].size == 0: 

4910 return np.empty(0, dtype=np.intp) 

4911 

4912 if len(labels) == 1: 

4913 return get_group_index_sorter(ensure_platform_int(labels[0])) 

4914 

4915 # find indexers of beginning of each set of 

4916 # same-key labels w.r.t all but last level 

4917 tic = labels[0][:-1] != labels[0][1:] 

4918 for lab in labels[1:-1]: 

4919 tic |= lab[:-1] != lab[1:] 

4920 

4921 starts = np.hstack(([True], tic, [True])).nonzero()[0] 

4922 lab = ensure_int64(labels[-1]) 

4923 return lib.get_level_sorter(lab, ensure_platform_int(starts)) 

4924 

4925 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): 

4926 raise TypeError("Join on level between two MultiIndex objects is ambiguous") 

4927 

4928 left, right = self, other 

4929 

4930 flip_order = not isinstance(self, MultiIndex) 

4931 if flip_order: 

4932 left, right = right, left 

4933 how = {"right": "left", "left": "right"}.get(how, how) 

4934 

4935 assert isinstance(left, MultiIndex) 

4936 

4937 level = left._get_level_number(level) 

4938 old_level = left.levels[level] 

4939 

4940 if not right.is_unique: 

4941 raise NotImplementedError( 

4942 "Index._join_level on non-unique index is not implemented" 

4943 ) 

4944 

4945 new_level, left_lev_indexer, right_lev_indexer = old_level.join( 

4946 right, how=how, return_indexers=True 

4947 ) 

4948 

4949 if left_lev_indexer is None: 

4950 if keep_order or len(left) == 0: 

4951 left_indexer = None 

4952 join_index = left 

4953 else: # sort the leaves 

4954 left_indexer = _get_leaf_sorter(left.codes[: level + 1]) 

4955 join_index = left[left_indexer] 

4956 

4957 else: 

4958 left_lev_indexer = ensure_platform_int(left_lev_indexer) 

4959 rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) 

4960 old_codes = left.codes[level] 

4961 

4962 taker = old_codes[old_codes != -1] 

4963 new_lev_codes = rev_indexer.take(taker) 

4964 

4965 new_codes = list(left.codes) 

4966 new_codes[level] = new_lev_codes 

4967 

4968 new_levels = list(left.levels) 

4969 new_levels[level] = new_level 

4970 

4971 if keep_order: # just drop missing values. o.w. keep order 

4972 left_indexer = np.arange(len(left), dtype=np.intp) 

4973 left_indexer = cast(np.ndarray, left_indexer) 

4974 mask = new_lev_codes != -1 

4975 if not mask.all(): 

4976 new_codes = [lab[mask] for lab in new_codes] 

4977 left_indexer = left_indexer[mask] 

4978 

4979 else: # tie out the order with other 

4980 if level == 0: # outer most level, take the fast route 

4981 max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max() 

4982 ngroups = 1 + max_new_lev 

4983 left_indexer, counts = libalgos.groupsort_indexer( 

4984 new_lev_codes, ngroups 

4985 ) 

4986 

4987 # missing values are placed first; drop them! 

4988 left_indexer = left_indexer[counts[0] :] 

4989 new_codes = [lab[left_indexer] for lab in new_codes] 

4990 

4991 else: # sort the leaves 

4992 mask = new_lev_codes != -1 

4993 mask_all = mask.all() 

4994 if not mask_all: 

4995 new_codes = [lab[mask] for lab in new_codes] 

4996 

4997 left_indexer = _get_leaf_sorter(new_codes[: level + 1]) 

4998 new_codes = [lab[left_indexer] for lab in new_codes] 

4999 

5000 # left_indexers are w.r.t masked frame. 

5001 # reverse to original frame! 

5002 if not mask_all: 

5003 left_indexer = mask.nonzero()[0][left_indexer] 

5004 

5005 join_index = MultiIndex( 

5006 levels=new_levels, 

5007 codes=new_codes, 

5008 names=left.names, 

5009 verify_integrity=False, 

5010 ) 

5011 

5012 if right_lev_indexer is not None: 

5013 right_indexer = right_lev_indexer.take(join_index.codes[level]) 

5014 else: 

5015 right_indexer = join_index.codes[level] 

5016 

5017 if flip_order: 

5018 left_indexer, right_indexer = right_indexer, left_indexer 

5019 

5020 left_indexer = ( 

5021 None if left_indexer is None else ensure_platform_int(left_indexer) 

5022 ) 

5023 right_indexer = ( 

5024 None if right_indexer is None else ensure_platform_int(right_indexer) 

5025 ) 

5026 return join_index, left_indexer, right_indexer 

5027 

5028 @final 

5029 def _join_monotonic( 

5030 self, other: Index, how: str_t = "left" 

5031 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

5032 # We only get here with matching dtypes and both monotonic increasing 

5033 assert other.dtype == self.dtype 

5034 

5035 if self.equals(other): 

5036 ret_index = other if how == "right" else self 

5037 return ret_index, None, None 

5038 

5039 ridx: np.ndarray | None 

5040 lidx: np.ndarray | None 

5041 

5042 if self.is_unique and other.is_unique: 

5043 # We can perform much better than the general case 

5044 if how == "left": 

5045 join_index = self 

5046 lidx = None 

5047 ridx = self._left_indexer_unique(other) 

5048 elif how == "right": 

5049 join_index = other 

5050 lidx = other._left_indexer_unique(self) 

5051 ridx = None 

5052 elif how == "inner": 

5053 join_array, lidx, ridx = self._inner_indexer(other) 

5054 join_index = self._wrap_joined_index(join_array, other) 

5055 elif how == "outer": 

5056 join_array, lidx, ridx = self._outer_indexer(other) 

5057 join_index = self._wrap_joined_index(join_array, other) 

5058 else: 

5059 if how == "left": 

5060 join_array, lidx, ridx = self._left_indexer(other) 

5061 elif how == "right": 

5062 join_array, ridx, lidx = other._left_indexer(self) 

5063 elif how == "inner": 

5064 join_array, lidx, ridx = self._inner_indexer(other) 

5065 elif how == "outer": 

5066 join_array, lidx, ridx = self._outer_indexer(other) 

5067 

5068 join_index = self._wrap_joined_index(join_array, other) 

5069 

5070 lidx = None if lidx is None else ensure_platform_int(lidx) 

5071 ridx = None if ridx is None else ensure_platform_int(ridx) 

5072 return join_index, lidx, ridx 

5073 

5074 def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _IndexT: 

5075 assert other.dtype == self.dtype 

5076 

5077 if isinstance(self, ABCMultiIndex): 

5078 name = self.names if self.names == other.names else None 

5079 # error: Incompatible return value type (got "MultiIndex", 

5080 # expected "_IndexT") 

5081 return self._constructor(joined, name=name) # type: ignore[return-value] 

5082 else: 

5083 name = get_op_result_name(self, other) 

5084 return self._constructor._with_infer(joined, name=name, dtype=self.dtype) 

5085 

5086 @cache_readonly 

5087 def _can_use_libjoin(self) -> bool: 

5088 """ 

5089 Whether we can use the fastpaths implemented in _libs.join 

5090 """ 

5091 if type(self) is Index: 

5092 # excludes EAs 

5093 return isinstance(self.dtype, np.dtype) 

5094 return not is_interval_dtype(self.dtype) 

5095 

5096 # -------------------------------------------------------------------- 

5097 # Uncategorized Methods 

5098 

5099 @property 

5100 def values(self) -> ArrayLike: 

5101 """ 

5102 Return an array representing the data in the Index. 

5103 

5104 .. warning:: 

5105 

5106 We recommend using :attr:`Index.array` or 

5107 :meth:`Index.to_numpy`, depending on whether you need 

5108 a reference to the underlying data or a NumPy array. 

5109 

5110 Returns 

5111 ------- 

5112 array: numpy.ndarray or ExtensionArray 

5113 

5114 See Also 

5115 -------- 

5116 Index.array : Reference to the underlying data. 

5117 Index.to_numpy : A NumPy array representing the underlying data. 

5118 """ 

5119 return self._data 

5120 

5121 # error: Decorated property not supported 

5122 # https://github.com/python/mypy/issues/1362 

5123 @cache_readonly # type: ignore[misc] 

5124 @doc(IndexOpsMixin.array) 

5125 def array(self) -> ExtensionArray: 

5126 array = self._data 

5127 if isinstance(array, np.ndarray): 

5128 from pandas.core.arrays.numpy_ import PandasArray 

5129 

5130 array = PandasArray(array) 

5131 return array 

5132 

5133 @property 

5134 def _values(self) -> ExtensionArray | np.ndarray: 

5135 """ 

5136 The best array representation. 

5137 

5138 This is an ndarray or ExtensionArray. 

5139 

5140 ``_values`` are consistent between ``Series`` and ``Index``. 

5141 

5142 It may differ from the public '.values' attribute. 

5143 

5144 index | values | _values | 

5145 ----------------- | --------------- | ------------- | 

5146 Index | ndarray | ndarray | 

5147 CategoricalIndex | Categorical | Categorical | 

5148 DatetimeIndex | ndarray[M8[ns]] | DatetimeArray | 

5149 DatetimeIndex[tz] | ndarray[M8[ns]] | DatetimeArray | 

5150 PeriodIndex | ndarray[object] | PeriodArray | 

5151 IntervalIndex | IntervalArray | IntervalArray | 

5152 

5153 See Also 

5154 -------- 

5155 values : Values 

5156 """ 

5157 return self._data 
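
The table above in doctest form, for the tz-aware datetime row (type names shown as of the pandas version this report covers):

>>> dti = pd.date_range("2016-01-01", periods=2, tz="UTC")
>>> type(dti.values).__name__, type(dti._values).__name__
('ndarray', 'DatetimeArray')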

5158 

5159 def _get_engine_target(self) -> ArrayLike: 

5160 """ 

5161 Get the ndarray or ExtensionArray that we can pass to the IndexEngine 

5162 constructor. 

5163 """ 

5164 vals = self._values 

5165 if isinstance(vals, StringArray): 

5166 # GH#45652 much more performant than ExtensionEngine 

5167 return vals._ndarray 

5168 if type(self) is Index and isinstance(self._values, ExtensionArray): 

5169 # TODO(ExtensionIndex): remove special-case, just use self._values 

5170 return self._values.astype(object) 

5171 return vals 

5172 

5173 def _from_join_target(self, result: np.ndarray) -> ArrayLike: 

5174 """ 

5175 Cast the ndarray returned from one of the libjoin.foo_indexer functions 

5176 back to type(self)._data. 

5177 """ 

5178 return result 

5179 

5180 @doc(IndexOpsMixin._memory_usage) 

5181 def memory_usage(self, deep: bool = False) -> int: 

5182 result = self._memory_usage(deep=deep) 

5183 

5184 # include our engine hashtable 

5185 result += self._engine.sizeof(deep=deep) 

5186 return result 
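
A quick doctest-style check of the deep option: a deep measurement also counts the Python string objects an object-dtype index holds, so it can only be larger or equal:

>>> idx = pd.Index(["a", "bb", "ccc"])
>>> idx.memory_usage(deep=True) >= idx.memory_usage()
True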

5187 

5188 @final 

5189 def where(self, cond, other=None) -> Index: 

5190 """ 

5191 Replace values where the condition is False. 

5192 

5193 The replacement is taken from other. 

5194 

5195 Parameters 

5196 ---------- 

5197 cond : bool array-like with the same length as self 

5198 Condition to select the values on. 

5199 other : scalar, or array-like, default None 

5200 Replacement if the condition is False. 

5201 

5202 Returns 

5203 ------- 

5204 pandas.Index 

5205 A copy of self with values replaced from other 

5206 where the condition is False. 

5207 

5208 See Also 

5209 -------- 

5210 Series.where : Same method for Series. 

5211 DataFrame.where : Same method for DataFrame. 

5212 

5213 Examples 

5214 -------- 

5215 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) 

5216 >>> idx 

5217 Index(['car', 'bike', 'train', 'tractor'], dtype='object') 

5218 >>> idx.where(idx.isin(['car', 'train']), 'other') 

5219 Index(['car', 'other', 'train', 'other'], dtype='object') 

5220 """ 

5221 if isinstance(self, ABCMultiIndex): 

5222 raise NotImplementedError( 

5223 ".where is not supported for MultiIndex operations" 

5224 ) 

5225 cond = np.asarray(cond, dtype=bool) 

5226 return self.putmask(~cond, other) 

5227 

5228 # construction helpers 

5229 @final 

5230 @classmethod 

5231 def _scalar_data_error(cls, data): 

5232 # We return the TypeError so that we can raise it from the constructor 

5233 # in order to keep mypy happy 

5234 return TypeError( 

5235 f"{cls.__name__}(...) must be called with a collection of some " 

5236 f"kind, {repr(data)} was passed" 

5237 ) 

5238 

5239 @final 

5240 @classmethod 

5241 def _string_data_error(cls, data): 

5242 raise TypeError( 

5243 "String dtype not supported, you may need " 

5244 "to explicitly cast to a numeric type" 

5245 ) 

5246 

5247 def _validate_fill_value(self, value): 

5248 """ 

5249 Check if the value can be inserted into our array without casting, 

5250 and convert it to an appropriate native type if necessary. 

5251 

5252 Raises 

5253 ------ 

5254 TypeError 

5255 If the value cannot be inserted into an array of this dtype. 

5256 """ 

5257 dtype = self.dtype 

5258 if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]: 

5259 # non-datetimelike numpy dtype: defer to np_can_hold_element 

5260 try: 

5261 return np_can_hold_element(dtype, value) 

5262 except LossySetitemError as err: 

5263 # re-raise as TypeError for consistency 

5264 raise TypeError from err 

5265 elif not can_hold_element(self._values, value): 

5266 raise TypeError 

5267 return value 

5268 

5269 @final 

5270 def _require_scalar(self, value): 

5271 """ 

5272 Check that this is a scalar value that we can use for setitem-like 

5273 operations without changing dtype. 

5274 """ 

5275 if not is_scalar(value): 

5276 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") 

5277 return value 

5278 

5279 def _is_memory_usage_qualified(self) -> bool: 

5280 """ 

5281 Return a boolean if we need a qualified .info display. 

5282 """ 

5283 return self.is_object() 

5284 

5285 def is_type_compatible(self, kind: str_t) -> bool: 

5286 """ 

5287 Whether the index type is compatible with the provided type. 

5288 """ 

5289 warnings.warn( 

5290 "Index.is_type_compatible is deprecated and will be removed in a " 

5291 "future version.", 

5292 FutureWarning, 

5293 stacklevel=find_stack_level(), 

5294 ) 

5295 return kind == self.inferred_type 

5296 

5297 def __contains__(self, key: Any) -> bool: 

5298 """ 

5299 Return a boolean indicating whether the provided key is in the index. 

5300 

5301 Parameters 

5302 ---------- 

5303 key : label 

5304 The key to check if it is present in the index. 

5305 

5306 Returns 

5307 ------- 

5308 bool 

5309 True if the key is in the index, False otherwise. 

5310 

5311 Raises 

5312 ------ 

5313 TypeError 

5314 If the key is not hashable. 

5315 

5316 See Also 

5317 -------- 

5318 Index.isin : Returns an ndarray of boolean dtype indicating whether the 

5319 list-like key is in the index. 

5320 

5321 Examples 

5322 -------- 

5323 >>> idx = pd.Index([1, 2, 3, 4]) 

5324 >>> idx 

5325 Int64Index([1, 2, 3, 4], dtype='int64') 

5326 

5327 >>> 2 in idx 

5328 True 

5329 >>> 6 in idx 

5330 False 

5331 """ 

5332 hash(key) 

5333 try: 

5334 return key in self._engine 

5335 except (OverflowError, TypeError, ValueError): 

5336 return False 

5337 

5338 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

5339 # Incompatible types in assignment (expression has type "None", base class 

5340 # "object" defined the type as "Callable[[object], int]") 

5341 __hash__: ClassVar[None] # type: ignore[assignment] 

5342 

5343 @final 

5344 def __setitem__(self, key, value): 

5345 raise TypeError("Index does not support mutable operations") 

5346 

5347 def __getitem__(self, key): 

5348 """ 

5349 Override numpy.ndarray's __getitem__ method to work as desired. 

5350 

5351 This function adds lists and Series as valid boolean indexers 

5352 (plain ndarrays only support an ndarray indexer with dtype=bool). 

5353 

5354 If the resulting ndim != 1, a plain ndarray is returned instead of 

5355 the corresponding `Index` subclass. 

5356 

5357 """ 

5358 getitem = self._data.__getitem__ 

5359 

5360 if is_integer(key) or is_float(key): 

5361 # GH#44051 exclude bool, which would return a 2d ndarray 

5362 key = com.cast_scalar_indexer(key, warn_float=True) 

5363 return getitem(key) 

5364 

5365 if isinstance(key, slice): 

5366 # This case is separated from the conditional above to avoid 

5367 # the pessimization of com.is_bool_indexer and ndim checks. 

5368 result = getitem(key) 

5369 # Going through simple_new for performance. 

5370 return type(self)._simple_new(result, name=self._name) 

5371 

5372 if com.is_bool_indexer(key): 

5373 # if we have list[bools, length=1e5] then doing this check+convert 

5374 # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__ 

5375 # time below from 3.8 ms to 496 µs 

5376 # if we already have ndarray[bool], the overhead is 1.4 µs or .25% 

5377 if is_extension_array_dtype(getattr(key, "dtype", None)): 

5378 key = key.to_numpy(dtype=bool, na_value=False) 

5379 else: 

5380 key = np.asarray(key, dtype=bool) 

5381 

5382 result = getitem(key) 

5383 # Because we ruled out integer above, we always get an arraylike here 

5384 if result.ndim > 1: 

5385 deprecate_ndim_indexing(result) 

5386 if hasattr(result, "_ndarray"): 

5387 # i.e. NDArrayBackedExtensionArray 

5388 # Unpack to ndarray for MPL compat 

5389 # error: Item "ndarray[Any, Any]" of 

5390 # "Union[ExtensionArray, ndarray[Any, Any]]" 

5391 # has no attribute "_ndarray" 

5392 return result._ndarray # type: ignore[union-attr] 

5393 return result 

5394 

5395 # NB: Using _constructor._simple_new would break if MultiIndex 

5396 # didn't override __getitem__ 

5397 return self._constructor._simple_new(result, name=self._name) 
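
A sketch of the indexing behaviors described in the docstring, with the
results I would expect under pandas 1.x noted in comments:

    import pandas as pd

    idx = pd.Index(["a", "b", "c", "d"])
    idx[1]                            # scalar key -> scalar element: 'b'
    idx[1:3]                          # slice fastpath -> Index(['b', 'c'], dtype='object')
    idx[[True, False, True, False]]   # a list of bools is accepted, not only ndarray[bool]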

5398 

5399 def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT: 

5400 """ 

5401 Fastpath for __getitem__ when we know we have a slice. 

5402 """ 

5403 res = self._data[slobj] 

5404 return type(self)._simple_new(res, name=self._name) 

5405 

5406 @final 

5407 def _can_hold_identifiers_and_holds_name(self, name) -> bool: 

5408 """ 

5409 Faster check for ``name in self`` when we know `name` is a Python 

5410 identifier (e.g. in NDFrame.__getattr__, which hits this to support 

5411 . key lookup). For indexes that can't hold identifiers (everything 

5412 but object & categorical) we just return False. 

5413 

5414 https://github.com/pandas-dev/pandas/issues/19764 

5415 """ 

5416 if self.is_object() or is_string_dtype(self.dtype) or self.is_categorical(): 

5417 return name in self 

5418 return False 

5419 

5420 def append(self, other: Index | Sequence[Index]) -> Index: 

5421 """ 

5422 Append a collection of Index objects together. 

5423 

5424 Parameters 

5425 ---------- 

5426 other : Index or list/tuple of indices 

5427 

5428 Returns 

5429 ------- 

5430 Index 

5431 """ 

5432 to_concat = [self] 

5433 

5434 if isinstance(other, (list, tuple)): 

5435 to_concat += list(other) 

5436 else: 

5437 # error: Argument 1 to "append" of "list" has incompatible type 

5438 # "Union[Index, Sequence[Index]]"; expected "Index" 

5439 to_concat.append(other) # type: ignore[arg-type] 

5440 

5441 for obj in to_concat: 

5442 if not isinstance(obj, Index): 

5443 raise TypeError("all inputs must be Index") 

5444 

5445 names = {obj.name for obj in to_concat} 

5446 name = None if len(names) > 1 else self.name 

5447 

5448 return self._concat(to_concat, name) 
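
A sketch of the name resolution above: the result keeps self's name only
when the names of all inputs agree (assumed pandas 1.x behavior):

    import pandas as pd

    a = pd.Index([1, 2], name="x")
    b = pd.Index([3, 4], name="x")
    c = pd.Index([5, 6], name="y")
    a.append(b)        # names agree -> the result keeps name 'x'
    a.append([b, c])   # names conflict -> the result name is None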

5449 

5450 def _concat(self, to_concat: list[Index], name: Hashable) -> Index: 

5451 """ 

5452 Concatenate multiple Index objects. 

5453 """ 

5454 to_concat_vals = [x._values for x in to_concat] 

5455 

5456 result = concat_compat(to_concat_vals) 

5457 

5458 is_numeric = result.dtype.kind in ["i", "u", "f"] 

5459 if self._is_backward_compat_public_numeric_index and is_numeric: 

5460 return type(self)._simple_new(result, name=name) 

5461 

5462 return Index._with_infer(result, name=name) 

5463 

5464 @final 

5465 def putmask(self, mask, value) -> Index: 

5466 """ 

5467 Return a new Index of the values set with the mask. 

5468 

5469 Returns 

5470 ------- 

5471 Index 

5472 

5473 See Also 

5474 -------- 

5475 numpy.ndarray.putmask : Changes elements of an array 

5476 based on conditional and input values. 

5477 """ 

5478 mask, noop = validate_putmask(self._values, mask) 

5479 if noop: 

5480 return self.copy() 

5481 

5482 if self.dtype != object and is_valid_na_for_dtype(value, self.dtype): 

5483 # e.g. None -> np.nan, see also Block._standardize_fill_value 

5484 value = self._na_value 

5485 try: 

5486 converted = self._validate_fill_value(value) 

5487 except (LossySetitemError, ValueError, TypeError) as err: 

5488 if is_object_dtype(self): # pragma: no cover 

5489 raise err 

5490 

5491 dtype = self._find_common_type_compat(value) 

5492 return self.astype(dtype).putmask(mask, value) 

5493 

5494 values = self._values.copy() 

5495 

5496 if isinstance(values, np.ndarray): 

5497 converted = setitem_datetimelike_compat(values, mask.sum(), converted) 

5498 np.putmask(values, mask, converted) 

5499 

5500 else: 

5501 # Note: we use the original value here, not converted, as 

5502 # _validate_fill_value is not idempotent 

5503 values._putmask(mask, value) 

5504 

5505 return self._shallow_copy(values) 
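
A sketch of the casting fallback above: when the replacement value cannot be
held losslessly, the index is upcast before masking (expected pandas 1.x
behavior, noted in comments):

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3])
    idx.putmask(np.array([True, False, True]), 99)
    # -> Int64Index([99, 2, 99], dtype='int64')
    idx.putmask([True, False, False], 0.5)
    # 0.5 does not fit int64 losslessly -> result is upcast to float64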

5506 

5507 def equals(self, other: Any) -> bool: 

5508 """ 

5509 Determine if two Index objects are equal. 

5510 

5511 The things that are being compared are: 

5512 

5513 * The elements inside the Index object. 

5514 * The order of the elements inside the Index object. 

5515 

5516 Parameters 

5517 ---------- 

5518 other : Any 

5519 The other object to compare against. 

5520 

5521 Returns 

5522 ------- 

5523 bool 

5524 True if "other" is an Index and it has the same elements and order 

5525 as the calling index; False otherwise. 

5526 

5527 Examples 

5528 -------- 

5529 >>> idx1 = pd.Index([1, 2, 3]) 

5530 >>> idx1 

5531 Int64Index([1, 2, 3], dtype='int64') 

5532 >>> idx1.equals(pd.Index([1, 2, 3])) 

5533 True 

5534 

5535 The elements inside are compared 

5536 

5537 >>> idx2 = pd.Index(["1", "2", "3"]) 

5538 >>> idx2 

5539 Index(['1', '2', '3'], dtype='object') 

5540 

5541 >>> idx1.equals(idx2) 

5542 False 

5543 

5544 The order is compared 

5545 

5546 >>> ascending_idx = pd.Index([1, 2, 3]) 

5547 >>> ascending_idx 

5548 Int64Index([1, 2, 3], dtype='int64') 

5549 >>> descending_idx = pd.Index([3, 2, 1]) 

5550 >>> descending_idx 

5551 Int64Index([3, 2, 1], dtype='int64') 

5552 >>> ascending_idx.equals(descending_idx) 

5553 False 

5554 

5555 The dtype is *not* compared 

5556 

5557 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') 

5558 >>> int64_idx 

5559 Int64Index([1, 2, 3], dtype='int64') 

5560 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') 

5561 >>> uint64_idx 

5562 UInt64Index([1, 2, 3], dtype='uint64') 

5563 >>> int64_idx.equals(uint64_idx) 

5564 True 

5565 """ 

5566 if self.is_(other): 

5567 return True 

5568 

5569 if not isinstance(other, Index): 

5570 return False 

5571 

5572 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype): 

5573 # if other is not object, use other's logic for coercion 

5574 return other.equals(self) 

5575 

5576 if isinstance(other, ABCMultiIndex): 

5577 # d-level MultiIndex can equal d-tuple Index 

5578 return other.equals(self) 

5579 

5580 if isinstance(self._values, ExtensionArray): 

5581 # Dispatch to the ExtensionArray's .equals method. 

5582 if not isinstance(other, type(self)): 

5583 return False 

5584 

5585 earr = cast(ExtensionArray, self._data) 

5586 return earr.equals(other._data) 

5587 

5588 if is_extension_array_dtype(other.dtype): 

5589 # All EA-backed Index subclasses override equals 

5590 return other.equals(self) 

5591 

5592 return array_equivalent(self._values, other._values) 

5593 

5594 @final 

5595 def identical(self, other) -> bool: 

5596 """ 

5597 Similar to equals, but checks that object attributes and types are also equal. 

5598 

5599 Returns 

5600 ------- 

5601 bool 

5602 True if the two Index objects have equal elements and the same 

5603 type, otherwise False. 

5604 """ 

5605 return ( 

5606 self.equals(other) 

5607 and all( 

5608 getattr(self, c, None) == getattr(other, c, None) 

5609 for c in self._comparables 

5610 ) 

5611 and type(self) == type(other) 

5612 ) 
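
A small sketch contrasting equals and identical:

    import pandas as pd

    a = pd.Index([1, 2, 3], name="x")
    b = pd.Index([1, 2, 3], name="y")
    a.equals(b)      # True: only elements and their order are compared
    a.identical(b)   # False: attributes such as the name differ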

5613 

5614 @final 

5615 def asof(self, label): 

5616 """ 

5617 Return the label from the index, or, if not present, the previous one. 

5618 

5619 Assuming that the index is sorted, return the passed index label if it 

5620 is in the index, or return the previous index label if the passed one 

5621 is not in the index. 

5622 

5623 Parameters 

5624 ---------- 

5625 label : object 

5626 The label up to which the method returns the latest index label. 

5627 

5628 Returns 

5629 ------- 

5630 object 

5631 The passed label if it is in the index. The previous label if the 

5632 passed label is not in the sorted index or `NaN` if there is no 

5633 such label. 

5634 

5635 See Also 

5636 -------- 

5637 Series.asof : Return the latest value in a Series up to the 

5638 passed index. 

5639 merge_asof : Perform an asof merge (similar to left join but it 

5640 matches on nearest key rather than equal key). 

5641 Index.get_loc : An `asof` is a thin wrapper around `get_loc` 

5642 with method='pad'. 

5643 

5644 Examples 

5645 -------- 

5646 `Index.asof` returns the latest index label up to the passed label. 

5647 

5648 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) 

5649 >>> idx.asof('2014-01-01') 

5650 '2013-12-31' 

5651 

5652 If the label is in the index, the method returns the passed label. 

5653 

5654 >>> idx.asof('2014-01-02') 

5655 '2014-01-02' 

5656 

5657 If all of the labels in the index are later than the passed label, 

5658 NaN is returned. 

5659 

5660 >>> idx.asof('1999-01-02') 

5661 nan 

5662 

5663 If the index is not sorted, an error is raised. 

5664 

5665 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', 

5666 ... '2014-01-03']) 

5667 >>> idx_not_sorted.asof('2013-12-31') 

5668 Traceback (most recent call last): 

5669 ValueError: index must be monotonic increasing or decreasing 

5670 """ 

5671 self._searchsorted_monotonic(label) # validate sortedness 

5672 try: 

5673 loc = self.get_loc(label) 

5674 except (KeyError, TypeError): 

5675 # KeyError -> No exact match, try for padded 

5676 # TypeError -> passed e.g. non-hashable, fall through to get 

5677 # the tested exception message 

5678 indexer = self.get_indexer([label], method="pad") 

5679 if indexer.ndim > 1 or indexer.size > 1: 

5680 raise TypeError("asof requires scalar valued input") 

5681 loc = indexer.item() 

5682 if loc == -1: 

5683 return self._na_value 

5684 else: 

5685 if isinstance(loc, slice): 

5686 loc = loc.indices(len(self))[-1] 

5687 

5688 return self[loc] 

5689 

5690 def asof_locs( 

5691 self, where: Index, mask: npt.NDArray[np.bool_] 

5692 ) -> npt.NDArray[np.intp]: 

5693 """ 

5694 Return the locations (indices) of labels in the index. 

5695 

5696 As in the `asof` function, if the label (a particular entry in 

5697 `where`) is not in the index, the latest index label up to the 

5698 passed label is chosen and its index returned. 

5699 

5700 If all of the labels in the index are later than a label in `where`, 

5701 -1 is returned. 

5702 

5703 `mask` is used to ignore NA values in the index during calculation. 

5704 

5705 Parameters 

5706 ---------- 

5707 where : Index 

5708 An Index consisting of an array of timestamps. 

5709 mask : np.ndarray[bool] 

5710 Array of booleans denoting where values in the original 

5711 data are not NA. 

5712 

5713 Returns 

5714 ------- 

5715 np.ndarray[np.intp] 

5716 An array of locations (indices) of the labels from the Index 

5717 which correspond to the return values of the `asof` function 

5718 for every element in `where`. 

5719 """ 

5720 # error: No overload variant of "searchsorted" of "ndarray" matches argument 

5721 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str" 

5722 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed 

5723 locs = self._values[mask].searchsorted( 

5724 where._values, side="right" # type: ignore[call-overload] 

5725 ) 

5726 locs = np.where(locs > 0, locs - 1, 0) 

5727 

5728 result = np.arange(len(self), dtype=np.intp)[mask].take(locs) 

5729 

5730 first_value = self._values[mask.argmax()] 

5731 result[(locs == 0) & (where._values < first_value)] = -1 

5732 

5733 return result 
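
A sketch exercising asof_locs directly on hypothetical inputs; the commented
result follows the searchsorted logic above:

    import numpy as np
    import pandas as pd

    idx = pd.Index([10, 20, 30])
    where = pd.Index([5, 20, 25])
    mask = np.ones(len(idx), dtype=bool)   # no NA positions to ignore
    idx.asof_locs(where, mask)             # array([-1,  1,  1])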

5734 

5735 def sort_values( 

5736 self, 

5737 return_indexer: bool = False, 

5738 ascending: bool = True, 

5739 na_position: str_t = "last", 

5740 key: Callable | None = None, 

5741 ): 

5742 """ 

5743 Return a sorted copy of the index. 

5744 

5745 Return a sorted copy of the index, and optionally return the indices 

5746 that sorted the index itself. 

5747 

5748 Parameters 

5749 ---------- 

5750 return_indexer : bool, default False 

5751 Whether to also return the indices that would sort the index. 

5752 ascending : bool, default True 

5753 Whether to sort the index values in ascending order. 

5754 na_position : {'first' or 'last'}, default 'last' 

5755 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at 

5756 the end. 

5757 

5758 .. versionadded:: 1.2.0 

5759 

5760 key : callable, optional 

5761 If not None, apply the key function to the index values 

5762 before sorting. This is similar to the `key` argument in the 

5763 builtin :meth:`sorted` function, with the notable difference that 

5764 this `key` function should be *vectorized*. It should expect an 

5765 ``Index`` and return an ``Index`` of the same shape. 

5766 

5767 .. versionadded:: 1.1.0 

5768 

5769 Returns 

5770 ------- 

5771 sorted_index : pandas.Index 

5772 Sorted copy of the index. 

5773 indexer : numpy.ndarray, optional 

5774 The indices that the index itself was sorted by. 

5775 

5776 See Also 

5777 -------- 

5778 Series.sort_values : Sort values of a Series. 

5779 DataFrame.sort_values : Sort values in a DataFrame. 

5780 

5781 Examples 

5782 -------- 

5783 >>> idx = pd.Index([10, 100, 1, 1000]) 

5784 >>> idx 

5785 Int64Index([10, 100, 1, 1000], dtype='int64') 

5786 

5787 Sort values in ascending order (default behavior). 

5788 

5789 >>> idx.sort_values() 

5790 Int64Index([1, 10, 100, 1000], dtype='int64') 

5791 

5792 Sort values in descending order, and also get the indices `idx` was 

5793 sorted by. 

5794 

5795 >>> idx.sort_values(ascending=False, return_indexer=True) 

5796 (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) 

5797 """ 

5798 idx = ensure_key_mapped(self, key) 

5799 

5800 # GH 35584. Sort missing values according to na_position kwarg 

5801 # ignore na_position for MultiIndex 

5802 if not isinstance(self, ABCMultiIndex): 

5803 _as = nargsort( 

5804 items=idx, ascending=ascending, na_position=na_position, key=key 

5805 ) 

5806 else: 

5807 _as = idx.argsort() 

5808 if not ascending: 

5809 _as = _as[::-1] 

5810 

5811 sorted_index = self.take(_as) 

5812 

5813 if return_indexer: 

5814 return sorted_index, _as 

5815 else: 

5816 return sorted_index 

5817 

5818 @final 

5819 def sort(self, *args, **kwargs): 

5820 """ 

5821 Use sort_values instead. 

5822 """ 

5823 raise TypeError("cannot sort an Index object in-place, use sort_values instead") 

5824 

5825 def shift(self, periods=1, freq=None): 

5826 """ 

5827 Shift index by desired number of time frequency increments. 

5828 

5829 This method is for shifting the values of datetime-like indexes 

5830 by a specified time increment a given number of times. 

5831 

5832 Parameters 

5833 ---------- 

5834 periods : int, default 1 

5835 Number of periods (or increments) to shift by, 

5836 can be positive or negative. 

5837 freq : pandas.DateOffset, pandas.Timedelta or str, optional 

5838 Frequency increment to shift by. 

5839 If None, the index is shifted by its own `freq` attribute. 

5840 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. 

5841 

5842 Returns 

5843 ------- 

5844 pandas.Index 

5845 Shifted index. 

5846 

5847 See Also 

5848 -------- 

5849 Series.shift : Shift values of Series. 

5850 

5851 Notes 

5852 ----- 

5853 This method is only implemented for datetime-like index classes, 

5854 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex. 

5855 

5856 Examples 

5857 -------- 

5858 Put the first 5 month starts of 2011 into an index. 

5859 

5860 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') 

5861 >>> month_starts 

5862 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', 

5863 '2011-05-01'], 

5864 dtype='datetime64[ns]', freq='MS') 

5865 

5866 Shift the index by 10 days. 

5867 

5868 >>> month_starts.shift(10, freq='D') 

5869 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', 

5870 '2011-05-11'], 

5871 dtype='datetime64[ns]', freq=None) 

5872 

5873 The default value of `freq` is the `freq` attribute of the index, 

5874 which is 'MS' (month start) in this example. 

5875 

5876 >>> month_starts.shift(10) 

5877 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01', 

5878 '2012-03-01'], 

5879 dtype='datetime64[ns]', freq='MS') 

5880 """ 

5881 raise NotImplementedError( 

5882 f"This method is only implemented for DatetimeIndex, PeriodIndex and " 

5883 f"TimedeltaIndex; Got type {type(self).__name__}" 

5884 ) 

5885 

5886 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: 

5887 """ 

5888 Return the integer indices that would sort the index. 

5889 

5890 Parameters 

5891 ---------- 

5892 *args 

5893 Passed to `numpy.ndarray.argsort`. 

5894 **kwargs 

5895 Passed to `numpy.ndarray.argsort`. 

5896 

5897 Returns 

5898 ------- 

5899 np.ndarray[np.intp] 

5900 Integer indices that would sort the index if used as 

5901 an indexer. 

5902 

5903 See Also 

5904 -------- 

5905 numpy.argsort : Similar method for NumPy arrays. 

5906 Index.sort_values : Return sorted copy of Index. 

5907 

5908 Examples 

5909 -------- 

5910 >>> idx = pd.Index(['b', 'a', 'd', 'c']) 

5911 >>> idx 

5912 Index(['b', 'a', 'd', 'c'], dtype='object') 

5913 

5914 >>> order = idx.argsort() 

5915 >>> order 

5916 array([1, 0, 3, 2]) 

5917 

5918 >>> idx[order] 

5919 Index(['a', 'b', 'c', 'd'], dtype='object') 

5920 """ 

5921 # This works for either ndarray or EA, is overridden 

5922 # by RangeIndex, MultiIndex 

5923 return self._data.argsort(*args, **kwargs) 

5924 

5925 @final 

5926 def get_value(self, series: Series, key): 

5927 """ 

5928 Fast lookup of value from 1-dimensional ndarray. 

5929 

5930 Only use this if you know what you're doing. 

5931 

5932 Returns 

5933 ------- 

5934 scalar or Series 

5935 """ 

5936 warnings.warn( 

5937 "get_value is deprecated and will be removed in a future version. " 

5938 "Use Series[key] instead.", 

5939 FutureWarning, 

5940 stacklevel=find_stack_level(), 

5941 ) 

5942 

5943 self._check_indexing_error(key) 

5944 

5945 try: 

5946 # GH 20882, 21257 

5947 # First try to convert the key to a location 

5948 # If that fails, raise a KeyError if an integer 

5949 # index, otherwise, see if key is an integer, and 

5950 # try that 

5951 loc = self.get_loc(key) 

5952 except KeyError: 

5953 if not self._should_fallback_to_positional: 

5954 raise 

5955 elif is_integer(key): 

5956 # If the Index cannot hold integer, then this is unambiguously 

5957 # a locational lookup. 

5958 loc = key 

5959 else: 

5960 raise 

5961 

5962 return self._get_values_for_loc(series, loc, key) 

5963 

5964 def _check_indexing_error(self, key): 

5965 if not is_scalar(key): 

5966 # if key is not a scalar, directly raise an error (the code below 

5967 # would convert to numpy arrays and raise later anyway) - GH29926 

5968 raise InvalidIndexError(key) 

5969 

5970 @cache_readonly 

5971 def _should_fallback_to_positional(self) -> bool: 

5972 """ 

5973 Should an integer key be treated as positional? 

5974 """ 

5975 return not self.holds_integer() 

5976 

5977 def _get_values_for_loc(self, series: Series, loc, key): 

5978 """ 

5979 Do a positional lookup on the given Series, returning either a scalar 

5980 or a Series. 

5981 

5982 Assumes that `series.index is self` 

5983 

5984 key is included for MultiIndex compat. 

5985 """ 

5986 if is_integer(loc): 

5987 return series._values[loc] 

5988 

5989 return series.iloc[loc] 

5990 

5991 @final 

5992 def set_value(self, arr, key, value) -> None: 

5993 """ 

5994 Fast lookup of value from 1-dimensional ndarray. 

5995 

5996 .. deprecated:: 1.0 

5997 

5998 Notes 

5999 ----- 

6000 Only use this if you know what you're doing. 

6001 """ 

6002 warnings.warn( 

6003 ( 

6004 "The 'set_value' method is deprecated, and " 

6005 "will be removed in a future version." 

6006 ), 

6007 FutureWarning, 

6008 stacklevel=find_stack_level(), 

6009 ) 

6010 loc = self._engine.get_loc(key) 

6011 if not can_hold_element(arr, value): 

6012 raise ValueError 

6013 arr[loc] = value 

6014 

6015 _index_shared_docs[ 

6016 "get_indexer_non_unique" 

6017 ] = """ 

6018 Compute indexer and mask for new index given the current index. 

6019 

6020 The indexer should be then used as an input to ndarray.take to align the 

6021 current data to the new index. 

6022 

6023 Parameters 

6024 ---------- 

6025 target : %(target_klass)s 

6026 

6027 Returns 

6028 ------- 

6029 indexer : np.ndarray[np.intp] 

6030 Integers from 0 to n - 1 indicating that the index at these 

6031 positions matches the corresponding target values. Missing values 

6032 in the target are marked by -1. 

6033 missing : np.ndarray[np.intp] 

6034 An indexer into the target of the values not found. 

6035 These correspond to the -1 in the indexer array. 

6036 """ 

6037 

6038 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) 

6039 def get_indexer_non_unique( 

6040 self, target 

6041 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6042 target = ensure_index(target) 

6043 target = self._maybe_cast_listlike_indexer(target) 

6044 

6045 if not self._should_compare(target) and not is_interval_dtype(self.dtype): 

6046 # IntervalIndex get special treatment bc numeric scalars can be 

6047 # matched to Interval scalars 

6048 return self._get_indexer_non_comparable(target, method=None, unique=False) 

6049 

6050 pself, ptarget = self._maybe_promote(target) 

6051 if pself is not self or ptarget is not target: 

6052 return pself.get_indexer_non_unique(ptarget) 

6053 

6054 if not is_dtype_equal(self.dtype, target.dtype): 

6055 # TODO: if object, could use infer_dtype to preempt costly 

6056 # conversion if still non-comparable? 

6057 dtype = self._find_common_type_compat(target) 

6058 

6059 this = self.astype(dtype, copy=False) 

6060 that = target.astype(dtype, copy=False) 

6061 return this.get_indexer_non_unique(that) 

6062 

6063 # Note: _maybe_promote ensures we never get here with MultiIndex 

6064 # self and non-Multi target 

6065 tgt_values = target._get_engine_target() 

6066 if self._is_multi and target._is_multi: 

6067 engine = self._engine 

6068 # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has 

6069 # no attribute "_extract_level_codes" 

6070 tgt_values = engine._extract_level_codes(target) # type: ignore[union-attr] 

6071 

6072 indexer, missing = self._engine.get_indexer_non_unique(tgt_values) 

6073 return ensure_platform_int(indexer), ensure_platform_int(missing) 
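
A sketch of the two return values on a non-unique index (expected results in
comments):

    import pandas as pd

    idx = pd.Index(["a", "b", "a"])
    indexer, missing = idx.get_indexer_non_unique(["a", "z"])
    # indexer -> array([ 0,  2, -1]): every position of 'a', then -1 for 'z'
    # missing -> array([1]): position of 'z' within the target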

6074 

6075 @final 

6076 def get_indexer_for(self, target) -> npt.NDArray[np.intp]: 

6077 """ 

6078 Guaranteed return of an indexer even when non-unique. 

6079 

6080 This dispatches to get_indexer or get_indexer_non_unique 

6081 as appropriate. 

6082 

6083 Returns 

6084 ------- 

6085 np.ndarray[np.intp] 

6086 List of indices. 

6087 

6088 Examples 

6089 -------- 

6090 >>> idx = pd.Index([np.nan, 'var1', np.nan]) 

6091 >>> idx.get_indexer_for([np.nan]) 

6092 array([0, 2]) 

6093 """ 

6094 if self._index_as_unique: 

6095 return self.get_indexer(target) 

6096 indexer, _ = self.get_indexer_non_unique(target) 

6097 return indexer 

6098 

6099 def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]: 

6100 """ 

6101 Analogue to get_indexer that raises if any elements are missing. 

6102 """ 

6103 keyarr = key 

6104 if not isinstance(keyarr, Index): 

6105 keyarr = com.asarray_tuplesafe(keyarr) 

6106 

6107 if self._index_as_unique: 

6108 indexer = self.get_indexer_for(keyarr) 

6109 keyarr = self.reindex(keyarr)[0] 

6110 else: 

6111 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) 

6112 

6113 self._raise_if_missing(keyarr, indexer, axis_name) 

6114 

6115 keyarr = self.take(indexer) 

6116 if isinstance(key, Index): 

6117 # GH 42790 - Preserve name from an Index 

6118 keyarr.name = key.name 

6119 if keyarr.dtype.kind in ["m", "M"]: 

6120 # DTI/TDI.take can infer a freq in some cases when we don't want one 

6121 if isinstance(key, list) or ( 

6122 isinstance(key, type(self)) 

6123 # "Index" has no attribute "freq" 

6124 and key.freq is None # type: ignore[attr-defined] 

6125 ): 

6126 keyarr = keyarr._with_freq(None) 

6127 

6128 return keyarr, indexer 

6129 

6130 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: 

6131 """ 

6132 Check that indexer can be used to return a result. 

6133 

6134 e.g. at least one element was found, 

6135 unless the list of keys was actually empty. 

6136 

6137 Parameters 

6138 ---------- 

6139 key : list-like 

6140 Targeted labels (only used to show correct error message). 

6141 indexer : array-like of integers 

6142 Indices corresponding to the key, 

6143 (with -1 indicating not found). 

6144 axis_name : str 

6145 

6146 Raises 

6147 ------ 

6148 KeyError 

6149 If at least one key was requested but none was found. 

6150 """ 

6151 if len(key) == 0: 

6152 return 

6153 

6154 # Count missing values 

6155 missing_mask = indexer < 0 

6156 nmissing = missing_mask.sum() 

6157 

6158 if nmissing: 

6159 

6160 # TODO: remove special-case; this is just to keep exception 

6161 # message tests from raising while debugging 

6162 use_interval_msg = is_interval_dtype(self.dtype) or ( 

6163 is_categorical_dtype(self.dtype) 

6164 # "Index" has no attribute "categories" [attr-defined] 

6165 and is_interval_dtype( 

6166 self.categories.dtype # type: ignore[attr-defined] 

6167 ) 

6168 ) 

6169 

6170 if nmissing == len(indexer): 

6171 if use_interval_msg: 

6172 key = list(key) 

6173 raise KeyError(f"None of [{key}] are in the [{axis_name}]") 

6174 

6175 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) 

6176 raise KeyError(f"{not_found} not in index") 

6177 

6178 @overload 

6179 def _get_indexer_non_comparable( 

6180 self, target: Index, method, unique: Literal[True] = ... 

6181 ) -> npt.NDArray[np.intp]: 

6182 ... 

6183 

6184 @overload 

6185 def _get_indexer_non_comparable( 

6186 self, target: Index, method, unique: Literal[False] 

6187 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6188 ... 

6189 

6190 @overload 

6191 def _get_indexer_non_comparable( 

6192 self, target: Index, method, unique: bool = True 

6193 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6194 ... 

6195 

6196 @final 

6197 def _get_indexer_non_comparable( 

6198 self, target: Index, method, unique: bool = True 

6199 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6200 """ 

6201 Called from get_indexer or get_indexer_non_unique when the target 

6202 is of a non-comparable dtype. 

6203 

6204 For get_indexer lookups with method=None, get_indexer is an _equality_ 

6205 check, so non-comparable dtypes mean we will always have no matches. 

6206 

6207 For get_indexer lookups with a method, get_indexer is an _inequality_ 

6208 check, so non-comparable dtypes mean we will always raise TypeError. 

6209 

6210 Parameters 

6211 ---------- 

6212 target : Index 

6213 method : str or None 

6214 unique : bool, default True 

6215 * True if called from get_indexer. 

6216 * False if called from get_indexer_non_unique. 

6217 

6218 Raises 

6219 ------ 

6220 TypeError 

6221 If doing an inequality check, i.e. method is not None. 

6222 """ 

6223 if method is not None: 

6224 other = unpack_nested_dtype(target) 

6225 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}") 

6226 

6227 no_matches = -1 * np.ones(target.shape, dtype=np.intp) 

6228 if unique: 

6229 # This is for get_indexer 

6230 return no_matches 

6231 else: 

6232 # This is for get_indexer_non_unique 

6233 missing = np.arange(len(target), dtype=np.intp) 

6234 return no_matches, missing 
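
A sketch of both branches through the public get_indexer, assuming pandas
1.x comparability rules (integers are not comparable to datetimes):

    import pandas as pd

    dti = pd.date_range("2021-01-01", periods=3)
    dti.get_indexer([1, 2])                  # equality check -> array([-1, -1])
    dti.get_indexer([1, 2], method="pad")    # inequality check -> raises TypeError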

6235 

6236 @property 

6237 def _index_as_unique(self) -> bool: 

6238 """ 

6239 Whether we should treat this as unique for the sake of 

6240 get_indexer vs get_indexer_non_unique. 

6241 

6242 For IntervalIndex compat. 

6243 """ 

6244 return self.is_unique 

6245 

6246 _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" 

6247 

6248 @final 

6249 def _maybe_promote(self, other: Index) -> tuple[Index, Index]: 

6250 """ 

6251 When dealing with an object-dtype Index and a non-object Index, see 

6252 if we can upcast the object-dtype one to improve performance. 

6253 """ 

6254 

6255 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): 

6256 if ( 

6257 self.tz is not None 

6258 and other.tz is not None 

6259 and not tz_compare(self.tz, other.tz) 

6260 ): 

6261 # standardize on UTC 

6262 return self.tz_convert("UTC"), other.tz_convert("UTC") 

6263 

6264 elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): 

6265 try: 

6266 return type(other)(self), other 

6267 except OutOfBoundsDatetime: 

6268 return self, other 

6269 elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex): 

6270 # TODO: we don't have tests that get here 

6271 return type(other)(self), other 

6272 

6273 elif self.dtype.kind == "u" and other.dtype.kind == "i": 

6274 # GH#41873 

6275 if other.min() >= 0: 

6276 # lookup min as it may be cached 

6277 # TODO: may need itemsize check if we have non-64-bit Indexes 

6278 return self, other.astype(self.dtype) 

6279 

6280 elif self._is_multi and not other._is_multi: 

6281 try: 

6282 # "Type[Index]" has no attribute "from_tuples" 

6283 other = type(self).from_tuples(other) # type: ignore[attr-defined] 

6284 except (TypeError, ValueError): 

6285 # let's instead try with a straight Index 

6286 self = Index(self._values) 

6287 

6288 if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): 

6289 # Reverse op so we don't need to re-implement on the subclasses 

6290 other, self = other._maybe_promote(self) 

6291 

6292 return self, other 
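
A sketch of the uint64/signed-int branch (GH#41873) observed via the public
get_indexer; the promotion itself is internal, so the commented result is an
assumption based on the code above:

    import pandas as pd

    left = pd.Index([1, 2, 3], dtype="uint64")
    right = pd.Index([2, 3], dtype="int64")   # all values non-negative
    left.get_indexer(right)                   # right is viewed as uint64 -> array([1, 2])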

6293 

6294 @final 

6295 def _find_common_type_compat(self, target) -> DtypeObj: 

6296 """ 

6297 Implementation of find_common_type that adjusts for Index-specific 

6298 special cases. 

6299 """ 

6300 if is_valid_na_for_dtype(target, self.dtype): 

6301 # e.g. setting NA value into IntervalArray[int64] 

6302 dtype = ensure_dtype_can_hold_na(self.dtype) 

6303 if is_dtype_equal(self.dtype, dtype): 

6304 raise NotImplementedError( 

6305 "This should not be reached. Please report a bug at " 

6306 "github.com/pandas-dev/pandas" 

6307 ) 

6308 return dtype 

6309 

6310 target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) 

6311 

6312 # special case: if one dtype is uint64 and the other a signed int, return object 

6313 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion 

6314 # Now it's: 

6315 # * float | [u]int -> float 

6316 # * uint64 | signed int -> object 

6317 # We may change union(float | [u]int) to go to object. 

6318 if self.dtype == "uint64" or target_dtype == "uint64": 

6319 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype( 

6320 target_dtype 

6321 ): 

6322 return _dtype_obj 

6323 

6324 dtype = find_common_type([self.dtype, target_dtype]) 

6325 dtype = common_dtype_categorical_compat([self, target], dtype) 

6326 return dtype 
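
A sketch of the uint64-vs-signed special case, observed through union
(assumed pandas 1.x behavior):

    import pandas as pd

    u = pd.Index([1, 2], dtype="uint64")
    s = pd.Index([-1, 0], dtype="int64")
    u.union(s).dtype     # dtype('O'): no lossless common numeric dtype exists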

6327 

6328 @final 

6329 def _should_compare(self, other: Index) -> bool: 

6330 """ 

6331 Check if `self == other` can ever have non-False entries. 

6332 """ 

6333 

6334 if (other.is_boolean() and self.is_numeric()) or ( 

6335 self.is_boolean() and other.is_numeric() 

6336 ): 

6337 # GH#16877 Treat boolean labels passed to a numeric index as not 

6338 # found. Without this fix False and True would be treated as 0 and 1 

6339 # respectively. 

6340 return False 

6341 

6342 other = unpack_nested_dtype(other) 

6343 dtype = other.dtype 

6344 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype) 

6345 

6346 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

6347 """ 

6348 Can we compare values of the given dtype to our own? 

6349 """ 

6350 if self.dtype.kind == "b": 

6351 return dtype.kind == "b" 

6352 elif is_numeric_dtype(self.dtype): 

6353 return is_numeric_dtype(dtype) 

6354 return True 
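
The GH#16877 rule in _should_compare above, sketched via get_indexer:

    import pandas as pd

    idx = pd.Index([0, 1, 2])
    idx.get_indexer([False, True])   # array([-1, -1]): bools never match 0/1
    idx.get_indexer([0, 1])          # array([0, 1])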

6355 

6356 @final 

6357 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: 

6358 """ 

6359 Group the index labels by a given array of values. 

6360 

6361 Parameters 

6362 ---------- 

6363 values : array 

6364 Values used to determine the groups. 

6365 

6366 Returns 

6367 ------- 

6368 dict 

6369 {group name -> group labels} 

6370 """ 

6371 # TODO: if we are a MultiIndex, we can do better 

6372 # that converting to tuples 

6373 if isinstance(values, ABCMultiIndex): 

6374 values = values._values 

6375 values = Categorical(values) 

6376 result = values._reverse_indexer() 

6377 

6378 # map to the label 

6379 result = {k: self.take(v) for k, v in result.items()} 

6380 

6381 return PrettyDict(result) 
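
A minimal sketch of groupby mapping group values to index labels (expected
result shown in the comment):

    import numpy as np
    import pandas as pd

    idx = pd.Index(["a", "b", "c", "d"])
    idx.groupby(np.array([1, 2, 1, 2]))
    # {1: Index(['a', 'c'], dtype='object'), 2: Index(['b', 'd'], dtype='object')}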

6382 

6383 def map(self, mapper, na_action=None): 

6384 """ 

6385 Map values using an input mapping or function. 

6386 

6387 Parameters 

6388 ---------- 

6389 mapper : function, dict, or Series 

6390 Mapping correspondence. 

6391 na_action : {None, 'ignore'} 

6392 If 'ignore', propagate NA values, without passing them to the 

6393 mapping correspondence. 

6394 

6395 Returns 

6396 ------- 

6397 applied : Union[Index, MultiIndex], inferred 

6398 The output of the mapping function applied to the index. 

6399 If the function returns a tuple with more than one element, 

6400 a MultiIndex will be returned. 

6401 """ 

6402 from pandas.core.indexes.multi import MultiIndex 

6403 

6404 new_values = self._map_values(mapper, na_action=na_action) 

6405 

6406 # we can return a MultiIndex 

6407 if new_values.size and isinstance(new_values[0], tuple): 

6408 if isinstance(self, MultiIndex): 

6409 names = self.names 

6410 elif self.name: 

6411 names = [self.name] * len(new_values[0]) 

6412 else: 

6413 names = None 

6414 return MultiIndex.from_tuples(new_values, names=names) 

6415 

6416 dtype = None 

6417 if not new_values.size: 

6418 # empty 

6419 dtype = self.dtype 

6420 

6421 # e.g. if we are floating and new_values is all ints, then we 

6422 # don't want to cast back to floating. But if we are UInt64 

6423 # and new_values is all ints, we want to try. 

6424 same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type 

6425 if same_dtype: 

6426 new_values = maybe_cast_pointwise_result( 

6427 new_values, self.dtype, same_dtype=same_dtype 

6428 ) 

6429 

6430 if self._is_backward_compat_public_numeric_index and is_numeric_dtype( 

6431 new_values.dtype 

6432 ): 

6433 return self._constructor( 

6434 new_values, dtype=dtype, copy=False, name=self.name 

6435 ) 

6436 

6437 return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) 
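
A sketch of the two map paths described above (dict lookup with missing keys,
and tuple results promoting to a MultiIndex):

    import pandas as pd

    idx = pd.Index([1, 2, 3])
    idx.map({1: "a", 2: "b"})        # Index(['a', 'b', nan], dtype='object')
    idx.map(lambda x: (x, x ** 2))   # tuple results -> MultiIndex with 2 levels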

6438 

6439 # TODO: De-duplicate with map, xref GH#32349 

6440 @final 

6441 def _transform_index(self, func, *, level=None) -> Index: 

6442 """ 

6443 Apply function to all values found in index. 

6444 

6445 This includes transforming multiindex entries separately. 

6446 Only apply function to one level of the MultiIndex if level is specified. 

6447 """ 

6448 if isinstance(self, ABCMultiIndex): 

6449 if level is not None: 

6450 # Caller is responsible for ensuring level is positional. 

6451 items = [ 

6452 tuple(func(y) if i == level else y for i, y in enumerate(x)) 

6453 for x in self 

6454 ] 

6455 else: 

6456 items = [tuple(func(y) for y in x) for x in self] 

6457 return type(self).from_tuples(items, names=self.names) 

6458 else: 

6459 items = [func(x) for x in self] 

6460 return Index(items, name=self.name, tupleize_cols=False) 

6461 

6462 def isin(self, values, level=None) -> npt.NDArray[np.bool_]: 

6463 """ 

6464 Return a boolean array where the index values are in `values`. 

6465 

6466 Compute boolean array of whether each index value is found in the 

6467 passed set of values. The length of the returned boolean array matches 

6468 the length of the index. 

6469 

6470 Parameters 

6471 ---------- 

6472 values : set or list-like 

6473 Sought values. 

6474 level : str or int, optional 

6475 Name or position of the index level to use (if the index is a 

6476 `MultiIndex`). 

6477 

6478 Returns 

6479 ------- 

6480 np.ndarray[bool] 

6481 NumPy array of boolean values. 

6482 

6483 See Also 

6484 -------- 

6485 Series.isin : Same for Series. 

6486 DataFrame.isin : Same method for DataFrames. 

6487 

6488 Notes 

6489 ----- 

6490 In the case of `MultiIndex` you must either specify `values` as a 

6491 list-like object containing tuples that are the same length as the 

6492 number of levels, or specify `level`. Otherwise it will raise a 

6493 ``ValueError``. 

6494 

6495 If `level` is specified: 

6496 

6497 - if it is the name of one *and only one* index level, use that level; 

6498 - otherwise it should be a number indicating level position. 

6499 

6500 Examples 

6501 -------- 

6502 >>> idx = pd.Index([1, 2, 3]) 

6503 >>> idx 

6504 Int64Index([1, 2, 3], dtype='int64') 

6505 

6506 Check whether each index value is in a list of values. 

6507 

6508 >>> idx.isin([1, 4]) 

6509 array([ True, False, False]) 

6510 

6511 >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], 

6512 ... ['red', 'blue', 'green']], 

6513 ... names=('number', 'color')) 

6514 >>> midx 

6515 MultiIndex([(1, 'red'), 

6516 (2, 'blue'), 

6517 (3, 'green')], 

6518 names=['number', 'color']) 

6519 

6520 Check whether the strings in the 'color' level of the MultiIndex 

6521 are in a list of colors. 

6522 

6523 >>> midx.isin(['red', 'orange', 'yellow'], level='color') 

6524 array([ True, False, False]) 

6525 

6526 To check across the levels of a MultiIndex, pass a list of tuples: 

6527 

6528 >>> midx.isin([(1, 'red'), (3, 'red')]) 

6529 array([ True, False, False]) 

6530 

6531 For a DatetimeIndex, string values in `values` are converted to 

6532 Timestamps. 

6533 

6534 >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] 

6535 >>> dti = pd.to_datetime(dates) 

6536 >>> dti 

6537 DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], 

6538 dtype='datetime64[ns]', freq=None) 

6539 

6540 >>> dti.isin(['2000-03-11']) 

6541 array([ True, False, False]) 

6542 """ 

6543 if level is not None: 

6544 self._validate_index_level(level) 

6545 return algos.isin(self._values, values) 

6546 

6547 def _get_string_slice(self, key: str_t): 

6548 # this is for partial string indexing, 

6549 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex 

6550 raise NotImplementedError 

6551 

6552 def slice_indexer( 

6553 self, 

6554 start: Hashable | None = None, 

6555 end: Hashable | None = None, 

6556 step: int | None = None, 

6557 kind=no_default, 

6558 ) -> slice: 

6559 """ 

6560 Compute the slice indexer for input labels and step. 

6561 

6562 Index needs to be ordered and unique. 

6563 

6564 Parameters 

6565 ---------- 

6566 start : label, default None 

6567 If None, defaults to the beginning. 

6568 end : label, default None 

6569 If None, defaults to the end. 

6570 step : int, default None 

6571 kind : str, default None 

6572 

6573 .. deprecated:: 1.4.0 

6574 

6575 Returns 

6576 ------- 

6577 indexer : slice 

6578 

6579 Raises 

6580 ------ 

6581 KeyError : If key does not exist, or key is not unique and index is 

6582 not ordered. 

6583 

6584 Notes 

6585 ----- 

6586 This function assumes that the data is sorted, so use at your own peril. 

6587 

6588 Examples 

6589 -------- 

6590 This is a method on all index types. For example you can do: 

6591 

6592 >>> idx = pd.Index(list('abcd')) 

6593 >>> idx.slice_indexer(start='b', end='c') 

6594 slice(1, 3, None) 

6595 

6596 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) 

6597 >>> idx.slice_indexer(start='b', end=('c', 'g')) 

6598 slice(1, 3, None) 

6599 """ 

6600 self._deprecated_arg(kind, "kind", "slice_indexer") 

6601 

6602 start_slice, end_slice = self.slice_locs(start, end, step=step) 

6603 

6604 # return a slice 

6605 if not is_scalar(start_slice): 

6606 raise AssertionError("Start slice bound is non-scalar") 

6607 if not is_scalar(end_slice): 

6608 raise AssertionError("End slice bound is non-scalar") 

6609 

6610 return slice(start_slice, end_slice, step) 

6611 

6612 def _maybe_cast_indexer(self, key): 

6613 """ 

6614 If we have a float key and are not a floating index, then try to cast 

6615 to an int if equivalent. 

6616 """ 

6617 return key 

6618 

6619 def _maybe_cast_listlike_indexer(self, target) -> Index: 

6620 """ 

6621 Analogue to maybe_cast_indexer for get_indexer instead of get_loc. 

6622 """ 

6623 return ensure_index(target) 

6624 

6625 @final 

6626 def _validate_indexer(self, form: str_t, key, kind: str_t): 

6627 """ 

6628 If we are a positional indexer, validate that the bounds are 

6629 appropriately typed, i.e. integers. 

6630 """ 

6631 assert kind in ["getitem", "iloc"] 

6632 

6633 if key is not None and not is_integer(key): 

6634 raise self._invalid_indexer(form, key) 

6635 

6636 def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default): 

6637 """ 

6638 This function should be overloaded in subclasses that allow non-trivial 

6639 casting on label-slice bounds, e.g. datetime-like indices allowing 

6640 strings containing formatted datetimes. 

6641 

6642 Parameters 

6643 ---------- 

6644 label : object 

6645 side : {'left', 'right'} 

6646 kind : {'loc', 'getitem'} or None 

6647 

6648 .. deprecated:: 1.3.0 

6649 

6650 Returns 

6651 ------- 

6652 label : object 

6653 

6654 Notes 

6655 ----- 

6656 Value of `side` parameter should be validated in caller. 

6657 """ 

6658 assert kind in ["loc", "getitem", None, no_default] 

6659 self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound") 

6660 

6661 # We are a plain index here (sub-class override this method if they 

6662 # wish to have special treatment for floats/ints, e.g. Float64Index and 

6663 # datetimelike Indexes 

6664 # reject them, if index does not contain label 

6665 if (is_float(label) or is_integer(label)) and label not in self: 

6666 raise self._invalid_indexer("slice", label) 

6667 

6668 return label 

6669 

6670 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): 

6671 if self.is_monotonic_increasing: 

6672 return self.searchsorted(label, side=side) 

6673 elif self.is_monotonic_decreasing: 

6674 # np.searchsorted expects ascending sort order, have to reverse 

6675 # everything for it to work (element ordering, search side and 

6676 # resulting value). 

6677 pos = self[::-1].searchsorted( 

6678 label, side="right" if side == "left" else "left" 

6679 ) 

6680 return len(self) - pos 

6681 

6682 raise ValueError("index must be monotonic increasing or decreasing") 

6683 

6684 def get_slice_bound( 

6685 self, label, side: Literal["left", "right"], kind=no_default 

6686 ) -> int: 

6687 """ 

6688 Calculate slice bound that corresponds to given label. 

6689 

6690 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position 

6691 of given label. 

6692 

6693 Parameters 

6694 ---------- 

6695 label : object 

6696 side : {'left', 'right'} 

6697 kind : {'loc', 'getitem'} or None 

6698 

6699 .. deprecated:: 1.4.0 

6700 

6701 Returns 

6702 ------- 

6703 int 

6704 Index of label. 

6705 """ 

6706 assert kind in ["loc", "getitem", None, no_default] 

6707 self._deprecated_arg(kind, "kind", "get_slice_bound") 

6708 

6709 if side not in ("left", "right"): 

6710 raise ValueError( 

6711 "Invalid value for side kwarg, must be either " 

6712 f"'left' or 'right': {side}" 

6713 ) 

6714 

6715 original_label = label 

6716 

6717 # For datetime indices label may be a string that has to be converted 

6718 # to datetime boundary according to its resolution. 

6719 label = self._maybe_cast_slice_bound(label, side) 

6720 

6721 # we need to look up the label 

6722 try: 

6723 slc = self.get_loc(label) 

6724 except KeyError as err: 

6725 try: 

6726 return self._searchsorted_monotonic(label, side) 

6727 except ValueError: 

6728 # raise the original KeyError 

6729 raise err 

6730 

6731 if isinstance(slc, np.ndarray): 

6732 # get_loc may return a boolean array, which 

6733 # is OK as long as they are representable by a slice. 

6734 assert is_bool_dtype(slc.dtype) 

6735 slc = lib.maybe_booleans_to_slice(slc.view("u1")) 

6736 if isinstance(slc, np.ndarray): 

6737 raise KeyError( 

6738 f"Cannot get {side} slice bound for non-unique " 

6739 f"label: {repr(original_label)}" 

6740 ) 

6741 

6742 if isinstance(slc, slice): 

6743 if side == "left": 

6744 return slc.start 

6745 else: 

6746 return slc.stop 

6747 else: 

6748 if side == "right": 

6749 return slc + 1 

6750 else: 

6751 return slc 
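
A short sketch of the left/right bound semantics described in the docstring:

    import pandas as pd

    idx = pd.Index(list("abcd"))
    idx.get_slice_bound("b", side="left")    # 1
    idx.get_slice_bound("b", side="right")   # 2, i.e. one past the label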

6752 

6753 def slice_locs( 

6754 self, start=None, end=None, step=None, kind=no_default 

6755 ) -> tuple[int, int]: 

6756 """ 

6757 Compute slice locations for input labels. 

6758 

6759 Parameters 

6760 ---------- 

6761 start : label, default None 

6762 If None, defaults to the beginning. 

6763 end : label, default None 

6764 If None, defaults to the end. 

6765 step : int, defaults None 

6766 If None, defaults to 1. 

6767 kind : {'loc', 'getitem'} or None 

6768 

6769 .. deprecated:: 1.4.0 

6770 

6771 Returns 

6772 ------- 

6773 start, end : int 

6774 

6775 See Also 

6776 -------- 

6777 Index.get_loc : Get location for a single label. 

6778 

6779 Notes 

6780 ----- 

6781 This method only works if the index is monotonic or unique. 

6782 

6783 Examples 

6784 -------- 

6785 >>> idx = pd.Index(list('abcd')) 

6786 >>> idx.slice_locs(start='b', end='c') 

6787 (1, 3) 

6788 """ 

6789 self._deprecated_arg(kind, "kind", "slice_locs") 

6790 inc = step is None or step >= 0 

6791 

6792 if not inc: 

6793 # If it's a reverse slice, temporarily swap bounds. 

6794 start, end = end, start 

6795 

6796 # GH 16785: If start and end happen to be date strings with UTC offsets 

6797 # attempt to parse and check that the offsets are the same 

6798 if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)): 

6799 try: 

6800 ts_start = Timestamp(start) 

6801 ts_end = Timestamp(end) 

6802 except (ValueError, TypeError): 

6803 pass 

6804 else: 

6805 if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): 

6806 raise ValueError("Both dates must have the same UTC offset") 

6807 

6808 start_slice = None 

6809 if start is not None: 

6810 start_slice = self.get_slice_bound(start, "left") 

6811 if start_slice is None: 

6812 start_slice = 0 

6813 

6814 end_slice = None 

6815 if end is not None: 

6816 end_slice = self.get_slice_bound(end, "right") 

6817 if end_slice is None: 

6818 end_slice = len(self) 

6819 

6820 if not inc: 

6821 # Bounds at this moment are swapped, swap them back and shift by 1. 

6822 # 

6823 # slice_locs('B', 'A', step=-1): s='B', e='A' 

6824 # 

6825 #              s='A'                 e='B' 

6826 # AFTER SWAP:    |                     | 

6827 #                v ------------------> V 

6828 #           ----------------------------------- 

6829 #           | | |A|A|A|A| | | | | |B|B| | | | | 

6830 #           ----------------------------------- 

6831 #              ^ <------------------ ^ 

6832 # SHOULD BE:   |                     | 

6833 #           end=s-1              start=e-1 

6834 # 

6835 end_slice, start_slice = start_slice - 1, end_slice - 1 

6836 

6837 # i == -1 triggers ``len(self) + i`` selection that points to the 

6838 # last element, not before-the-first one, subtracting len(self) 

6839 # compensates that. 

6840 if end_slice == -1: 

6841 end_slice -= len(self) 

6842 if start_slice == -1: 

6843 start_slice -= len(self) 

6844 

6845 return start_slice, end_slice 

6846 

6847 def delete(self: _IndexT, loc) -> _IndexT: 

6848 """ 

6849 Make new Index with passed location(-s) deleted. 

6850 

6851 Parameters 

6852 ---------- 

6853 loc : int or list of int 

6854 Location of item(-s) which will be deleted. 

6855 Use a list of locations to delete more than one value at the same time. 

6856 

6857 Returns 

6858 ------- 

6859 Index 

6860 Will be same type as self, except for RangeIndex. 

6861 

6862 See Also 

6863 -------- 

6864 numpy.delete : Delete any rows and column from NumPy array (ndarray). 

6865 

6866 Examples 

6867 -------- 

6868 >>> idx = pd.Index(['a', 'b', 'c']) 

6869 >>> idx.delete(1) 

6870 Index(['a', 'c'], dtype='object') 

6871 

6872 >>> idx = pd.Index(['a', 'b', 'c']) 

6873 >>> idx.delete([0, 2]) 

6874 Index(['b'], dtype='object') 

6875 """ 

6876 values = self._values 

6877 res_values: ArrayLike 

6878 if isinstance(values, np.ndarray): 

6879 # TODO(__array_function__): special casing will be unnecessary 

6880 res_values = np.delete(values, loc) 

6881 else: 

6882 res_values = values.delete(loc) 

6883 

6884 # _constructor so RangeIndex->Int64Index 

6885 return self._constructor._simple_new(res_values, name=self.name) 

6886 

6887 def insert(self, loc: int, item) -> Index: 

6888 """ 

6889 Make new Index inserting new item at location. 

6890 

6891 Follows numpy.insert semantics for negative values. 

6892 

6893 Parameters 

6894 ---------- 

6895 loc : int 

6896 item : object 

6897 

6898 Returns 

6899 ------- 

6900 new_index : Index 

6901 """ 

6902 item = lib.item_from_zerodim(item) 

6903 if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object: 

6904 item = self._na_value 

6905 

6906 arr = self._values 

6907 

6908 try: 

6909 if isinstance(arr, ExtensionArray): 

6910 res_values = arr.insert(loc, item) 

6911 return type(self)._simple_new(res_values, name=self.name) 

6912 else: 

6913 item = self._validate_fill_value(item) 

6914 except (TypeError, ValueError, LossySetitemError): 

6915 # e.g. trying to insert an integer into a DatetimeIndex 

6916 # We cannot keep the same dtype, so cast to the (often object) 

6917 # minimal shared dtype before doing the insert. 

6918 dtype = self._find_common_type_compat(item) 

6919 return self.astype(dtype).insert(loc, item) 

6920 

6921 if arr.dtype != object or not isinstance( 

6922 item, (tuple, np.datetime64, np.timedelta64) 

6923 ): 

6924 # with object-dtype we need to worry about numpy incorrectly casting 

6925 # dt64/td64 to integer, also about treating tuples as sequences 

6926 # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550 

6927 casted = arr.dtype.type(item) 

6928 new_values = np.insert(arr, loc, casted) 

6929 

6930 else: 

6931 # error: No overload variant of "insert" matches argument types 

6932 # "ndarray[Any, Any]", "int", "None" 

6933 new_values = np.insert(arr, loc, None) # type: ignore[call-overload] 

6934 loc = loc if loc >= 0 else loc - 1 

6935 new_values[loc] = item 

6936 

6937 if self._typ == "numericindex": 

6938 # Use self._constructor instead of Index to retain NumericIndex GH#43921 

6939 # TODO(2.0) can use Index instead of self._constructor 

6940 return self._constructor._with_infer(new_values, name=self.name) 

6941 else: 

6942 return Index._with_infer(new_values, name=self.name) 

6943 
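# Illustrative examples of the two paths above (outputs assume this
# pandas version): a compatible item keeps the dtype, while an item that
# fails ``_validate_fill_value`` falls back to the common (often object)
# dtype via ``astype``:
#
#   >>> import pandas as pd
#   >>> pd.Index([1, 2, 3]).insert(1, 4)
#   Int64Index([1, 4, 2, 3], dtype='int64')
#   >>> pd.Index([1, 2, 3]).insert(1, "q")
#   Index([1, 'q', 2, 3], dtype='object')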

6944 def drop( 

6945 self, 

6946 labels: Index | np.ndarray | Iterable[Hashable], 

6947 errors: IgnoreRaise = "raise", 

6948 ) -> Index: 

6949 """ 

6950 Make new Index with passed list of labels deleted. 

6951 

6952 Parameters 

6953 ---------- 

6954 labels : array-like or scalar 

6955 errors : {'ignore', 'raise'}, default 'raise' 

6956 If 'ignore', suppress the error; only existing labels are dropped.

6957 

6958 Returns 

6959 ------- 

6960 dropped : Index 

6961 Will be same type as self, except for RangeIndex. 

6962 

6963 Raises 

6964 ------ 

6965 KeyError 

6966 If not all of the labels are found in the selected axis.

6967 """ 

6968 if not isinstance(labels, Index): 

6969 # avoid materializing e.g. RangeIndex 

6970 arr_dtype = "object" if self.dtype == "object" else None 

6971 labels = com.index_labels_to_array(labels, dtype=arr_dtype) 

6972 

6973 indexer = self.get_indexer_for(labels) 

6974 mask = indexer == -1 

6975 if mask.any(): 

6976 if errors != "ignore": 

6977 raise KeyError(f"{list(labels[mask])} not found in axis") 

6978 indexer = indexer[~mask] 

6979 return self.delete(indexer) 

6980 
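# Sketch of the ``errors`` handling above: missing labels raise by
# default, while errors="ignore" drops only the labels that are present
# (outputs assume this pandas version):
#
#   >>> import pandas as pd
#   >>> idx = pd.Index(["a", "b", "c"])
#   >>> idx.drop(["b", "x"], errors="ignore")
#   Index(['a', 'c'], dtype='object')
#   >>> idx.drop(["b", "x"])
#   Traceback (most recent call last):
#   ...
#   KeyError: "['x'] not found in axis"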

6981 # -------------------------------------------------------------------- 

6982 # Generated Arithmetic, Comparison, and Unary Methods 

6983 

6984 def _cmp_method(self, other, op): 

6985 """ 

6986 Wrapper used to dispatch comparison operations. 

6987 """ 

6988 if self.is_(other): 

6989 # fastpath 

6990 if op in {operator.eq, operator.le, operator.ge}: 

6991 arr = np.ones(len(self), dtype=bool) 

6992 if self._can_hold_na and not isinstance(self, ABCMultiIndex): 

6993 # TODO: should set MultiIndex._can_hold_na = False? 

6994 arr[self.isna()] = False 

6995 return arr 

6996 elif op is operator.ne: 

6997 arr = np.zeros(len(self), dtype=bool) 

6998 if self._can_hold_na and not isinstance(self, ABCMultiIndex): 

6999 arr[self.isna()] = True 

7000 return arr 

7001 

7002 if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len( 

7003 self 

7004 ) != len(other): 

7005 raise ValueError("Lengths must match to compare") 

7006 

7007 if not isinstance(other, ABCMultiIndex): 

7008 other = extract_array(other, extract_numpy=True) 

7009 else: 

7010 other = np.asarray(other) 

7011 

7012 if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray): 

7013 # e.g. PeriodArray, Categorical 

7014 with np.errstate(all="ignore"): 

7015 result = op(self._values, other) 

7016 

7017 elif isinstance(self._values, ExtensionArray): 

7018 result = op(self._values, other) 

7019 

7020 elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex): 

7021 # don't pass MultiIndex 

7022 with np.errstate(all="ignore"): 

7023 result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) 

7024 

7025 else: 

7026 with np.errstate(all="ignore"): 

7027 result = ops.comparison_op(self._values, other, op) 

7028 

7029 return result 

7030 
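# Sketch of the identity fastpath above: comparing an Index with itself
# avoids an elementwise comparison, but NA positions are still masked
# (outputs assume this pandas version):
#
#   >>> import numpy as np
#   >>> import pandas as pd
#   >>> idx = pd.Index([1.0, 2.0, np.nan])
#   >>> idx == idx
#   array([ True,  True, False])
#   >>> idx != idx
#   array([False, False,  True])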

7031 def _construct_result(self, result, name): 

7032 if isinstance(result, tuple): 

7033 return ( 

7034 Index._with_infer(result[0], name=name), 

7035 Index._with_infer(result[1], name=name), 

7036 ) 

7037 return Index._with_infer(result, name=name) 

7038 

7039 def _arith_method(self, other, op): 

7040 if ( 

7041 isinstance(other, Index) 

7042 and is_object_dtype(other.dtype) 

7043 and type(other) is not Index 

7044 ): 

7045 # We return NotImplemented for object-dtype index *subclasses* so they have 

7046 # a chance to implement ops before we unwrap them. 

7047 # See https://github.com/pandas-dev/pandas/issues/31109 

7048 return NotImplemented 

7049 

7050 return super()._arith_method(other, op) 

7051 

7052 @final 

7053 def _unary_method(self, op): 

7054 result = op(self._values) 

7055 return Index(result, name=self.name) 

7056 

7057 def __abs__(self) -> Index: 

7058 return self._unary_method(operator.abs) 

7059 

7060 def __neg__(self) -> Index: 

7061 return self._unary_method(operator.neg) 

7062 

7063 def __pos__(self) -> Index: 

7064 return self._unary_method(operator.pos) 

7065 

7066 def __invert__(self) -> Index: 

7067 # GH#8875 

7068 return self._unary_method(operator.inv) 

7069 

7070 # -------------------------------------------------------------------- 

7071 # Reductions 

7072 

7073 def any(self, *args, **kwargs): 

7074 """ 

7075 Return whether any element is truthy.

7076 

7077 Parameters 

7078 ---------- 

7079 *args 

7080 Required for compatibility with numpy. 

7081 **kwargs 

7082 Required for compatibility with numpy. 

7083 

7084 Returns 

7085 ------- 

7086 any : bool or array-like (if axis is specified) 

7087 A single element array-like may be converted to bool. 

7088 

7089 See Also 

7090 -------- 

7091 Index.all : Return whether all elements are True. 

7092 Series.all : Return whether all elements are True. 

7093 

7094 Notes 

7095 ----- 

7096 Not a Number (NaN), positive infinity and negative infinity 

7097 evaluate to True because these are not equal to zero. 

7098 

7099 Examples 

7100 -------- 

7101 >>> index = pd.Index([0, 1, 2]) 

7102 >>> index.any() 

7103 True 

7104 

7105 >>> index = pd.Index([0, 0, 0]) 

7106 >>> index.any() 

7107 False 

7108 """ 

7109 nv.validate_any(args, kwargs) 

7110 self._maybe_disable_logical_methods("any") 

7111 # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected 

7112 # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, 

7113 # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], 

7114 # _SupportsArray]" 

7115 return np.any(self.values) # type: ignore[arg-type] 

7116 

7117 def all(self, *args, **kwargs): 

7118 """ 

7119 Return whether all elements are truthy.

7120 

7121 Parameters 

7122 ---------- 

7123 *args 

7124 Required for compatibility with numpy. 

7125 **kwargs 

7126 Required for compatibility with numpy. 

7127 

7128 Returns 

7129 ------- 

7130 all : bool or array-like (if axis is specified) 

7131 A single element array-like may be converted to bool. 

7132 

7133 See Also 

7134 -------- 

7135 Index.any : Return whether any element in an Index is True. 

7136 Series.any : Return whether any element in a Series is True. 

7137 Series.all : Return whether all elements in a Series are True. 

7138 

7139 Notes 

7140 ----- 

7141 Not a Number (NaN), positive infinity and negative infinity 

7142 evaluate to True because these are not equal to zero. 

7143 

7144 Examples 

7145 -------- 

7146 True, because nonzero integers are considered True. 

7147 

7148 >>> pd.Index([1, 2, 3]).all() 

7149 True 

7150 

7151 False, because ``0`` is considered False. 

7152 

7153 >>> pd.Index([0, 1, 2]).all() 

7154 False 

7155 """ 

7156 nv.validate_all(args, kwargs) 

7157 self._maybe_disable_logical_methods("all") 

7158 # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected 

7159 # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, 

7160 # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], 

7161 # _SupportsArray]" 

7162 return np.all(self.values) # type: ignore[arg-type] 

7163 

7164 @final 

7165 def _maybe_disable_logical_methods(self, opname: str_t) -> None: 

7166 """ 

7167 Raise if this Index subclass does not support any or all.

7168 """ 

7169 if ( 

7170 isinstance(self, ABCMultiIndex) 

7171 or needs_i8_conversion(self.dtype) 

7172 or is_interval_dtype(self.dtype) 

7173 or is_categorical_dtype(self.dtype) 

7174 or is_float_dtype(self.dtype) 

7175 ): 

7176 # This call will raise 

7177 make_invalid_op(opname)(self) 

7178 
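# Sketch of the guard above: for dtypes where elementwise truthiness is
# ill-defined (float, datetime-like, categorical, interval, MultiIndex),
# any/all raise instead of reducing; the message assumes this pandas
# version:
#
#   >>> import pandas as pd
#   >>> pd.Index([1.5, 2.5]).any()
#   Traceback (most recent call last):
#   ...
#   TypeError: cannot perform any with this index type: Float64Index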

7179 @Appender(IndexOpsMixin.argmin.__doc__) 

7180 def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: 

7181 nv.validate_argmin(args, kwargs) 

7182 nv.validate_minmax_axis(axis) 

7183 

7184 if not self._is_multi and self.hasnans: 

7185 # Take advantage of cache 

7186 mask = self._isnan 

7187 if not skipna or mask.all(): 

7188 return -1 

7189 return super().argmin(skipna=skipna) 

7190 

7191 @Appender(IndexOpsMixin.argmax.__doc__) 

7192 def argmax(self, axis=None, skipna=True, *args, **kwargs) -> int: 

7193 nv.validate_argmax(args, kwargs) 

7194 nv.validate_minmax_axis(axis) 

7195 

7196 if not self._is_multi and self.hasnans: 

7197 # Take advantage of cache 

7198 mask = self._isnan 

7199 if not skipna or mask.all(): 

7200 return -1 

7201 return super().argmax(skipna=skipna) 

7202 
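# Sketch of the NaN handling in argmin/argmax above: NaNs are skipped by
# default, and the -1 sentinel is returned when skipna=False (or when
# every value is NaN); outputs assume this pandas version:
#
#   >>> import numpy as np
#   >>> import pandas as pd
#   >>> idx = pd.Index([np.nan, 2.0, 1.0])
#   >>> idx.argmin()
#   2
#   >>> idx.argmin(skipna=False)
#   -1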

7203 @doc(IndexOpsMixin.min) 

7204 def min(self, axis=None, skipna=True, *args, **kwargs): 

7205 nv.validate_min(args, kwargs) 

7206 nv.validate_minmax_axis(axis) 

7207 

7208 if not len(self): 

7209 return self._na_value 

7210 

7211 if len(self) and self.is_monotonic_increasing: 

7212 # quick check 

7213 first = self[0] 

7214 if not isna(first): 

7215 return first 

7216 

7217 if not self._is_multi and self.hasnans: 

7218 # Take advantage of cache 

7219 mask = self._isnan 

7220 if not skipna or mask.all(): 

7221 return self._na_value 

7222 

7223 if not self._is_multi and not isinstance(self._values, np.ndarray): 

7224 # "ExtensionArray" has no attribute "min" 

7225 return self._values.min(skipna=skipna) # type: ignore[attr-defined] 

7226 

7227 return super().min(skipna=skipna) 

7228 

7229 @doc(IndexOpsMixin.max) 

7230 def max(self, axis=None, skipna=True, *args, **kwargs): 

7231 nv.validate_max(args, kwargs) 

7232 nv.validate_minmax_axis(axis) 

7233 

7234 if not len(self): 

7235 return self._na_value 

7236 

7237 if len(self) and self.is_monotonic_increasing: 

7238 # quick check 

7239 last = self[-1] 

7240 if not isna(last): 

7241 return last 

7242 

7243 if not self._is_multi and self.hasnans: 

7244 # Take advantage of cache 

7245 mask = self._isnan 

7246 if not skipna or mask.all(): 

7247 return self._na_value 

7248 

7249 if not self._is_multi and not isinstance(self._values, np.ndarray): 

7250 # "ExtensionArray" has no attribute "max" 

7251 return self._values.max(skipna=skipna) # type: ignore[attr-defined] 

7252 

7253 return super().max(skipna=skipna) 

7254 
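# Sketch of the min/max behavior above (outputs assume this pandas
# version): an empty Index reduces to its NA value, and NaNs win only
# when skipna=False:
#
#   >>> import numpy as np
#   >>> import pandas as pd
#   >>> pd.Index([], dtype="float64").min()
#   nan
#   >>> pd.Index([1.0, np.nan]).max()
#   1.0
#   >>> pd.Index([1.0, np.nan]).max(skipna=False)
#   nan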

7255 # -------------------------------------------------------------------- 

7256 

7257 @final 

7258 @property 

7259 def shape(self) -> Shape: 

7260 """ 

7261 Return a tuple of the shape of the underlying data. 

7262 """ 

7263 # See GH#27775, GH#27384 for history/reasoning in how this is defined. 

7264 return (len(self),) 

7265 

7266 @final 

7267 def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None: 

7268 """ 

7269 Issue a FutureWarning if the arg/kwarg is not no_default. 

7270 """ 

7271 if value is not no_default: 

7272 warnings.warn( 

7273 f"'{name}' argument in {methodname} is deprecated " 

7274 "and will be removed in a future version. Do not pass it.", 

7275 FutureWarning, 

7276 stacklevel=find_stack_level(), 

7277 ) 

7278 

7279 

7280def ensure_index_from_sequences(sequences, names=None) -> Index: 

7281 """ 

7282 Construct an index from sequences of data. 

7283 

7284 A single sequence returns an Index. Multiple sequences return a

7285 MultiIndex.

7286 

7287 Parameters 

7288 ---------- 

7289 sequences : sequence of sequences 

7290 names : sequence of str 

7291 

7292 Returns 

7293 ------- 

7294 index : Index or MultiIndex 

7295 

7296 Examples 

7297 -------- 

7298 >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) 

7299 Int64Index([1, 2, 3], dtype='int64', name='name') 

7300 

7301 >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) 

7302 MultiIndex([('a', 'a'), 

7303 ('a', 'b')], 

7304 names=['L1', 'L2']) 

7305 

7306 See Also 

7307 -------- 

7308 ensure_index 

7309 """ 

7310 from pandas.core.indexes.multi import MultiIndex 

7311 

7312 if len(sequences) == 1: 

7313 if names is not None: 

7314 names = names[0] 

7315 return Index._with_infer(sequences[0], name=names) 

7316 else: 

7317 return MultiIndex.from_arrays(sequences, names=names) 

7318 

7319 

7320def ensure_index(index_like: Axes, copy: bool = False) -> Index: 

7321 """ 

7322 Ensure that we have an index from some index-like object. 

7323 

7324 Parameters 

7325 ---------- 

7326 index_like : sequence 

7327 An Index or other sequence 

7328 copy : bool, default False 

7329 

7330 Returns 

7331 ------- 

7332 index : Index or MultiIndex 

7333 

7334 See Also 

7335 -------- 

7336 ensure_index_from_sequences 

7337 

7338 Examples 

7339 -------- 

7340 >>> ensure_index(['a', 'b']) 

7341 Index(['a', 'b'], dtype='object') 

7342 

7343 >>> ensure_index([('a', 'a'), ('b', 'c')]) 

7344 Index([('a', 'a'), ('b', 'c')], dtype='object') 

7345 

7346 >>> ensure_index([['a', 'a'], ['b', 'c']]) 

7347 MultiIndex([('a', 'b'), 

7348 ('a', 'c')], 

7349 ) 

7350 """ 

7351 if isinstance(index_like, Index): 

7352 if copy: 

7353 index_like = index_like.copy() 

7354 return index_like 

7355 

7356 if isinstance(index_like, ABCSeries): 

7357 name = index_like.name 

7358 return Index._with_infer(index_like, name=name, copy=copy) 

7359 

7360 if is_iterator(index_like): 

7361 index_like = list(index_like) 

7362 

7363 if isinstance(index_like, list): 

7364 if type(index_like) is not list: 

7365 # must check for exactly list here because of strict type 

7366 # check in clean_index_list 

7367 index_like = list(index_like) 

7368 

7369 if len(index_like) and lib.is_all_arraylike(index_like): 

7370 from pandas.core.indexes.multi import MultiIndex 

7371 

7372 return MultiIndex.from_arrays(index_like) 

7373 else: 

7374 return Index._with_infer(index_like, copy=copy, tupleize_cols=False) 

7375 else: 

7376 return Index._with_infer(index_like, copy=copy) 

7377 

7378 

7379def ensure_has_len(seq): 

7380 """ 

7381 If seq is an iterator, put its values into a list. 

7382 """ 

7383 try: 

7384 len(seq) 

7385 except TypeError: 

7386 return list(seq) 

7387 else: 

7388 return seq 

7389 

7390 

7391def trim_front(strings: list[str]) -> list[str]: 

7392 """ 

7393 Trims leading spaces evenly from each string in the list.

7394 

7395 Examples 

7396 -------- 

7397 >>> trim_front([" a", " b"]) 

7398 ['a', 'b'] 

7399 

7400 >>> trim_front([" a", " "]) 

7401 ['a', ''] 

7402 """ 

7403 if not strings: 

7404 return strings 

7405 while all(strings) and all(x[0] == " " for x in strings): 

7406 strings = [x[1:] for x in strings] 

7407 return strings 

7408 

7409 

7410def _validate_join_method(method: str) -> None: 

7411 if method not in ["left", "right", "inner", "outer"]: 

7412 raise ValueError(f"do not recognize join method {method}") 

7413 

7414 

7415def maybe_extract_name(name, obj, cls) -> Hashable: 

7416 """ 

7417 If no name is passed, then extract it from data, validating hashability. 

7418 """ 

7419 if name is None and isinstance(obj, (Index, ABCSeries)): 

7420 # Note we don't just check for "name" attribute since that would 

7421 # pick up e.g. dtype.name 

7422 name = obj.name 

7423 

7424 # GH#29069 

7425 if not is_hashable(name): 

7426 raise TypeError(f"{cls.__name__}.name must be a hashable type") 

7427 

7428 return name 

7429 
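# Sketch using this module's helper (outputs assume this pandas
# version): the name is taken from the data only when not passed
# explicitly, and must be hashable:
#
#   >>> import pandas as pd
#   >>> maybe_extract_name(None, pd.Series([1], name="s"), Index)
#   's'
#   >>> maybe_extract_name(["not-hashable"], None, Index)
#   Traceback (most recent call last):
#   ...
#   TypeError: Index.name must be a hashable type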

7430 

7431_cast_depr_msg = ( 

7432 "In a future version, passing an object-dtype arraylike to pd.Index will " 

7433 "not infer numeric values to numeric dtype (matching the Series behavior). " 

7434 "To retain the old behavior, explicitly pass the desired dtype or use the " 

7435 "desired Index subclass" 

7436) 

7437 

7438 

7439def _maybe_cast_data_without_dtype( 

7440 subarr: np.ndarray, cast_numeric_deprecated: bool = True 

7441) -> ArrayLike: 

7442 """ 

7443 If we have an arraylike input but no passed dtype, try to infer 

7444 a supported dtype. 

7445 

7446 Parameters 

7447 ---------- 

7448 subarr : np.ndarray[object] 

7449 cast_numeric_deprecated : bool, default True 

7450 Whether to issue a FutureWarning when inferring numeric dtypes. 

7451 

7452 Returns 

7453 ------- 

7454 np.ndarray or ExtensionArray 

7455 """ 

7456 

7457 result = lib.maybe_convert_objects( 

7458 subarr, 

7459 convert_datetime=True, 

7460 convert_timedelta=True, 

7461 convert_period=True, 

7462 convert_interval=True, 

7463 dtype_if_all_nat=np.dtype("datetime64[ns]"), 

7464 ) 

7465 if result.dtype.kind in ["i", "u", "f"]: 

7466 if not cast_numeric_deprecated: 

7467 # i.e. we started with a list, not an ndarray[object] 

7468 return result 

7469 

7470 warnings.warn( 

7471 "In a future version, the Index constructor will not infer numeric " 

7472 "dtypes when passed object-dtype sequences (matching Series behavior)", 

7473 FutureWarning, 

7474 stacklevel=find_stack_level(), 

7475 ) 

7476 result = ensure_wrapped_if_datetimelike(result) 

7477 return result 

7478 
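# Sketch of the deprecation above (assuming this pandas version): an
# object-dtype ndarray of numbers still infers a numeric Index, with a
# FutureWarning that is silenced here only to keep the example tidy:
#
#   >>> import warnings
#   >>> import numpy as np
#   >>> import pandas as pd
#   >>> with warnings.catch_warnings():
#   ...     warnings.simplefilter("ignore", FutureWarning)
#   ...     idx = pd.Index(np.array([1, 2, 3], dtype=object))
#   >>> idx
#   Int64Index([1, 2, 3], dtype='int64')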

7479 

7480def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: 

7481 """ 

7482 Return common name if all indices agree, otherwise None (level-by-level). 

7483 

7484 Parameters 

7485 ---------- 

7486 indexes : list of Index objects 

7487 

7488 Returns 

7489 ------- 

7490 tuple

7491 A tuple representing the unanimous 'names' found.

7492 """ 

7493 name_tups = [tuple(i.names) for i in indexes] 

7494 name_sets = [{*ns} for ns in zip_longest(*name_tups)] 

7495 names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets) 

7496 return names 

7497 
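# Sketch (outputs assume this pandas version): a name survives only
# where every index agrees, level by level:
#
#   >>> import pandas as pd
#   >>> get_unanimous_names(pd.Index([1], name="x"), pd.Index([2], name="x"))
#   ('x',)
#   >>> get_unanimous_names(pd.Index([1], name="x"), pd.Index([2], name="y"))
#   (None,)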

7498 

7499def unpack_nested_dtype(other: _IndexT) -> _IndexT: 

7500 """ 

7501 When checking if our dtype is comparable with another, we need 

7502 to unpack CategoricalDtype to look at its categories.dtype. 

7503 

7504 Parameters 

7505 ---------- 

7506 other : Index 

7507 

7508 Returns 

7509 ------- 

7510 Index 

7511 """ 

7512 dtype = other.dtype 

7513 if is_categorical_dtype(dtype): 

7514 # If there is ever a SparseIndex, this could get dispatched 

7515 # here too. 

7516 # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any], 

7517 # ExtensionDtype]" has no attribute "categories" 

7518 return dtype.categories # type: ignore[union-attr] 

7519 return other 

7520 
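# Sketch (outputs assume this pandas version): a CategoricalIndex is
# unpacked to its categories so comparability checks see the inner
# dtype, while any other Index passes through unchanged:
#
#   >>> import pandas as pd
#   >>> unpack_nested_dtype(pd.CategoricalIndex(["a", "b"]))
#   Index(['a', 'b'], dtype='object')
#   >>> unpack_nested_dtype(pd.Index([1, 2]))
#   Int64Index([1, 2], dtype='int64')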

7521 

7522def _maybe_try_sort(result, sort): 

7523 if sort is None: 

7524 try: 

7525 result = algos.safe_sort(result) 

7526 except TypeError as err: 

7527 warnings.warn( 

7528 f"{err}, sort order is undefined for incomparable objects.", 

7529 RuntimeWarning, 

7530 stacklevel=find_stack_level(), 

7531 ) 

7532 return result
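# Sketch of the fallback above (assuming this pandas version): with
# sort=None and incomparable values, the unsorted result is returned and
# a RuntimeWarning is emitted (silenced here to keep the example tidy),
# e.g. in a set operation on a mixed-type object Index:
#
#   >>> import warnings
#   >>> import pandas as pd
#   >>> with warnings.catch_warnings():
#   ...     warnings.simplefilter("ignore", RuntimeWarning)
#   ...     result = pd.Index([1, "a"]).union(pd.Index([2, "b"]))
#   >>> result
#   Index([1, 'a', 2, 'b'], dtype='object')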