Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/base.py: 14%
2370 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import datetime
4import functools
5from itertools import zip_longest
6import operator
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 ClassVar,
12 Hashable,
13 Iterable,
14 Literal,
15 NoReturn,
16 Sequence,
17 TypeVar,
18 cast,
19 final,
20 overload,
21)
22import warnings
24import numpy as np
26from pandas._config import get_option
28from pandas._libs import (
29 NaT,
30 algos as libalgos,
31 index as libindex,
32 lib,
33)
34import pandas._libs.join as libjoin
35from pandas._libs.lib import (
36 is_datetime_array,
37 no_default,
38)
39from pandas._libs.missing import is_float_nan
40from pandas._libs.tslibs import (
41 IncompatibleFrequency,
42 OutOfBoundsDatetime,
43 Timestamp,
44 is_unitless,
45 tz_compare,
46)
47from pandas._typing import (
48 ArrayLike,
49 Axes,
50 Dtype,
51 DtypeObj,
52 F,
53 IgnoreRaise,
54 Level,
55 Shape,
56 npt,
57)
58from pandas.compat.numpy import function as nv
59from pandas.errors import (
60 DuplicateLabelError,
61 IntCastingNaNError,
62 InvalidIndexError,
63)
64from pandas.util._decorators import (
65 Appender,
66 cache_readonly,
67 deprecate_nonkeyword_arguments,
68 doc,
69)
70from pandas.util._exceptions import (
71 find_stack_level,
72 rewrite_exception,
73)
75from pandas.core.dtypes.astype import astype_nansafe
76from pandas.core.dtypes.cast import (
77 LossySetitemError,
78 can_hold_element,
79 common_dtype_categorical_compat,
80 ensure_dtype_can_hold_na,
81 find_common_type,
82 infer_dtype_from,
83 maybe_cast_pointwise_result,
84 np_can_hold_element,
85)
86from pandas.core.dtypes.common import (
87 ensure_int64,
88 ensure_object,
89 ensure_platform_int,
90 is_bool_dtype,
91 is_categorical_dtype,
92 is_dtype_equal,
93 is_ea_or_datetimelike_dtype,
94 is_extension_array_dtype,
95 is_float,
96 is_float_dtype,
97 is_hashable,
98 is_integer,
99 is_interval_dtype,
100 is_iterator,
101 is_list_like,
102 is_numeric_dtype,
103 is_object_dtype,
104 is_scalar,
105 is_signed_integer_dtype,
106 is_string_dtype,
107 is_unsigned_integer_dtype,
108 needs_i8_conversion,
109 pandas_dtype,
110 validate_all_hashable,
111)
112from pandas.core.dtypes.concat import concat_compat
113from pandas.core.dtypes.dtypes import (
114 CategoricalDtype,
115 DatetimeTZDtype,
116 ExtensionDtype,
117 IntervalDtype,
118 PandasDtype,
119 PeriodDtype,
120)
121from pandas.core.dtypes.generic import (
122 ABCDataFrame,
123 ABCDatetimeIndex,
124 ABCMultiIndex,
125 ABCPeriodIndex,
126 ABCRangeIndex,
127 ABCSeries,
128 ABCTimedeltaIndex,
129)
130from pandas.core.dtypes.inference import is_dict_like
131from pandas.core.dtypes.missing import (
132 array_equivalent,
133 is_valid_na_for_dtype,
134 isna,
135)
137from pandas.core import (
138 arraylike,
139 missing,
140 ops,
141)
142from pandas.core.accessor import CachedAccessor
143import pandas.core.algorithms as algos
144from pandas.core.array_algos.putmask import (
145 setitem_datetimelike_compat,
146 validate_putmask,
147)
148from pandas.core.arrays import (
149 Categorical,
150 ExtensionArray,
151)
152from pandas.core.arrays.datetimes import (
153 tz_to_dtype,
154 validate_tz_from_dtype,
155)
156from pandas.core.arrays.sparse import SparseDtype
157from pandas.core.arrays.string_ import StringArray
158from pandas.core.base import (
159 IndexOpsMixin,
160 PandasObject,
161)
162import pandas.core.common as com
163from pandas.core.construction import (
164 ensure_wrapped_if_datetimelike,
165 extract_array,
166 sanitize_array,
167)
168from pandas.core.indexers import deprecate_ndim_indexing
169from pandas.core.indexes.frozen import FrozenList
170from pandas.core.ops import get_op_result_name
171from pandas.core.ops.invalid import make_invalid_op
172from pandas.core.sorting import (
173 ensure_key_mapped,
174 get_group_index_sorter,
175 nargsort,
176)
177from pandas.core.strings import StringMethods
179from pandas.io.formats.printing import (
180 PrettyDict,
181 default_pprint,
182 format_object_summary,
183 pprint_thing,
184)
186if TYPE_CHECKING: 186 ↛ 187line 186 didn't jump to line 187, because the condition on line 186 was never true
187 from pandas import (
188 CategoricalIndex,
189 DataFrame,
190 MultiIndex,
191 Series,
192 )
193 from pandas.core.arrays import PeriodArray
# Public API of this module.
__all__ = ["Index"]

# lib.infer_dtype results for which sorting is not well-defined.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Substitutions applied to docstrings shared across Index subclasses.
_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
# Filled in further down with docstring templates (e.g. "take").
_index_shared_docs: dict[str, str] = {}

# Alias for the builtin ``str``: the class body defines a ``str`` accessor
# attribute, so annotations below need an untouched alias.
str_t = str

# Cached object-dtype singleton; avoids repeated np.dtype("object") calls.
_dtype_obj = np.dtype("object")
215def _maybe_return_indexers(meth: F) -> F:
216 """
217 Decorator to simplify 'return_indexers' checks in Index.join.
218 """
220 @functools.wraps(meth)
221 def join(
222 self,
223 other,
224 how: str_t = "left",
225 level=None,
226 return_indexers: bool = False,
227 sort: bool = False,
228 ):
229 join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
230 if not return_indexers:
231 return join_index
233 if lidx is not None:
234 lidx = ensure_platform_int(lidx)
235 if ridx is not None:
236 ridx = ensure_platform_int(ridx)
237 return join_index, lidx, ridx
239 return cast(F, join)
def disallow_kwargs(kwargs: dict[str, Any]) -> None:
    """Raise ``TypeError`` if any unexpected keyword arguments remain."""
    if not kwargs:
        return
    raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        The Index subclass recorded in the pickle.
    d : dict
        The pickled state, passed as keyword arguments to the constructor.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)
275_IndexT = TypeVar("_IndexT", bound="Index")
278class Index(IndexOpsMixin, PandasObject):
279 """
280 Immutable sequence used for indexing and alignment.
282 The basic object storing axis labels for all pandas objects.
284 Parameters
285 ----------
286 data : array-like (1-dimensional)
287 dtype : NumPy dtype (default: object)
288 If dtype is None, we find the dtype that best fits the data.
289 If an actual dtype is provided, we coerce to that dtype if it's safe.
290 Otherwise, an error will be raised.
291 copy : bool
292 Make a copy of input ndarray.
293 name : object
294 Name to be stored in the index.
295 tupleize_cols : bool (default: True)
296 When True, attempt to create a MultiIndex if possible.
298 See Also
299 --------
300 RangeIndex : Index implementing a monotonic integer range.
301 CategoricalIndex : Index of :class:`Categorical` s.
302 MultiIndex : A multi-level, or hierarchical Index.
303 IntervalIndex : An Index of :class:`Interval` s.
304 DatetimeIndex : Index of datetime64 data.
305 TimedeltaIndex : Index of timedelta64 data.
306 PeriodIndex : Index of Period data.
307 NumericIndex : Index of numpy int/uint/float data.
308 Int64Index : Index of purely int64 labels (deprecated).
309 UInt64Index : Index of purely uint64 labels (deprecated).
310 Float64Index : Index of purely float64 labels (deprecated).
312 Notes
313 -----
314 An Index instance can **only** contain hashable objects
316 Examples
317 --------
318 >>> pd.Index([1, 2, 3])
319 Int64Index([1, 2, 3], dtype='int64')
321 >>> pd.Index(list('abc'))
322 Index(['a', 'b', 'c'], dtype='object')
323 """
    # tolist is not actually deprecated, just suppressed in the __dir__
    _hidden_attrs: frozenset[str] = (
        PandasObject._hidden_attrs
        | IndexOpsMixin._hidden_attrs
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses: when joining two Index objects,
    # the side with the higher precedence drives the join.
    _join_precedence = 1
340 @final
341 def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
342 # Caller is responsible for ensuring other.dtype == self.dtype
343 sv = self._get_engine_target()
344 ov = other._get_engine_target()
345 # can_use_libjoin assures sv and ov are ndarrays
346 sv = cast(np.ndarray, sv)
347 ov = cast(np.ndarray, ov)
348 return libjoin.left_join_indexer_unique(sv, ov)
350 @final
351 def _left_indexer(
352 self: _IndexT, other: _IndexT
353 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
354 # Caller is responsible for ensuring other.dtype == self.dtype
355 sv = self._get_engine_target()
356 ov = other._get_engine_target()
357 # can_use_libjoin assures sv and ov are ndarrays
358 sv = cast(np.ndarray, sv)
359 ov = cast(np.ndarray, ov)
360 joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
361 joined = self._from_join_target(joined_ndarray)
362 return joined, lidx, ridx
364 @final
365 def _inner_indexer(
366 self: _IndexT, other: _IndexT
367 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
368 # Caller is responsible for ensuring other.dtype == self.dtype
369 sv = self._get_engine_target()
370 ov = other._get_engine_target()
371 # can_use_libjoin assures sv and ov are ndarrays
372 sv = cast(np.ndarray, sv)
373 ov = cast(np.ndarray, ov)
374 joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
375 joined = self._from_join_target(joined_ndarray)
376 return joined, lidx, ridx
378 @final
379 def _outer_indexer(
380 self: _IndexT, other: _IndexT
381 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
382 # Caller is responsible for ensuring other.dtype == self.dtype
383 sv = self._get_engine_target()
384 ov = other._get_engine_target()
385 # can_use_libjoin assures sv and ov are ndarrays
386 sv = cast(np.ndarray, sv)
387 ov = cast(np.ndarray, ov)
388 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
389 joined = self._from_join_target(joined_ndarray)
390 return joined, lidx, ridx
    # Sentinel used by ABC* generic-type checks (pandas.core.dtypes.generic).
    _typ: str = "index"
    # Backing store for the index values.
    _data: ExtensionArray | np.ndarray
    # Acceptable type(s) for ``_data``; asserted in _simple_new. Subclasses
    # may narrow this to a single array class.
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    # Identity token shared between views; see Index.is_ / _reset_identity.
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]
    _is_numeric_dtype: bool = False
    _can_hold_strings: bool = True

    # Whether this index is a NumericIndex, but not a Int64Index, Float64Index,
    # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and
    # associated code in pandas 2.0.
    _is_backward_compat_public_numeric_index: bool = False

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
        # Engine class backing label lookups; object-dtype base class uses
        # ObjectEngine. Subclasses override with dtype-specific engines.
        return libindex.ObjectEngine

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    # ``.str`` accessor for vectorized string methods, e.g. idx.str.upper().
    str = CachedAccessor("str", StringMethods)
428 # --------------------------------------------------------------------
429 # Constructors
    def __new__(
        cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
    ) -> Index:
        # Main Index constructor: dispatches to the appropriate subclass for
        # the requested/inferred dtype, or builds a base Index.

        if kwargs:
            # All keywords beyond the documented five are deprecated.
            warnings.warn(
                "Passing keywords other than 'data', 'dtype', 'copy', 'name', "
                "'tupleize_cols' is deprecated and will raise TypeError in a "
                "future version. Use the specific Index subclass directly instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        from pandas.core.arrays import PandasArray
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)
        if "tz" in kwargs:
            # Legacy 'tz' keyword: fold it into a DatetimeTZDtype.
            tz = kwargs.pop("tz")
            validate_tz_from_dtype(dtype, tz)
            dtype = tz_to_dtype(tz)

        if type(data) is PandasArray:
            # ensure users don't accidentally put a PandasArray in an index,
            # but don't unpack StringArray
            data = data.to_numpy()
        if isinstance(dtype, PandasDtype):
            dtype = dtype.numpy_dtype

        data_dtype = getattr(data, "dtype", None)

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            klass = cls._dtype_to_subclass(dtype)
            if klass is not Index:
                return klass(data, dtype=dtype, copy=copy, name=name, **kwargs)

            # EA dtype that still maps to base Index: build the EA directly.
            ea_cls = dtype.construct_array_type()
            data = ea_cls._from_sequence(data, dtype=dtype, copy=copy)
            disallow_kwargs(kwargs)
            return Index._simple_new(data, name=name)

        elif is_ea_or_datetimelike_dtype(data_dtype):
            # dtype not requested but the data itself is EA/datetimelike.
            data_dtype = cast(DtypeObj, data_dtype)
            klass = cls._dtype_to_subclass(data_dtype)
            if klass is not Index:
                result = klass(data, copy=copy, name=name, **kwargs)
                if dtype is not None:
                    return result.astype(dtype, copy=False)
                return result
            elif dtype is not None:
                # GH#45206
                data = data.astype(dtype, copy=False)

            disallow_kwargs(kwargs)
            data = extract_array(data, extract_numpy=True)
            return Index._simple_new(data, name=name)

        # index-like
        elif (
            isinstance(data, Index)
            and data._is_backward_compat_public_numeric_index
            and dtype is None
        ):
            # Preserve the NumericIndex subclass (backwards-compat path).
            return data._constructor(data, name=name, copy=copy)
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):

            if isinstance(data, ABCMultiIndex):
                data = data._values

            if dtype is not None:
                # we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                # GH 11836
                data = sanitize_array(data, None, dtype=dtype, copy=copy)

                dtype = data.dtype

            if data.dtype.kind in ["i", "u", "f"]:
                # maybe coerce to a sub-class
                arr = data
            elif data.dtype.kind in ["b", "c"]:
                # No special subclass, and Index._ensure_array won't do this
                # for us.
                arr = np.asarray(data)
            else:
                arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)

            if dtype is None:
                # Infer a better dtype from object data (deprecated behavior
                # for ndarray input; see _maybe_cast_data_without_dtype).
                arr = _maybe_cast_data_without_dtype(
                    arr, cast_numeric_deprecated=True
                )
                dtype = arr.dtype

                if kwargs:
                    # Re-dispatch so deprecated extra keywords are handled.
                    return cls(arr, dtype, copy=copy, name=name, **kwargs)

            klass = cls._dtype_to_subclass(arr.dtype)
            arr = klass._ensure_array(arr, dtype, copy)
            disallow_kwargs(kwargs)
            return klass._simple_new(arr, name)

        elif is_scalar(data):
            # Scalars are never valid Index data.
            raise cls._scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Anything exposing the array protocol: convert and retry.
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
        else:

            if tupleize_cols and is_list_like(data):
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(
                        data, names=name or kwargs.get("names")
                    )
            # other iterable of some kind

            subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
            if dtype is None:
                # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
                subarr = _maybe_cast_data_without_dtype(
                    subarr, cast_numeric_deprecated=False
                )
                dtype = subarr.dtype
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
575 @classmethod
576 def _ensure_array(cls, data, dtype, copy: bool):
577 """
578 Ensure we have a valid array to pass to _simple_new.
579 """
580 if data.ndim > 1:
581 # GH#13601, GH#20285, GH#27125
582 raise ValueError("Index data must be 1-dimensional")
583 if copy:
584 # asarray_tuplesafe does not always copy underlying data,
585 # so need to make sure that this happens
586 data = data.copy()
587 return data
    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Map a dtype to the Index subclass that should hold it.
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex

                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex

                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex

                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex

                return PeriodIndex

            elif isinstance(dtype, SparseDtype):
                warnings.warn(
                    "In a future version, passing a SparseArray to pd.Index "
                    "will store that array directly instead of converting to a "
                    "dense numpy ndarray. To retain the old behavior, use "
                    "pd.Index(arr.to_numpy()) instead",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                # Recurse on the dense subtype (current, deprecated behavior).
                return cls._dtype_to_subclass(dtype.subtype)

            # Other extension dtypes are held in a base Index.
            return Index

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        elif is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_signed_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index

        elif dtype == _dtype_obj:
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(
            dtype.type, (str, bool, np.bool_, complex, np.complex64, np.complex128)
        ):
            return Index

        raise NotImplementedError(dtype)
659 """
660 NOTE for new Index creation:
662 - _simple_new: It returns new Index with the same type as the caller.
663 All metadata (such as name) must be provided by caller's responsibility.
664 Using _shallow_copy is recommended because it fills these metadata
665 otherwise specified.
667 - _shallow_copy: It returns new Index with the same type (using
668 _simple_new), but fills caller's metadata otherwise specified. Passed
669 kwargs will overwrite corresponding metadata.
671 See each method's docstring.
672 """
674 @property
675 def asi8(self):
676 """
677 Integer representation of the values.
679 Returns
680 -------
681 ndarray
682 An ndarray with int64 dtype.
683 """
684 warnings.warn(
685 "Index.asi8 is deprecated and will be removed in a future version.",
686 FutureWarning,
687 stacklevel=find_stack_level(),
688 )
689 return None
691 @classmethod
692 def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT:
693 """
694 We require that we have a dtype compat for the values. If we are passed
695 a non-dtype compat, then coerce using the constructor.
697 Must be careful not to recurse.
698 """
699 assert isinstance(values, cls._data_cls), type(values)
701 result = object.__new__(cls)
702 result._data = values
703 result._name = name
704 result._cache = {}
705 result._reset_identity()
707 return result
    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        with warnings.catch_warnings():
            # This path deliberately keeps the old inference behavior, so
            # silence the constructor's deprecation warning about it.
            warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning)
            result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in ["i", "u", "f", "b"]:
                # Inference found a numeric/bool dtype: rebuild accordingly.
                return Index(values, name=result.name)

        return result
729 @cache_readonly
730 def _constructor(self: _IndexT) -> type[_IndexT]:
731 return type(self)
733 @final
734 def _maybe_check_unique(self) -> None:
735 """
736 Check that an Index has no duplicates.
738 This is typically only called via
739 `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
740 True (duplicates aren't allowed).
742 Raises
743 ------
744 DuplicateLabelError
745 When the index is not unique.
746 """
747 if not self.is_unique:
748 msg = """Index has duplicates."""
749 duplicates = self._format_duplicate_message()
750 msg += f"\n{duplicates}"
752 raise DuplicateLabelError(msg)
    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
              positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        # Group the positions 0..n-1 by label, then select only the labels
        # that are duplicated.
        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")
786 # --------------------------------------------------------------------
787 # Index Internals Methods
789 @final
790 def _get_attributes_dict(self) -> dict[str_t, Any]:
791 """
792 Return an attributes dict for my class.
794 Temporarily added back for compatibility issue in dask, see
795 https://github.com/pandas-dev/pandas/pull/43895
796 """
797 warnings.warn(
798 "The Index._get_attributes_dict method is deprecated, and will be "
799 "removed in a future version",
800 DeprecationWarning,
801 stacklevel=find_stack_level(),
802 )
803 return {k: getattr(self, k, None) for k in self._attributes}
805 def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
806 """
807 Create a new Index with the same class as the caller, don't copy the
808 data, use the same object attributes with passed in attributes taking
809 precedence.
811 *this is an internal non-public method*
813 Parameters
814 ----------
815 values : the values to create the new Index, optional
816 name : Label, defaults to self.name
817 """
818 name = self._name if name is no_default else name
820 return self._simple_new(values, name=name)
822 def _view(self: _IndexT) -> _IndexT:
823 """
824 fastpath to make a shallow copy, i.e. new object with same data.
825 """
826 result = self._simple_new(self._values, name=self._name)
828 result._cache = self._cache
829 return result
831 @final
832 def _rename(self: _IndexT, name: Hashable) -> _IndexT:
833 """
834 fastpath for rename if new name is already validated.
835 """
836 result = self._view()
837 result._name = name
838 return result
840 @final
841 def is_(self, other) -> bool:
842 """
843 More flexible, faster check like ``is`` but that works through views.
845 Note: this is *not* the same as ``Index.identical()``, which checks
846 that metadata is also the same.
848 Parameters
849 ----------
850 other : object
851 Other object to compare against.
853 Returns
854 -------
855 bool
856 True if both have same underlying data, False otherwise.
858 See Also
859 --------
860 Index.identical : Works like ``Index.is_`` but also checks metadata.
861 """
862 if self is other:
863 return True
864 elif not hasattr(other, "_id"):
865 return False
866 elif self._id is None or other._id is None:
867 return False
868 else:
869 return self._id is other._id
    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Fresh sentinel; views copy this reference so ``is_`` can detect
        # shared underlying data.
        self._id = object()
    @final
    def _cleanup(self) -> None:
        # Release the engine's hash-table mapping to free memory.
        self._engine.clear_mapping()
    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine:
        # Build (and cache) the lookup engine for this index's values.
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if (
            isinstance(target_values, ExtensionArray)
            and self._engine_type is libindex.ObjectEngine
        ):
            # EA-backed index without a specialized engine type.
            return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]
908 @final
909 @cache_readonly
910 def _dir_additions_for_owner(self) -> set[str_t]:
911 """
912 Add the string-like labels to the owner dataframe/series dir output.
914 If this is a MultiIndex, it's first level values are used.
915 """
916 return {
917 c
918 for c in self.unique(level=0)[: get_option("display.max_dir_items")]
919 if isinstance(c, str) and c.isidentifier()
920 }
922 # --------------------------------------------------------------------
923 # Array-Like Methods
925 # ndarray compat
926 def __len__(self) -> int:
927 """
928 Return the length of the Index.
929 """
930 return len(self._data)
932 def __array__(self, dtype=None) -> np.ndarray:
933 """
934 The array interface, return my values.
935 """
936 return np.asarray(self._data, dtype=dtype)
    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        # Let Series/DataFrame handle mixed operations (they have priority).
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
        # operations and not logical operations, so don't dispatch
        # This is deprecated, so this full 'if' clause can be removed once
        # deprecation is enforced in 2.0
        if not (
            method == "__call__"
            and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
        ):
            # Try to reuse the corresponding dunder op (e.g. np.add -> __add__).
            result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Unwrap self to its values, apply the ufunc, then re-wrap.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        return self.__array_wrap__(result)
977 def __array_wrap__(self, result, context=None):
978 """
979 Gets called after a ufunc and other functions e.g. np.split.
980 """
981 result = lib.item_from_zerodim(result)
982 if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
983 return result
985 return Index(result, name=self.name)
    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # Cached: the backing array (and hence its dtype) never changes.
        return self._data.dtype
    @final
    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        .. deprecated::
           Returning an ndarray is deprecated; this will return a view on self.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        warnings.warn(
            "Index.ravel returning ndarray is deprecated; in a future version "
            "this will return a view on self.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        if needs_i8_conversion(self.dtype):
            # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]"
            # has no attribute "_ndarray"
            values = self._data._ndarray  # type: ignore[union-attr]
        elif is_interval_dtype(self.dtype):
            # IntervalArray has no engine target; densify to object ndarray.
            values = np.asarray(self._data)
        else:
            values = self._get_engine_target()
        return values.ravel(order=order)
    def view(self, cls=None):
        # Return a view of the index, optionally reinterpreted as ``cls``
        # (either an Index subclass or a dtype to view the data as).

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is a dtype-like, not an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                # Reinterpret the underlying i8 values as the target dtype.
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            # Views share identity so ``is_`` recognizes the shared data.
            result._id = self._id
        return result
    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            if isinstance(dtype, np.dtype) and dtype.kind == "M" and is_unitless(dtype):
                # TODO(2.0): remove this special-casing once this is enforced
                # in DTA.astype
                raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif is_float_dtype(self.dtype) and needs_i8_conversion(dtype):
            # NB: this must come before the ExtensionDtype check below
            # TODO: this differs from Series behavior; can/should we align them?
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            try:
                if dtype == str:
                    # GH#38607
                    new_values = values.astype(dtype, copy=copy)
                else:
                    # GH#13149 specifically use astype_nansafe instead of astype
                    new_values = astype_nansafe(values, dtype=dtype, copy=copy)
            except IntCastingNaNError:
                # Propagate unchanged: the message already explains the failure.
                raise
            except (TypeError, ValueError) as err:
                if dtype.kind == "u" and "losslessly" in str(err):
                    # keep the message from _astype_float_to_int_nansafe
                    raise
                raise TypeError(
                    f"Cannot cast {type(self).__name__} to dtype {dtype}"
                ) from err

        # pass copy=False because any copying will be done in the astype above
        if self._is_backward_compat_public_numeric_index:
            # this block is needed so e.g. NumericIndex[int8].astype("int32") returns
            # NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, removed this also.
            if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
                return self._constructor(
                    new_values, name=self.name, dtype=dtype, copy=False
                )
        return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
    # Docstring template shared across Index subclasses; ``%(klass)s`` is
    # substituted per class and attached via the ``Appender`` decorator on
    # each ``take`` definition.
    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.
        """
1169 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
1170 def take(
1171 self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs
1172 ):
1173 if kwargs:
1174 nv.validate_take((), kwargs)
1175 if is_scalar(indices):
1176 raise TypeError("Expected indices to be array-like")
1177 indices = ensure_platform_int(indices)
1178 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)
1180 # Note: we discard fill_value and use self._na_value, only relevant
1181 # in the case where allow_fill is True and fill_value is not None
1182 values = self._values
1183 if isinstance(values, np.ndarray):
1184 taken = algos.take(
1185 values, indices, allow_fill=allow_fill, fill_value=self._na_value
1186 )
1187 else:
1188 # algos.take passes 'axis' keyword which not all EAs accept
1189 taken = values.take(
1190 indices, allow_fill=allow_fill, fill_value=self._na_value
1191 )
1192 # _constructor so RangeIndex->Int64Index
1193 return self._constructor._simple_new(taken, name=self.name)
1195 @final
1196 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
1197 """
1198 We only use pandas-style take when allow_fill is True _and_
1199 fill_value is not None.
1200 """
1201 if allow_fill and fill_value is not None:
1202 # only fill if we are passing a non-None fill_value
1203 if self._can_hold_na:
1204 if (indices < -1).any():
1205 raise ValueError(
1206 "When allow_fill=True and fill_value is not None, "
1207 "all indices must be >= -1"
1208 )
1209 else:
1210 cls_name = type(self).__name__
1211 raise ValueError(
1212 f"Unable to fill values because {cls_name} cannot contain NA"
1213 )
1214 else:
1215 allow_fill = False
1216 return allow_fill
    # Docstring template shared across Index subclasses; ``%(klass)s`` is
    # substituted per class and attached via the ``Appender`` decorator on
    # each ``repeat`` definition.
    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        repeated_index : %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """
1257 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
1258 def repeat(self, repeats, axis=None):
1259 repeats = ensure_platform_int(repeats)
1260 nv.validate_repeat((), {"axis": axis})
1261 res_values = self._values.repeat(repeats)
1263 # _constructor so RangeIndex->Int64Index
1264 return self._constructor._simple_new(res_values, name=self.name)
1266 # --------------------------------------------------------------------
1267 # Copying Methods
1269 def copy(
1270 self: _IndexT,
1271 name: Hashable | None = None,
1272 deep: bool = False,
1273 dtype: Dtype | None = None,
1274 names: Sequence[Hashable] | None = None,
1275 ) -> _IndexT:
1276 """
1277 Make a copy of this object.
1279 Name and dtype sets those attributes on the new object.
1281 Parameters
1282 ----------
1283 name : Label, optional
1284 Set name for new object.
1285 deep : bool, default False
1286 dtype : numpy dtype or pandas type, optional
1287 Set dtype for new object.
1289 .. deprecated:: 1.2.0
1290 use ``astype`` method instead.
1291 names : list-like, optional
1292 Kept for compatibility with MultiIndex. Should not be used.
1294 .. deprecated:: 1.4.0
1295 use ``name`` instead.
1297 Returns
1298 -------
1299 Index
1300 Index refer to new object which is a copy of this object.
1302 Notes
1303 -----
1304 In most cases, there should be no functional difference from using
1305 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1306 """
1307 if names is not None:
1308 warnings.warn(
1309 "parameter names is deprecated and will be removed in a future "
1310 "version. Use the name parameter instead.",
1311 FutureWarning,
1312 stacklevel=find_stack_level(),
1313 )
1315 name = self._validate_names(name=name, names=names, deep=deep)[0]
1316 if deep:
1317 new_data = self._data.copy()
1318 new_index = type(self)._simple_new(new_data, name=name)
1319 else:
1320 new_index = self._rename(name=name)
1322 if dtype:
1323 warnings.warn(
1324 "parameter dtype is deprecated and will be removed in a future "
1325 "version. Use the astype method instead.",
1326 FutureWarning,
1327 stacklevel=find_stack_level(),
1328 )
1329 new_index = new_index.astype(dtype)
1330 return new_index
1332 @final
1333 def __copy__(self: _IndexT, **kwargs) -> _IndexT:
1334 return self.copy(**kwargs)
1336 @final
1337 def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
1338 """
1339 Parameters
1340 ----------
1341 memo, default None
1342 Standard signature. Unused
1343 """
1344 return self.copy(deep=True)
1346 # --------------------------------------------------------------------
1347 # Rendering Methods
1349 @final
1350 def __repr__(self) -> str_t:
1351 """
1352 Return a string representation for this object.
1353 """
1354 klass_name = type(self).__name__
1355 data = self._format_data()
1356 attrs = self._format_attrs()
1357 space = self._format_space()
1358 attrs_str = [f"{k}={v}" for k, v in attrs]
1359 prepr = f",{space}".join(attrs_str)
1361 # no data provided, just attributes
1362 if data is None:
1363 data = ""
1365 return f"{klass_name}({data}{prepr})"
1367 def _format_space(self) -> str_t:
1369 # using space here controls if the attributes
1370 # are line separated or not (the default)
1372 # max_seq_items = get_option('display.max_seq_items')
1373 # if len(self) > max_seq_items:
1374 # space = "\n%s" % (' ' * (len(klass) + 1))
1375 return " "
1377 @property
1378 def _formatter_func(self):
1379 """
1380 Return the formatter function.
1381 """
1382 return default_pprint
1384 def _format_data(self, name=None) -> str_t:
1385 """
1386 Return the formatted data as a unicode string.
1387 """
1388 # do we want to justify (only do so for non-objects)
1389 is_justify = True
1391 if self.inferred_type == "string":
1392 is_justify = False
1393 elif self.inferred_type == "categorical":
1394 self = cast("CategoricalIndex", self)
1395 if is_object_dtype(self.categories):
1396 is_justify = False
1398 return format_object_summary(
1399 self,
1400 self._formatter_func,
1401 is_justify=is_justify,
1402 name=name,
1403 line_break_each_value=self._is_multi,
1404 )
1406 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
1407 """
1408 Return a list of tuples of the (attr,formatted_value).
1409 """
1410 attrs: list[tuple[str_t, str_t | int | bool | None]] = []
1412 if not self._is_multi:
1413 attrs.append(("dtype", f"'{self.dtype}'"))
1415 if self.name is not None:
1416 attrs.append(("name", default_pprint(self.name)))
1417 elif self._is_multi and any(x is not None for x in self.names):
1418 attrs.append(("names", default_pprint(self.names)))
1420 max_seq_items = get_option("display.max_seq_items") or len(self)
1421 if len(self) > max_seq_items:
1422 attrs.append(("length", len(self)))
1423 return attrs
1425 @final
1426 def _get_level_names(self) -> Hashable | Sequence[Hashable]:
1427 """
1428 Return a name or list of names with None replaced by the level number.
1429 """
1430 if self._is_multi:
1431 return [
1432 level if name is None else name for level, name in enumerate(self.names)
1433 ]
1434 else:
1435 return 0 if self.name is None else self.name
1437 @final
1438 def _mpl_repr(self) -> np.ndarray:
1439 # how to represent ourselves to matplotlib
1440 if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
1441 return cast(np.ndarray, self.values)
1442 return self.astype(object, copy=False)._values
1444 def format(
1445 self,
1446 name: bool = False,
1447 formatter: Callable | None = None,
1448 na_rep: str_t = "NaN",
1449 ) -> list[str_t]:
1450 """
1451 Render a string representation of the Index.
1452 """
1453 header = []
1454 if name:
1455 header.append(
1456 pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
1457 if self.name is not None
1458 else ""
1459 )
1461 if formatter is not None:
1462 return header + list(self.map(formatter))
1464 return self._format_with_header(header, na_rep=na_rep)
1466 def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
1467 from pandas.io.formats.format import format_array
1469 values = self._values
1471 if is_object_dtype(values.dtype):
1472 values = cast(np.ndarray, values)
1473 values = lib.maybe_convert_objects(values, safe=True)
1475 result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
1477 # could have nans
1478 mask = is_float_nan(values)
1479 if mask.any():
1480 result_arr = np.array(result)
1481 result_arr[mask] = na_rep
1482 result = result_arr.tolist()
1483 else:
1484 result = trim_front(format_array(values, None, justify="left"))
1485 return header + result
1487 @final
1488 def to_native_types(self, slicer=None, **kwargs) -> np.ndarray:
1489 """
1490 Format specified values of `self` and return them.
1492 .. deprecated:: 1.2.0
1494 Parameters
1495 ----------
1496 slicer : int, array-like
1497 An indexer into `self` that specifies which values
1498 are used in the formatting process.
1499 kwargs : dict
1500 Options for specifying how the values should be formatted.
1501 These options include the following:
1503 1) na_rep : str
1504 The value that serves as a placeholder for NULL values
1505 2) quoting : bool or None
1506 Whether or not there are quoted values in `self`
1507 3) date_format : str
1508 The format used to represent date-like values.
1510 Returns
1511 -------
1512 numpy.ndarray
1513 Formatted values.
1514 """
1515 warnings.warn(
1516 "The 'to_native_types' method is deprecated and will be removed in "
1517 "a future version. Use 'astype(str)' instead.",
1518 FutureWarning,
1519 stacklevel=find_stack_level(),
1520 )
1521 values = self
1522 if slicer is not None:
1523 values = values[slicer]
1524 return values._format_native_types(**kwargs)
1526 def _format_native_types(
1527 self, *, na_rep="", quoting=None, **kwargs
1528 ) -> npt.NDArray[np.object_]:
1529 """
1530 Actually format specific types of the index.
1531 """
1532 mask = isna(self)
1533 if not self.is_object() and not quoting:
1534 values = np.asarray(self).astype(str)
1535 else:
1536 values = np.array(self, dtype=object, copy=True)
1538 values[mask] = na_rep
1539 return values
1541 def _summary(self, name=None) -> str_t:
1542 """
1543 Return a summarized representation.
1545 Parameters
1546 ----------
1547 name : str
1548 name to use in the summary representation
1550 Returns
1551 -------
1552 String with a summarized representation of the index
1553 """
1554 if len(self) > 0:
1555 head = self[0]
1556 if hasattr(head, "format") and not isinstance(head, str):
1557 head = head.format()
1558 elif needs_i8_conversion(self.dtype):
1559 # e.g. Timedelta, display as values, not quoted
1560 head = self._formatter_func(head).replace("'", "")
1561 tail = self[-1]
1562 if hasattr(tail, "format") and not isinstance(tail, str):
1563 tail = tail.format()
1564 elif needs_i8_conversion(self.dtype):
1565 # e.g. Timedelta, display as values, not quoted
1566 tail = self._formatter_func(tail).replace("'", "")
1568 index_summary = f", {head} to {tail}"
1569 else:
1570 index_summary = ""
1572 if name is None:
1573 name = type(self).__name__
1574 return f"{name}: {len(self)} entries{index_summary}"
1576 # --------------------------------------------------------------------
1577 # Conversion Methods
1579 def to_flat_index(self: _IndexT) -> _IndexT:
1580 """
1581 Identity method.
1583 This is implemented for compatibility with subclass implementations
1584 when chaining.
1586 Returns
1587 -------
1588 pd.Index
1589 Caller.
1591 See Also
1592 --------
1593 MultiIndex.to_flat_index : Subclass implementation.
1594 """
1595 return self
1597 def to_series(self, index=None, name: Hashable = None) -> Series:
1598 """
1599 Create a Series with both index and values equal to the index keys.
1601 Useful with map for returning an indexer based on an index.
1603 Parameters
1604 ----------
1605 index : Index, optional
1606 Index of resulting Series. If None, defaults to original index.
1607 name : str, optional
1608 Name of resulting Series. If None, defaults to name of original
1609 index.
1611 Returns
1612 -------
1613 Series
1614 The dtype will be based on the type of the Index values.
1616 See Also
1617 --------
1618 Index.to_frame : Convert an Index to a DataFrame.
1619 Series.to_frame : Convert Series to DataFrame.
1621 Examples
1622 --------
1623 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1625 By default, the original Index and original name is reused.
1627 >>> idx.to_series()
1628 animal
1629 Ant Ant
1630 Bear Bear
1631 Cow Cow
1632 Name: animal, dtype: object
1634 To enforce a new Index, specify new labels to ``index``:
1636 >>> idx.to_series(index=[0, 1, 2])
1637 0 Ant
1638 1 Bear
1639 2 Cow
1640 Name: animal, dtype: object
1642 To override the name of the resulting column, specify `name`:
1644 >>> idx.to_series(name='zoo')
1645 animal
1646 Ant Ant
1647 Bear Bear
1648 Cow Cow
1649 Name: zoo, dtype: object
1650 """
1651 from pandas import Series
1653 if index is None:
1654 index = self._view()
1655 if name is None:
1656 name = self.name
1658 return Series(self._values.copy(), index=index, name=name)
1660 def to_frame(
1661 self, index: bool = True, name: Hashable = lib.no_default
1662 ) -> DataFrame:
1663 """
1664 Create a DataFrame with a column containing the Index.
1666 Parameters
1667 ----------
1668 index : bool, default True
1669 Set the index of the returned DataFrame as the original Index.
1671 name : object, default None
1672 The passed name should substitute for the index name (if it has
1673 one).
1675 Returns
1676 -------
1677 DataFrame
1678 DataFrame containing the original Index data.
1680 See Also
1681 --------
1682 Index.to_series : Convert an Index to a Series.
1683 Series.to_frame : Convert Series to DataFrame.
1685 Examples
1686 --------
1687 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1688 >>> idx.to_frame()
1689 animal
1690 animal
1691 Ant Ant
1692 Bear Bear
1693 Cow Cow
1695 By default, the original Index is reused. To enforce a new Index:
1697 >>> idx.to_frame(index=False)
1698 animal
1699 0 Ant
1700 1 Bear
1701 2 Cow
1703 To override the name of the resulting column, specify `name`:
1705 >>> idx.to_frame(index=False, name='zoo')
1706 zoo
1707 0 Ant
1708 1 Bear
1709 2 Cow
1710 """
1711 from pandas import DataFrame
1713 if name is None:
1714 warnings.warn(
1715 "Explicitly passing `name=None` currently preserves the Index's name "
1716 "or uses a default name of 0. This behaviour is deprecated, and in "
1717 "the future `None` will be used as the name of the resulting "
1718 "DataFrame column.",
1719 FutureWarning,
1720 stacklevel=find_stack_level(),
1721 )
1722 name = lib.no_default
1724 if name is lib.no_default:
1725 name = self._get_level_names()
1726 result = DataFrame({name: self._values.copy()})
1728 if index:
1729 result.index = self
1730 return result
1732 # --------------------------------------------------------------------
1733 # Name-Centric Methods
1735 @property
1736 def name(self) -> Hashable:
1737 """
1738 Return Index or MultiIndex name.
1739 """
1740 return self._name
1742 @name.setter
1743 def name(self, value: Hashable) -> None:
1744 if self._no_setting_name:
1745 # Used in MultiIndex.levels to avoid silently ignoring name updates.
1746 raise RuntimeError(
1747 "Cannot set name on a level of a MultiIndex. Use "
1748 "'MultiIndex.set_names' instead."
1749 )
1750 maybe_extract_name(value, None, type(self))
1751 self._name = value
1753 @final
1754 def _validate_names(
1755 self, name=None, names=None, deep: bool = False
1756 ) -> list[Hashable]:
1757 """
1758 Handles the quirks of having a singular 'name' parameter for general
1759 Index and plural 'names' parameter for MultiIndex.
1760 """
1761 from copy import deepcopy
1763 if names is not None and name is not None:
1764 raise TypeError("Can only provide one of `names` and `name`")
1765 elif names is None and name is None:
1766 new_names = deepcopy(self.names) if deep else self.names
1767 elif names is not None:
1768 if not is_list_like(names):
1769 raise TypeError("Must pass list-like as `names`.")
1770 new_names = names
1771 elif not is_list_like(name):
1772 new_names = [name]
1773 else:
1774 new_names = name
1776 if len(new_names) != len(self.names):
1777 raise ValueError(
1778 f"Length of new names must be {len(self.names)}, got {len(new_names)}"
1779 )
1781 # All items in 'new_names' need to be hashable
1782 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
1784 return new_names
1786 def _get_default_index_names(
1787 self, names: Hashable | Sequence[Hashable] | None = None, default=None
1788 ) -> list[Hashable]:
1789 """
1790 Get names of index.
1792 Parameters
1793 ----------
1794 names : int, str or 1-dimensional list, default None
1795 Index names to set.
1796 default : str
1797 Default name of index.
1799 Raises
1800 ------
1801 TypeError
1802 if names not str or list-like
1803 """
1804 from pandas.core.indexes.multi import MultiIndex
1806 if names is not None:
1807 if isinstance(names, str) or isinstance(names, int):
1808 names = [names]
1810 if not isinstance(names, list) and names is not None:
1811 raise ValueError("Index names must be str or 1-dimensional list")
1813 if not names:
1814 if isinstance(self, MultiIndex):
1815 names = com.fill_missing_names(self.names)
1816 else:
1817 names = [default] if self.name is None else [self.name]
1819 return names
1821 def _get_names(self) -> FrozenList:
1822 return FrozenList((self.name,))
1824 def _set_names(self, values, *, level=None) -> None:
1825 """
1826 Set new names on index. Each name has to be a hashable type.
1828 Parameters
1829 ----------
1830 values : str or sequence
1831 name(s) to set
1832 level : int, level name, or sequence of int/level names (default None)
1833 If the index is a MultiIndex (hierarchical), level(s) to set (None
1834 for all levels). Otherwise level must be None
1836 Raises
1837 ------
1838 TypeError if each name is not hashable.
1839 """
1840 if not is_list_like(values):
1841 raise ValueError("Names must be a list-like")
1842 if len(values) != 1:
1843 raise ValueError(f"Length of new names must be 1, got {len(values)}")
1845 # GH 20527
1846 # All items in 'name' need to be hashable:
1847 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
1849 self._name = values[0]
1851 names = property(fset=_set_names, fget=_get_names)
1853 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"])
1854 def set_names(self, names, level=None, inplace: bool = False):
1855 """
1856 Set Index or MultiIndex name.
1858 Able to set new names partially and by level.
1860 Parameters
1861 ----------
1863 names : label or list of label or dict-like for MultiIndex
1864 Name(s) to set.
1866 .. versionchanged:: 1.3.0
1868 level : int, label or list of int or label, optional
1869 If the index is a MultiIndex and names is not dict-like, level(s) to set
1870 (None for all levels). Otherwise level must be None.
1872 .. versionchanged:: 1.3.0
1874 inplace : bool, default False
1875 Modifies the object directly, instead of creating a new Index or
1876 MultiIndex.
1878 Returns
1879 -------
1880 Index or None
1881 The same type as the caller or None if ``inplace=True``.
1883 See Also
1884 --------
1885 Index.rename : Able to set new names without level.
1887 Examples
1888 --------
1889 >>> idx = pd.Index([1, 2, 3, 4])
1890 >>> idx
1891 Int64Index([1, 2, 3, 4], dtype='int64')
1892 >>> idx.set_names('quarter')
1893 Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
1895 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1896 ... [2018, 2019]])
1897 >>> idx
1898 MultiIndex([('python', 2018),
1899 ('python', 2019),
1900 ( 'cobra', 2018),
1901 ( 'cobra', 2019)],
1902 )
1903 >>> idx.set_names(['kind', 'year'], inplace=True)
1904 >>> idx
1905 MultiIndex([('python', 2018),
1906 ('python', 2019),
1907 ( 'cobra', 2018),
1908 ( 'cobra', 2019)],
1909 names=['kind', 'year'])
1910 >>> idx.set_names('species', level=0)
1911 MultiIndex([('python', 2018),
1912 ('python', 2019),
1913 ( 'cobra', 2018),
1914 ( 'cobra', 2019)],
1915 names=['species', 'year'])
1917 When renaming levels with a dict, levels can not be passed.
1919 >>> idx.set_names({'kind': 'snake'})
1920 MultiIndex([('python', 2018),
1921 ('python', 2019),
1922 ( 'cobra', 2018),
1923 ( 'cobra', 2019)],
1924 names=['snake', 'year'])
1925 """
1926 if level is not None and not isinstance(self, ABCMultiIndex):
1927 raise ValueError("Level must be None for non-MultiIndex")
1929 elif level is not None and not is_list_like(level) and is_list_like(names):
1930 raise TypeError("Names must be a string when a single level is provided.")
1932 elif not is_list_like(names) and level is None and self.nlevels > 1:
1933 raise TypeError("Must pass list-like as `names`.")
1935 elif is_dict_like(names) and not isinstance(self, ABCMultiIndex):
1936 raise TypeError("Can only pass dict-like as `names` for MultiIndex.")
1938 elif is_dict_like(names) and level is not None:
1939 raise TypeError("Can not pass level for dictlike `names`.")
1941 if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
1942 # Transform dict to list of new names and corresponding levels
1943 level, names_adjusted = [], []
1944 for i, name in enumerate(self.names):
1945 if name in names.keys():
1946 level.append(i)
1947 names_adjusted.append(names[name])
1948 names = names_adjusted
1950 if not is_list_like(names):
1951 names = [names]
1952 if level is not None and not is_list_like(level):
1953 level = [level]
1955 if inplace:
1956 idx = self
1957 else:
1958 idx = self._view()
1960 idx._set_names(names, level=level)
1961 if not inplace:
1962 return idx
1964 def rename(self, name, inplace=False):
1965 """
1966 Alter Index or MultiIndex name.
1968 Able to set new names without level. Defaults to returning new index.
1969 Length of names must match number of levels in MultiIndex.
1971 Parameters
1972 ----------
1973 name : label or list of labels
1974 Name(s) to set.
1975 inplace : bool, default False
1976 Modifies the object directly, instead of creating a new Index or
1977 MultiIndex.
1979 Returns
1980 -------
1981 Index or None
1982 The same type as the caller or None if ``inplace=True``.
1984 See Also
1985 --------
1986 Index.set_names : Able to set new names partially and by level.
1988 Examples
1989 --------
1990 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
1991 >>> idx.rename('grade')
1992 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
1994 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1995 ... [2018, 2019]],
1996 ... names=['kind', 'year'])
1997 >>> idx
1998 MultiIndex([('python', 2018),
1999 ('python', 2019),
2000 ( 'cobra', 2018),
2001 ( 'cobra', 2019)],
2002 names=['kind', 'year'])
2003 >>> idx.rename(['species', 'year'])
2004 MultiIndex([('python', 2018),
2005 ('python', 2019),
2006 ( 'cobra', 2018),
2007 ( 'cobra', 2019)],
2008 names=['species', 'year'])
2009 >>> idx.rename('species')
2010 Traceback (most recent call last):
2011 TypeError: Must pass list-like as `names`.
2012 """
2013 return self.set_names([name], inplace=inplace)
2015 # --------------------------------------------------------------------
2016 # Level-Centric Methods
2018 @property
2019 def nlevels(self) -> int:
2020 """
2021 Number of levels.
2022 """
2023 return 1
2025 def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
2026 """
2027 Compat with MultiIndex.
2028 """
2029 return self
2031 @final
2032 def _validate_index_level(self, level) -> None:
2033 """
2034 Validate index level.
2036 For single-level Index getting level number is a no-op, but some
2037 verification must be done like in MultiIndex.
2039 """
2040 if isinstance(level, int):
2041 if level < 0 and level != -1:
2042 raise IndexError(
2043 "Too many levels: Index has only 1 level, "
2044 f"{level} is not a valid level number"
2045 )
2046 elif level > 0:
2047 raise IndexError(
2048 f"Too many levels: Index has only 1 level, not {level + 1}"
2049 )
2050 elif level != self.name:
2051 raise KeyError(
2052 f"Requested level ({level}) does not match index name ({self.name})"
2053 )
2055 def _get_level_number(self, level) -> int:
2056 self._validate_index_level(level)
2057 return 0
2059 def sortlevel(self, level=None, ascending=True, sort_remaining=None):
2060 """
2061 For internal compatibility with the Index API.
2063 Sort the Index. This is for compat with MultiIndex
2065 Parameters
2066 ----------
2067 ascending : bool, default True
2068 False to sort in descending order
2070 level, sort_remaining are compat parameters
2072 Returns
2073 -------
2074 Index
2075 """
2076 if not isinstance(ascending, (list, bool)):
2077 raise TypeError(
2078 "ascending must be a single bool value or"
2079 "a list of bool values of length 1"
2080 )
2082 if isinstance(ascending, list):
2083 if len(ascending) != 1:
2084 raise TypeError("ascending must be a list of bool values of length 1")
2085 ascending = ascending[0]
2087 if not isinstance(ascending, bool):
2088 raise TypeError("ascending must be a bool value")
2090 return self.sort_values(return_indexer=True, ascending=ascending)
2092 def _get_level_values(self, level) -> Index:
2093 """
2094 Return an Index of values for requested level.
2096 This is primarily useful to get an individual level of values from a
2097 MultiIndex, but is provided on Index as well for compatibility.
2099 Parameters
2100 ----------
2101 level : int or str
2102 It is either the integer position or the name of the level.
2104 Returns
2105 -------
2106 Index
2107 Calling object, as there is only one level in the Index.
2109 See Also
2110 --------
2111 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
2113 Notes
2114 -----
2115 For Index, level should be 0, since there are no multiple levels.
2117 Examples
2118 --------
2119 >>> idx = pd.Index(list('abc'))
2120 >>> idx
2121 Index(['a', 'b', 'c'], dtype='object')
2123 Get level values by supplying `level` as integer:
2125 >>> idx.get_level_values(0)
2126 Index(['a', 'b', 'c'], dtype='object')
2127 """
2128 self._validate_index_level(level)
2129 return self
2131 get_level_values = _get_level_values
2133 @final
2134 def droplevel(self, level=0):
2135 """
2136 Return index with requested level(s) removed.
2138 If resulting index has only 1 level left, the result will be
2139 of Index type, not MultiIndex.
2141 Parameters
2142 ----------
2143 level : int, str, or list-like, default 0
2144 If a string is given, must be the name of a level
2145 If list-like, elements must be names or indexes of levels.
2147 Returns
2148 -------
2149 Index or MultiIndex
2151 Examples
2152 --------
2153 >>> mi = pd.MultiIndex.from_arrays(
2154 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2155 >>> mi
2156 MultiIndex([(1, 3, 5),
2157 (2, 4, 6)],
2158 names=['x', 'y', 'z'])
2160 >>> mi.droplevel()
2161 MultiIndex([(3, 5),
2162 (4, 6)],
2163 names=['y', 'z'])
2165 >>> mi.droplevel(2)
2166 MultiIndex([(1, 3),
2167 (2, 4)],
2168 names=['x', 'y'])
2170 >>> mi.droplevel('z')
2171 MultiIndex([(1, 3),
2172 (2, 4)],
2173 names=['x', 'y'])
2175 >>> mi.droplevel(['x', 'y'])
2176 Int64Index([5, 6], dtype='int64', name='z')
2177 """
2178 if not isinstance(level, (tuple, list)):
2179 level = [level]
2181 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2183 return self._drop_level_numbers(levnums)
    @final
    def _drop_level_numbers(self, levnums: list[int]):
        """
        Drop MultiIndex levels by level _number_, not name.

        Parameters
        ----------
        levnums : list of int
            Level numbers to drop. Callers (see ``droplevel``) pass these in
            decreasing order so popping one level does not shift the rest.

        Returns
        -------
        Index or MultiIndex
            ``self`` unchanged when ``levnums`` is empty on a non-MultiIndex;
            a flat Index when exactly one level remains; otherwise a new
            MultiIndex over the remaining levels.
        """

        if not levnums and not isinstance(self, ABCMultiIndex):
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:
            lev = new_levels[0]

            if len(lev) == 0:
                # If lev is empty, lev.take will fail GH#42055
                if len(new_codes[0]) == 0:
                    # GH#45230 preserve RangeIndex here
                    #  see test_reset_index_empty_rangeindex
                    result = lev[:0]
                else:
                    res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                    # _constructor instead of type(lev) for RangeIndex compat GH#35230
                    result = lev._constructor._simple_new(res_values, name=new_names[0])
            else:
                # set nan if needed
                mask = new_codes[0] == -1
                result = new_levels[0].take(new_codes[0])
                if mask.any():
                    result = result.putmask(mask, np.nan)

                result._name = new_names[0]

            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )
2243 def _get_grouper_for_level(
2244 self,
2245 mapper,
2246 *,
2247 level=None,
2248 dropna: bool = True,
2249 ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
2250 """
2251 Get index grouper corresponding to an index level
2253 Parameters
2254 ----------
2255 mapper: Group mapping function or None
2256 Function mapping index values to groups
2257 level : int or None
2258 Index level, positional
2259 dropna : bool
2260 dropna from groupby
2262 Returns
2263 -------
2264 grouper : Index
2265 Index of values to group on.
2266 labels : ndarray of int or None
2267 Array of locations in level_index.
2268 uniques : Index or None
2269 Index of unique values for level.
2270 """
2271 assert level is None or level == 0
2272 if mapper is None:
2273 grouper = self
2274 else:
2275 grouper = self.map(mapper)
2277 return grouper, None, None
2279 # --------------------------------------------------------------------
2280 # Introspection Methods
2282 @cache_readonly
2283 @final
2284 def _can_hold_na(self) -> bool:
2285 if isinstance(self.dtype, ExtensionDtype):
2286 if isinstance(self.dtype, IntervalDtype):
2287 # FIXME(GH#45720): this is inaccurate for integer-backed
2288 # IntervalArray, but without it other.categories.take raises
2289 # in IntervalArray._cmp_method
2290 return True
2291 return self.dtype._can_hold_na
2292 if self.dtype.kind in ["i", "u", "b"]:
2293 return False
2294 return True
2296 @final
2297 @property
2298 def is_monotonic(self) -> bool:
2299 """
2300 Alias for is_monotonic_increasing.
2302 .. deprecated:: 1.5.0
2303 is_monotonic is deprecated and will be removed in a future version.
2304 Use is_monotonic_increasing instead.
2305 """
2306 warnings.warn(
2307 "is_monotonic is deprecated and will be removed in a future version. "
2308 "Use is_monotonic_increasing instead.",
2309 FutureWarning,
2310 stacklevel=find_stack_level(),
2311 )
2312 return self.is_monotonic_increasing
2314 @property
2315 def is_monotonic_increasing(self) -> bool:
2316 """
2317 Return a boolean if the values are equal or increasing.
2319 Examples
2320 --------
2321 >>> Index([1, 2, 3]).is_monotonic_increasing
2322 True
2323 >>> Index([1, 2, 2]).is_monotonic_increasing
2324 True
2325 >>> Index([1, 3, 2]).is_monotonic_increasing
2326 False
2327 """
2328 return self._engine.is_monotonic_increasing
2330 @property
2331 def is_monotonic_decreasing(self) -> bool:
2332 """
2333 Return a boolean if the values are equal or decreasing.
2335 Examples
2336 --------
2337 >>> Index([3, 2, 1]).is_monotonic_decreasing
2338 True
2339 >>> Index([3, 2, 2]).is_monotonic_decreasing
2340 True
2341 >>> Index([3, 1, 2]).is_monotonic_decreasing
2342 False
2343 """
2344 return self._engine.is_monotonic_decreasing
2346 @final
2347 @property
2348 def _is_strictly_monotonic_increasing(self) -> bool:
2349 """
2350 Return if the index is strictly monotonic increasing
2351 (only increasing) values.
2353 Examples
2354 --------
2355 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
2356 True
2357 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
2358 False
2359 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
2360 False
2361 """
2362 return self.is_unique and self.is_monotonic_increasing
2364 @final
2365 @property
2366 def _is_strictly_monotonic_decreasing(self) -> bool:
2367 """
2368 Return if the index is strictly monotonic decreasing
2369 (only decreasing) values.
2371 Examples
2372 --------
2373 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
2374 True
2375 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
2376 False
2377 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
2378 False
2379 """
2380 return self.is_unique and self.is_monotonic_decreasing
2382 @cache_readonly
2383 def is_unique(self) -> bool:
2384 """
2385 Return if the index has unique values.
2386 """
2387 return self._engine.is_unique
2389 @final
2390 @property
2391 def has_duplicates(self) -> bool:
2392 """
2393 Check if the Index has duplicate values.
2395 Returns
2396 -------
2397 bool
2398 Whether or not the Index has duplicate values.
2400 Examples
2401 --------
2402 >>> idx = pd.Index([1, 5, 7, 7])
2403 >>> idx.has_duplicates
2404 True
2406 >>> idx = pd.Index([1, 5, 7])
2407 >>> idx.has_duplicates
2408 False
2410 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2411 ... "Watermelon"]).astype("category")
2412 >>> idx.has_duplicates
2413 True
2415 >>> idx = pd.Index(["Orange", "Apple",
2416 ... "Watermelon"]).astype("category")
2417 >>> idx.has_duplicates
2418 False
2419 """
2420 return not self.is_unique
2422 @final
2423 def is_boolean(self) -> bool:
2424 """
2425 Check if the Index only consists of booleans.
2427 Returns
2428 -------
2429 bool
2430 Whether or not the Index only consists of booleans.
2432 See Also
2433 --------
2434 is_integer : Check if the Index only consists of integers.
2435 is_floating : Check if the Index is a floating type.
2436 is_numeric : Check if the Index only consists of numeric data.
2437 is_object : Check if the Index is of the object dtype.
2438 is_categorical : Check if the Index holds categorical data.
2439 is_interval : Check if the Index holds Interval objects.
2440 is_mixed : Check if the Index holds data with mixed data types.
2442 Examples
2443 --------
2444 >>> idx = pd.Index([True, False, True])
2445 >>> idx.is_boolean()
2446 True
2448 >>> idx = pd.Index(["True", "False", "True"])
2449 >>> idx.is_boolean()
2450 False
2452 >>> idx = pd.Index([True, False, "True"])
2453 >>> idx.is_boolean()
2454 False
2455 """
2456 return self.inferred_type in ["boolean"]
2458 @final
2459 def is_integer(self) -> bool:
2460 """
2461 Check if the Index only consists of integers.
2463 Returns
2464 -------
2465 bool
2466 Whether or not the Index only consists of integers.
2468 See Also
2469 --------
2470 is_boolean : Check if the Index only consists of booleans.
2471 is_floating : Check if the Index is a floating type.
2472 is_numeric : Check if the Index only consists of numeric data.
2473 is_object : Check if the Index is of the object dtype.
2474 is_categorical : Check if the Index holds categorical data.
2475 is_interval : Check if the Index holds Interval objects.
2476 is_mixed : Check if the Index holds data with mixed data types.
2478 Examples
2479 --------
2480 >>> idx = pd.Index([1, 2, 3, 4])
2481 >>> idx.is_integer()
2482 True
2484 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2485 >>> idx.is_integer()
2486 False
2488 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2489 >>> idx.is_integer()
2490 False
2491 """
2492 return self.inferred_type in ["integer"]
2494 @final
2495 def is_floating(self) -> bool:
2496 """
2497 Check if the Index is a floating type.
2499 The Index may consist of only floats, NaNs, or a mix of floats,
2500 integers, or NaNs.
2502 Returns
2503 -------
2504 bool
2505 Whether or not the Index only consists of only consists of floats, NaNs, or
2506 a mix of floats, integers, or NaNs.
2508 See Also
2509 --------
2510 is_boolean : Check if the Index only consists of booleans.
2511 is_integer : Check if the Index only consists of integers.
2512 is_numeric : Check if the Index only consists of numeric data.
2513 is_object : Check if the Index is of the object dtype.
2514 is_categorical : Check if the Index holds categorical data.
2515 is_interval : Check if the Index holds Interval objects.
2516 is_mixed : Check if the Index holds data with mixed data types.
2518 Examples
2519 --------
2520 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2521 >>> idx.is_floating()
2522 True
2524 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2525 >>> idx.is_floating()
2526 True
2528 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2529 >>> idx.is_floating()
2530 True
2532 >>> idx = pd.Index([1, 2, 3, 4])
2533 >>> idx.is_floating()
2534 False
2535 """
2536 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2538 @final
2539 def is_numeric(self) -> bool:
2540 """
2541 Check if the Index only consists of numeric data.
2543 Returns
2544 -------
2545 bool
2546 Whether or not the Index only consists of numeric data.
2548 See Also
2549 --------
2550 is_boolean : Check if the Index only consists of booleans.
2551 is_integer : Check if the Index only consists of integers.
2552 is_floating : Check if the Index is a floating type.
2553 is_object : Check if the Index is of the object dtype.
2554 is_categorical : Check if the Index holds categorical data.
2555 is_interval : Check if the Index holds Interval objects.
2556 is_mixed : Check if the Index holds data with mixed data types.
2558 Examples
2559 --------
2560 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2561 >>> idx.is_numeric()
2562 True
2564 >>> idx = pd.Index([1, 2, 3, 4.0])
2565 >>> idx.is_numeric()
2566 True
2568 >>> idx = pd.Index([1, 2, 3, 4])
2569 >>> idx.is_numeric()
2570 True
2572 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2573 >>> idx.is_numeric()
2574 True
2576 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2577 >>> idx.is_numeric()
2578 False
2579 """
2580 return self.inferred_type in ["integer", "floating"]
2582 @final
2583 def is_object(self) -> bool:
2584 """
2585 Check if the Index is of the object dtype.
2587 Returns
2588 -------
2589 bool
2590 Whether or not the Index is of the object dtype.
2592 See Also
2593 --------
2594 is_boolean : Check if the Index only consists of booleans.
2595 is_integer : Check if the Index only consists of integers.
2596 is_floating : Check if the Index is a floating type.
2597 is_numeric : Check if the Index only consists of numeric data.
2598 is_categorical : Check if the Index holds categorical data.
2599 is_interval : Check if the Index holds Interval objects.
2600 is_mixed : Check if the Index holds data with mixed data types.
2602 Examples
2603 --------
2604 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2605 >>> idx.is_object()
2606 True
2608 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2609 >>> idx.is_object()
2610 True
2612 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2613 ... "Watermelon"]).astype("category")
2614 >>> idx.is_object()
2615 False
2617 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2618 >>> idx.is_object()
2619 False
2620 """
2621 return is_object_dtype(self.dtype)
2623 @final
2624 def is_categorical(self) -> bool:
2625 """
2626 Check if the Index holds categorical data.
2628 Returns
2629 -------
2630 bool
2631 True if the Index is categorical.
2633 See Also
2634 --------
2635 CategoricalIndex : Index for categorical data.
2636 is_boolean : Check if the Index only consists of booleans.
2637 is_integer : Check if the Index only consists of integers.
2638 is_floating : Check if the Index is a floating type.
2639 is_numeric : Check if the Index only consists of numeric data.
2640 is_object : Check if the Index is of the object dtype.
2641 is_interval : Check if the Index holds Interval objects.
2642 is_mixed : Check if the Index holds data with mixed data types.
2644 Examples
2645 --------
2646 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2647 ... "Watermelon"]).astype("category")
2648 >>> idx.is_categorical()
2649 True
2651 >>> idx = pd.Index([1, 3, 5, 7])
2652 >>> idx.is_categorical()
2653 False
2655 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2656 >>> s
2657 0 Peter
2658 1 Victor
2659 2 Elisabeth
2660 3 Mar
2661 dtype: object
2662 >>> s.index.is_categorical()
2663 False
2664 """
2665 return self.inferred_type in ["categorical"]
2667 @final
2668 def is_interval(self) -> bool:
2669 """
2670 Check if the Index holds Interval objects.
2672 Returns
2673 -------
2674 bool
2675 Whether or not the Index holds Interval objects.
2677 See Also
2678 --------
2679 IntervalIndex : Index for Interval objects.
2680 is_boolean : Check if the Index only consists of booleans.
2681 is_integer : Check if the Index only consists of integers.
2682 is_floating : Check if the Index is a floating type.
2683 is_numeric : Check if the Index only consists of numeric data.
2684 is_object : Check if the Index is of the object dtype.
2685 is_categorical : Check if the Index holds categorical data.
2686 is_mixed : Check if the Index holds data with mixed data types.
2688 Examples
2689 --------
2690 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2691 ... pd.Interval(left=5, right=10)])
2692 >>> idx.is_interval()
2693 True
2695 >>> idx = pd.Index([1, 3, 5, 7])
2696 >>> idx.is_interval()
2697 False
2698 """
2699 return self.inferred_type in ["interval"]
2701 @final
2702 def is_mixed(self) -> bool:
2703 """
2704 Check if the Index holds data with mixed data types.
2706 Returns
2707 -------
2708 bool
2709 Whether or not the Index holds data with mixed data types.
2711 See Also
2712 --------
2713 is_boolean : Check if the Index only consists of booleans.
2714 is_integer : Check if the Index only consists of integers.
2715 is_floating : Check if the Index is a floating type.
2716 is_numeric : Check if the Index only consists of numeric data.
2717 is_object : Check if the Index is of the object dtype.
2718 is_categorical : Check if the Index holds categorical data.
2719 is_interval : Check if the Index holds Interval objects.
2721 Examples
2722 --------
2723 >>> idx = pd.Index(['a', np.nan, 'b'])
2724 >>> idx.is_mixed()
2725 True
2727 >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0])
2728 >>> idx.is_mixed()
2729 False
2730 """
2731 warnings.warn(
2732 "Index.is_mixed is deprecated and will be removed in a future version. "
2733 "Check index.inferred_type directly instead.",
2734 FutureWarning,
2735 stacklevel=find_stack_level(),
2736 )
2737 return self.inferred_type in ["mixed"]
2739 @final
2740 def holds_integer(self) -> bool:
2741 """
2742 Whether the type is an integer type.
2743 """
2744 return self.inferred_type in ["integer", "mixed-integer"]
2746 @cache_readonly
2747 def inferred_type(self) -> str_t:
2748 """
2749 Return a string of the type inferred from the values.
2750 """
2751 return lib.infer_dtype(self._values, skipna=False)
2753 @cache_readonly
2754 @final
2755 def _is_all_dates(self) -> bool:
2756 """
2757 Whether or not the index values only consist of dates.
2758 """
2759 if needs_i8_conversion(self.dtype):
2760 return True
2761 elif self.dtype != _dtype_obj:
2762 # TODO(ExtensionIndex): 3rd party EA might override?
2763 # Note: this includes IntervalIndex, even when the left/right
2764 # contain datetime-like objects.
2765 return False
2766 elif self._is_multi:
2767 return False
2768 return is_datetime_array(ensure_object(self._values))
2770 @cache_readonly
2771 @final
2772 def is_all_dates(self) -> bool:
2773 """
2774 Whether or not the index values only consist of dates.
2775 """
2776 warnings.warn(
2777 "Index.is_all_dates is deprecated, will be removed in a future version. "
2778 "check index.inferred_type instead.",
2779 FutureWarning,
2780 stacklevel=find_stack_level(),
2781 )
2782 return self._is_all_dates
2784 @final
2785 @cache_readonly
2786 def _is_multi(self) -> bool:
2787 """
2788 Cached check equivalent to isinstance(self, MultiIndex)
2789 """
2790 return isinstance(self, ABCMultiIndex)
2792 # --------------------------------------------------------------------
2793 # Pickle Methods
2795 def __reduce__(self):
2796 d = {"data": self._data, "name": self.name}
2797 return _new_Index, (type(self), d), None
2799 # --------------------------------------------------------------------
2800 # Null Handling Methods
2802 @cache_readonly
2803 def _na_value(self):
2804 """The expected NA value to use with this index."""
2805 dtype = self.dtype
2806 if isinstance(dtype, np.dtype):
2807 if dtype.kind in ["m", "M"]:
2808 return NaT
2809 return np.nan
2810 return dtype.na_value
2812 @cache_readonly
2813 def _isnan(self) -> npt.NDArray[np.bool_]:
2814 """
2815 Return if each value is NaN.
2816 """
2817 if self._can_hold_na:
2818 return isna(self)
2819 else:
2820 # shouldn't reach to this condition by checking hasnans beforehand
2821 values = np.empty(len(self), dtype=np.bool_)
2822 values.fill(False)
2823 return values
2825 @cache_readonly
2826 def hasnans(self) -> bool:
2827 """
2828 Return True if there are any NaNs.
2830 Enables various performance speedups.
2831 """
2832 if self._can_hold_na:
2833 return bool(self._isnan.any())
2834 else:
2835 return False
2837 @final
2838 def isna(self) -> npt.NDArray[np.bool_]:
2839 """
2840 Detect missing values.
2842 Return a boolean same-sized object indicating if the values are NA.
2843 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2844 mapped to ``True`` values.
2845 Everything else get mapped to ``False`` values. Characters such as
2846 empty strings `''` or :attr:`numpy.inf` are not considered NA values
2847 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2849 Returns
2850 -------
2851 numpy.ndarray[bool]
2852 A boolean array of whether my values are NA.
2854 See Also
2855 --------
2856 Index.notna : Boolean inverse of isna.
2857 Index.dropna : Omit entries with missing values.
2858 isna : Top-level isna.
2859 Series.isna : Detect missing values in Series object.
2861 Examples
2862 --------
2863 Show which entries in a pandas.Index are NA. The result is an
2864 array.
2866 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2867 >>> idx
2868 Float64Index([5.2, 6.0, nan], dtype='float64')
2869 >>> idx.isna()
2870 array([False, False, True])
2872 Empty strings are not considered NA values. None is considered an NA
2873 value.
2875 >>> idx = pd.Index(['black', '', 'red', None])
2876 >>> idx
2877 Index(['black', '', 'red', None], dtype='object')
2878 >>> idx.isna()
2879 array([False, False, False, True])
2881 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2883 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2884 ... pd.Timestamp(''), None, pd.NaT])
2885 >>> idx
2886 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2887 dtype='datetime64[ns]', freq=None)
2888 >>> idx.isna()
2889 array([False, True, True, True])
2890 """
2891 return self._isnan
2893 isnull = isna
2895 @final
2896 def notna(self) -> npt.NDArray[np.bool_]:
2897 """
2898 Detect existing (non-missing) values.
2900 Return a boolean same-sized object indicating if the values are not NA.
2901 Non-missing values get mapped to ``True``. Characters such as empty
2902 strings ``''`` or :attr:`numpy.inf` are not considered NA values
2903 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2904 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2905 values.
2907 Returns
2908 -------
2909 numpy.ndarray[bool]
2910 Boolean array to indicate which entries are not NA.
2912 See Also
2913 --------
2914 Index.notnull : Alias of notna.
2915 Index.isna: Inverse of notna.
2916 notna : Top-level notna.
2918 Examples
2919 --------
2920 Show which entries in an Index are not NA. The result is an
2921 array.
2923 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2924 >>> idx
2925 Float64Index([5.2, 6.0, nan], dtype='float64')
2926 >>> idx.notna()
2927 array([ True, True, False])
2929 Empty strings are not considered NA values. None is considered a NA
2930 value.
2932 >>> idx = pd.Index(['black', '', 'red', None])
2933 >>> idx
2934 Index(['black', '', 'red', None], dtype='object')
2935 >>> idx.notna()
2936 array([ True, True, True, False])
2937 """
2938 return ~self.isna()
2940 notnull = notna
2942 def fillna(self, value=None, downcast=None):
2943 """
2944 Fill NA/NaN values with the specified value.
2946 Parameters
2947 ----------
2948 value : scalar
2949 Scalar value to use to fill holes (e.g. 0).
2950 This value cannot be a list-likes.
2951 downcast : dict, default is None
2952 A dict of item->dtype of what to downcast if possible,
2953 or the string 'infer' which will try to downcast to an appropriate
2954 equal type (e.g. float64 to int64 if possible).
2956 Returns
2957 -------
2958 Index
2960 See Also
2961 --------
2962 DataFrame.fillna : Fill NaN values of a DataFrame.
2963 Series.fillna : Fill NaN Values of a Series.
2964 """
2966 value = self._require_scalar(value)
2967 if self.hasnans:
2968 result = self.putmask(self._isnan, value)
2969 if downcast is None:
2970 # no need to care metadata other than name
2971 # because it can't have freq if it has NaTs
2972 return Index._with_infer(result, name=self.name)
2973 raise NotImplementedError(
2974 f"{type(self).__name__}.fillna does not support 'downcast' "
2975 "argument values other than 'None'."
2976 )
2977 return self._view()
2979 def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
2980 """
2981 Return Index without NA/NaN values.
2983 Parameters
2984 ----------
2985 how : {'any', 'all'}, default 'any'
2986 If the Index is a MultiIndex, drop the value when any or all levels
2987 are NaN.
2989 Returns
2990 -------
2991 Index
2992 """
2993 if how not in ("any", "all"):
2994 raise ValueError(f"invalid how option: {how}")
2996 if self.hasnans:
2997 res_values = self._values[~self._isnan]
2998 return type(self)._simple_new(res_values, name=self.name)
2999 return self._view()
3001 # --------------------------------------------------------------------
3002 # Uniqueness Methods
3004 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
3005 """
3006 Return unique values in the index.
3008 Unique values are returned in order of appearance, this does NOT sort.
3010 Parameters
3011 ----------
3012 level : int or hashable, optional
3013 Only return values from specified level (for MultiIndex).
3014 If int, gets the level by integer position, else by level name.
3016 Returns
3017 -------
3018 Index
3020 See Also
3021 --------
3022 unique : Numpy array of unique values in that column.
3023 Series.unique : Return unique values of Series object.
3024 """
3025 if level is not None:
3026 self._validate_index_level(level)
3028 if self.is_unique:
3029 return self._view()
3031 result = super().unique()
3032 return self._shallow_copy(result)
3034 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
3035 def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
3036 """
3037 Return Index with duplicate values removed.
3039 Parameters
3040 ----------
3041 keep : {'first', 'last', ``False``}, default 'first'
3042 - 'first' : Drop duplicates except for the first occurrence.
3043 - 'last' : Drop duplicates except for the last occurrence.
3044 - ``False`` : Drop all duplicates.
3046 Returns
3047 -------
3048 deduplicated : Index
3050 See Also
3051 --------
3052 Series.drop_duplicates : Equivalent method on Series.
3053 DataFrame.drop_duplicates : Equivalent method on DataFrame.
3054 Index.duplicated : Related method on Index, indicating duplicate
3055 Index values.
3057 Examples
3058 --------
3059 Generate an pandas.Index with duplicate values.
3061 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
3063 The `keep` parameter controls which duplicate values are removed.
3064 The value 'first' keeps the first occurrence for each
3065 set of duplicated entries. The default value of keep is 'first'.
3067 >>> idx.drop_duplicates(keep='first')
3068 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
3070 The value 'last' keeps the last occurrence for each set of duplicated
3071 entries.
3073 >>> idx.drop_duplicates(keep='last')
3074 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
3076 The value ``False`` discards all sets of duplicated entries.
3078 >>> idx.drop_duplicates(keep=False)
3079 Index(['cow', 'beetle', 'hippo'], dtype='object')
3080 """
3081 if self.is_unique:
3082 return self._view()
3084 return super().drop_duplicates(keep=keep)
3086 def duplicated(
3087 self, keep: Literal["first", "last", False] = "first"
3088 ) -> npt.NDArray[np.bool_]:
3089 """
3090 Indicate duplicate index values.
3092 Duplicated values are indicated as ``True`` values in the resulting
3093 array. Either all duplicates, all except the first, or all except the
3094 last occurrence of duplicates can be indicated.
3096 Parameters
3097 ----------
3098 keep : {'first', 'last', False}, default 'first'
3099 The value or values in a set of duplicates to mark as missing.
3101 - 'first' : Mark duplicates as ``True`` except for the first
3102 occurrence.
3103 - 'last' : Mark duplicates as ``True`` except for the last
3104 occurrence.
3105 - ``False`` : Mark all duplicates as ``True``.
3107 Returns
3108 -------
3109 np.ndarray[bool]
3111 See Also
3112 --------
3113 Series.duplicated : Equivalent method on pandas.Series.
3114 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
3115 Index.drop_duplicates : Remove duplicate values from Index.
3117 Examples
3118 --------
3119 By default, for each set of duplicated values, the first occurrence is
3120 set to False and all others to True:
3122 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3123 >>> idx.duplicated()
3124 array([False, False, True, False, True])
3126 which is equivalent to
3128 >>> idx.duplicated(keep='first')
3129 array([False, False, True, False, True])
3131 By using 'last', the last occurrence of each set of duplicated values
3132 is set on False and all others on True:
3134 >>> idx.duplicated(keep='last')
3135 array([ True, False, True, False, False])
3137 By setting keep on ``False``, all duplicates are True:
3139 >>> idx.duplicated(keep=False)
3140 array([ True, False, True, False, True])
3141 """
3142 if self.is_unique:
3143 # fastpath available bc we are immutable
3144 return np.zeros(len(self), dtype=bool)
3145 return self._duplicated(keep=keep)
3147 # --------------------------------------------------------------------
3148 # Arithmetic & Logical Methods
3150 def __iadd__(self, other):
3151 # alias for __add__
3152 return self + other
3154 @final
3155 def __and__(self, other):
3156 warnings.warn(
3157 "Index.__and__ operating as a set operation is deprecated, "
3158 "in the future this will be a logical operation matching "
3159 "Series.__and__. Use index.intersection(other) instead.",
3160 FutureWarning,
3161 stacklevel=find_stack_level(),
3162 )
3163 return self.intersection(other)
3165 @final
3166 def __or__(self, other):
3167 warnings.warn(
3168 "Index.__or__ operating as a set operation is deprecated, "
3169 "in the future this will be a logical operation matching "
3170 "Series.__or__. Use index.union(other) instead.",
3171 FutureWarning,
3172 stacklevel=find_stack_level(),
3173 )
3174 return self.union(other)
3176 @final
3177 def __xor__(self, other):
3178 warnings.warn(
3179 "Index.__xor__ operating as a set operation is deprecated, "
3180 "in the future this will be a logical operation matching "
3181 "Series.__xor__. Use index.symmetric_difference(other) instead.",
3182 FutureWarning,
3183 stacklevel=find_stack_level(),
3184 )
3185 return self.symmetric_difference(other)
3187 @final
3188 def __nonzero__(self) -> NoReturn:
3189 raise ValueError(
3190 f"The truth value of a {type(self).__name__} is ambiguous. "
3191 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3192 )
3194 __bool__ = __nonzero__
3196 # --------------------------------------------------------------------
3197 # Set Operation Methods
3199 def _get_reconciled_name_object(self, other):
3200 """
3201 If the result of a set operation will be self,
3202 return self, unless the name changes, in which
3203 case make a shallow copy of self.
3204 """
3205 name = get_op_result_name(self, other)
3206 if self.name is not name:
3207 return self.rename(name)
3208 return self
3210 @final
3211 def _validate_sort_keyword(self, sort):
3212 if sort not in [None, False]:
3213 raise ValueError(
3214 "The 'sort' keyword only takes the values of "
3215 f"None or False; {sort} was passed."
3216 )
3218 @final
3219 def _deprecate_dti_setop(self, other: Index, setop: str_t):
3220 """
3221 Deprecate setop behavior between timezone-aware DatetimeIndexes with
3222 mismatched timezones.
3223 """
3224 # Caller is responsibelf or checking
3225 # `not is_dtype_equal(self.dtype, other.dtype)`
3226 if (
3227 isinstance(self, ABCDatetimeIndex)
3228 and isinstance(other, ABCDatetimeIndex)
3229 and self.tz is not None
3230 and other.tz is not None
3231 ):
3232 # GH#39328, GH#45357
3233 warnings.warn(
3234 f"In a future version, the {setop} of DatetimeIndex objects "
3235 "with mismatched timezones will cast both to UTC instead of "
3236 "object dtype. To retain the old behavior, "
3237 f"use `index.astype(object).{setop}(other)`",
3238 FutureWarning,
3239 stacklevel=find_stack_level(),
3240 )
    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        .. versionchanged:: 0.25.0

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.

        Returns
        -------
        union : Index

        Raises
        ------
        NotImplementedError
            When unioning a MultiIndex with a non-object, non-empty
            flat Index.
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        # normalize array-likes to Index and reconcile the result name
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mismatched dtypes: find a common dtype, cast both sides,
            # and retry the union on the cast operands.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            # warn for tz-aware DatetimeIndexes with mismatched timezones
            self._deprecate_dti_setop(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            # or after the is_dtype_equal check above affects the returned dtype
            return self._get_reconciled_name_object(other)

        elif not len(self):
            return other._get_reconciled_name_object(self)

        # general case: delegate to subclass-overridable _union
        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)
    def _union(self, other: Index, sort):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
            May be an Index (libjoin fastpath) or a raw array; the caller
            wraps the result via ``_wrap_setop_result``.
        """
        lvals = self._values
        rvals = other._values

        if (
            sort is None
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            # (actually don't need either unique, but without this restriction
            # test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates
            result = algos.union_with_duplicates(lvals, rvals)
            return _maybe_try_sort(result, sort)

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            # -1 marks values of `other` not found in `self`
            missing = (indexer == -1).nonzero()[0]
        else:
            # non-unique self: second element of get_indexer_non_unique is
            # the positions in `other` that were not matched
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        if len(missing) > 0:
            # append the genuinely new values from `other` onto `self`
            other_diff = rvals.take(missing)
            result = concat_compat((lvals, other_diff))
        else:
            result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result
3428 @final
3429 def _wrap_setop_result(self, other: Index, result) -> Index:
3430 name = get_op_result_name(self, other)
3431 if isinstance(result, Index):
3432 if result.name != name:
3433 result = result.rename(name)
3434 else:
3435 result = self._shallow_copy(result, name=name)
3436 return result
    @final
    def intersection(self, other, sort=False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        intersection : Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            self._deprecate_dti_setop(other, "intersection")

        if self.equals(other):
            # Fast path for identical indexes; duplicates are dropped because
            # the intersection is defined on unique values.
            if self.has_duplicates:
                return self.unique()._get_reconciled_name_object(other)
            return self._get_reconciled_name_object(other)

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if is_dtype_equal(self.dtype, dtype):
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                #  1) which index's `freq` we get in DTI/TDI cases
                #     This may be a historical artifact, i.e. no documented
                #     reason for this choice.
                #  2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry with matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)
    def _intersection(self, other: Index, sort=False):
        """
        intersection specialized to the case with matching dtypes.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
        ):
            # Fast path: libjoin inner join on monotonic values.
            try:
                result = self._inner_indexer(other)[0]
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                res = algos.unique1d(result)
                return ensure_wrapped_if_datetimelike(res)

        # Fallback: hash-table based intersection via get_indexer.
        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values
3540 def _wrap_intersection_result(self, other, result):
3541 # We will override for MultiIndex to handle empty results
3542 return self._wrap_setop_result(other, result)
    @final
    def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike:
        """
        Find the intersection of two Indexes using get_indexer.

        Returns
        -------
        np.ndarray or ExtensionArray
            The returned array will be unique.
        """
        left_unique = self.unique()
        right_unique = other.unique()

        # even though we are unique, we need get_indexer_for for IntervalIndex
        indexer = left_unique.get_indexer_for(right_unique)

        # -1 marks values of `other` not found in `self`.
        mask = indexer != -1

        taker = indexer.take(mask.nonzero()[0])
        if sort is False:
            # sort bc we want the elements in the same order they are in self
            # unnecessary in the case with sort=None bc we will sort later
            taker = np.sort(taker)

        result = left_unique.take(taker)._values
        return result
    @final
    def difference(self, other, sort=None):
        """
        Return a new Index with elements of index not in `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

        Returns
        -------
        difference : Index

        Examples
        --------
        >>> idx1 = pd.Index([2, 1, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Int64Index([2, 1], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        # Note: we do NOT call _deprecate_dti_setop here, as there
        #  is no requirement that .difference be commutative, so it does
        #  not cast to object.

        if self.equals(other):
            # Note: we do not (yet) sort even if sort=None GH#24959
            return self[:0].rename(result_name)

        if len(other) == 0:
            # Note: we do not (yet) sort even if sort=None GH#24959
            return self.rename(result_name)

        if not self._should_compare(other):
            # Nothing matches -> difference is everything
            return self.rename(result_name)

        result = self._difference(other, sort=sort)
        return self._wrap_difference_result(other, result)
    def _difference(self, other, sort):
        """
        Set-difference specialized to comparable dtypes; overridden by
        RangeIndex.
        """
        # Work on unique values so positions map 1-1 to values.
        this = self.unique()

        # Positions in `this` that also appear in `other`.
        indexer = this.get_indexer_for(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        # Keep the positions NOT matched by `other`.
        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
        the_diff = this._values.take(label_diff)
        the_diff = _maybe_try_sort(the_diff, sort)

        return the_diff
3640 def _wrap_difference_result(self, other, result):
3641 # We will override for MultiIndex to handle empty results
3642 return self._wrap_setop_result(other, result)
    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            # Fall back to the name reconciled by _convert_can_do_setop.
            result_name = result_name_update

        if not is_dtype_equal(self.dtype, other.dtype):
            self._deprecate_dti_setop(other, "symmetric_difference")

        if not self._should_compare(other):
            # Nothing can match -> the symmetric difference is the union.
            return self.union(other, sort=sort).rename(result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry with matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this._values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other._values.take(right_indexer)

        res_values = concat_compat([left_diff, right_diff])
        res_values = _maybe_try_sort(res_values, sort)

        # pass dtype so we retain object dtype
        result = Index(res_values, name=result_name, dtype=res_values.dtype)

        if self._is_multi:
            self = cast("MultiIndex", self)
            if len(result) == 0:
                # On equal symmetric_difference MultiIndexes the difference is empty.
                # Therefore, an empty MultiIndex is returned GH#13490
                return type(self)(
                    levels=[[] for _ in range(self.nlevels)],
                    codes=[[] for _ in range(self.nlevels)],
                    names=result.name,
                )
            return type(self).from_tuples(result, names=result.name)

        return result
3732 @final
3733 def _assert_can_do_setop(self, other) -> bool:
3734 if not is_list_like(other):
3735 raise TypeError("Input must be Index or array-like")
3736 return True
3738 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3739 if not isinstance(other, Index):
3740 # TODO(2.0): no need to special-case here once _with_infer
3741 # deprecation is enforced
3742 if hasattr(other, "dtype"):
3743 other = Index(other, name=self.name, dtype=other.dtype)
3744 else:
3745 # e.g. list
3746 other = Index(other, name=self.name)
3747 result_name = self.name
3748 else:
3749 result_name = get_op_result_name(self, other)
3750 return other, result_name
3752 # --------------------------------------------------------------------
3753 # Indexing Methods
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.

            .. deprecated:: 1.4
                Use index.get_indexer([item], method=...) instead.

        tolerance : int or float, optional
            Maximum distance from index value for inexact matches. The value of
            the index at the matching location must satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True])
        """
        if method is None:
            # Exact-match path: delegate to the engine.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if using pad, "
                    "backfill or nearest lookups"
                )
            casted_key = self._maybe_cast_indexer(key)
            try:
                return self._engine.get_loc(casted_key)
            except KeyError as err:
                # Re-raise with the original (un-cast) key for the message.
                raise KeyError(key) from err
            except TypeError:
                # If we have a listlike key, _check_indexing_error will raise
                #  InvalidIndexError. Otherwise we fall through and re-raise
                #  the TypeError.
                self._check_indexing_error(key)
                raise

        # GH#42269
        warnings.warn(
            f"Passing method to {type(self).__name__}.get_loc is deprecated "
            "and will raise in a future version. Use "
            "index.get_indexer([item], method=...) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        if is_scalar(key) and isna(key) and not self.hasnans:
            raise KeyError(key)

        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        # Deprecated inexact-match path, implemented via get_indexer.
        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("get_loc requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            raise KeyError(key)
        return loc
    # Shared docstring template for get_indexer; formatted with
    # _index_doc_kwargs and attached to methods via @Appender.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    @final
    def get_indexer(
        self,
        target,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        method = missing.clean_reindex_fill_method(method)
        # Keep a reference to the un-cast target for the NaN bookkeeping below.
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            #  matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if is_categorical_dtype(self.dtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            #  (could improve perf by doing _should_compare check earlier?)
            assert is_dtype_equal(self.dtype, target.dtype)

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if is_categorical_dtype(target.dtype):
            # potential fastpath
            #  get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            #  e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            # A promotion happened (e.g. object <-> datetime); retry on the
            # promoted objects.
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if not is_dtype_equal(self.dtype, target.dtype) and not is_interval_dtype(
            self.dtype
        ):
            # IntervalIndex gets special treatment for partial-indexing
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)
    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Dtype-compatible core of get_indexer; dispatches on `method`.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching via the engine.
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes(  # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)
4005 @final
4006 def _should_partial_index(self, target: Index) -> bool:
4007 """
4008 Should we attempt partial-matching indexing?
4009 """
4010 if is_interval_dtype(self.dtype):
4011 if is_interval_dtype(target.dtype):
4012 return False
4013 # See https://github.com/pandas-dev/pandas/issues/47772 the commented
4014 # out code can be restored (instead of hardcoding `return True`)
4015 # once that issue if fixed
4016 # "Index" has no attribute "left"
4017 # return self.left._should_compare(target) # type: ignore[attr-defined]
4018 return True
4019 return False
    @final
    def _check_indexing_method(
        self,
        method: str_t | None,
        limit: int | None = None,
        tolerance=None,
    ) -> None:
        """
        Raise if we have a get_indexer `method` that is not supported or valid.
        """
        if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
            # in practice the clean_reindex_fill_method call would raise
            #  before we get here
            raise ValueError("Invalid fill method")  # pragma: no cover

        if self._is_multi:
            # MultiIndex supports only a subset of the fill methods.
            if method == "nearest":
                raise NotImplementedError(
                    "method='nearest' not implemented yet "
                    "for MultiIndex; see GitHub issue 9365"
                )
            elif method == "pad" or method == "backfill":
                if tolerance is not None:
                    raise NotImplementedError(
                        "tolerance not implemented yet for MultiIndex"
                    )

        if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
            # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
            if method is not None:
                raise NotImplementedError(
                    f"method {method} not yet implemented for {type(self).__name__}"
                )

        if method is None:
            # limit/tolerance only make sense together with a fill method.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
            if limit is not None:
                raise ValueError(
                    "limit argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
4067 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
4068 # override this method on subclasses
4069 tolerance = np.asarray(tolerance)
4070 if target.size != tolerance.size and tolerance.size > 1:
4071 raise ValueError("list-like tolerance size must match target index size")
4072 return tolerance
    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        pad/backfill indexer for `target`, optionally filtered by `tolerance`.
        """
        if self._is_multi:
            # TODO: get_indexer_with_fill docstring says values must be _sorted_
            #  but that doesn't appear to be enforced
            # error: "IndexEngine" has no attribute "get_indexer_with_fill"
            engine = self._engine
            return engine.get_indexer_with_fill(  # type: ignore[union-attr]
                target=target._values, values=self._values, method=method, limit=limit
            )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Fast path: C-level pad/backfill on monotonic ndarrays.
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            # Non-monotonic fallback based on searchsorted.
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
    @final
    def _get_fill_indexer_searchsorted(
        self, target: Index, method: str_t, limit: int | None = None
    ) -> npt.NDArray[np.intp]:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side: Literal["left", "right"] = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer
    @final
    def _get_nearest_indexer(
        self, target: Index, limit: int | None, tolerance
    ) -> npt.NDArray[np.intp]:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        if not len(self):
            # Empty index: every target position is unmatched (-1).
            return self._get_fill_indexer(target, "pad")

        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        left_distances = self._difference_compat(target, left_indexer)
        right_distances = self._difference_compat(target, right_indexer)

        # Strict `lt` when monotonic increasing so ties prefer the larger
        # (right) index value, per the documented tie-breaking rule.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
            # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
            # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
            op(left_distances, right_distances)  # type: ignore[arg-type]
            | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
4173 @final
4174 def _filter_indexer_tolerance(
4175 self,
4176 target: Index,
4177 indexer: npt.NDArray[np.intp],
4178 tolerance,
4179 ) -> npt.NDArray[np.intp]:
4181 distance = self._difference_compat(target, indexer)
4183 return np.where(distance <= tolerance, indexer, -1)
    @final
    def _difference_compat(
        self, target: Index, indexer: npt.NDArray[np.intp]
    ) -> ArrayLike:
        """
        Absolute distance between our values taken at `indexer` and `target`.
        """
        # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
        #  of DateOffset objects, which do not support __abs__ (and would be slow
        #  if they did)

        if isinstance(self.dtype, PeriodDtype):
            # Note: we only get here with matching dtypes
            own_values = cast("PeriodArray", self._data)._ndarray
            target_values = cast("PeriodArray", target._data)._ndarray
            diff = own_values[indexer] - target_values
        else:
            # error: Unsupported left operand type for - ("ExtensionArray")
            diff = self._values[indexer] - target._values  # type: ignore[operator]
        return abs(diff)
4203 # --------------------------------------------------------------------
4204 # Indexer Conversion Methods
4206 @final
4207 def _validate_positional_slice(self, key: slice) -> None:
4208 """
4209 For positional indexing, a slice must have either int or None
4210 for each of start, stop, and step.
4211 """
4212 self._validate_indexer("positional", key.start, "iloc")
4213 self._validate_indexer("positional", key.stop, "iloc")
4214 self._validate_indexer("positional", key.step, "iloc")
    def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        is_frame : bool, default False
            Whether this is a slice called on DataFrame.__getitem__
            as opposed to Series.__getitem__
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)

        # special case for interval_dtype bc we do not do partial-indexing
        #  on integer Intervals when slicing
        # TODO: write this in terms of e.g. should_partial_index?
        ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
            self.dtype
        )
        is_positional = is_index_slice and ints_are_positional

        if kind == "getitem":
            """
            called from the getitem slicers, validate that we are in fact
            integers
            """
            if self.is_integer():
                # The elif-chain below decides whether the integer-slice
                # deprecation warning applies; each `pass` branch is a case
                # where positional and label-based slicing agree (or where we
                # will raise anyway).
                if is_frame:
                    # unambiguously positional, no deprecation
                    pass
                elif start is None and stop is None:
                    # label-based vs positional is irrelevant
                    pass
                elif isinstance(self, ABCRangeIndex) and self._range == range(
                    len(self)
                ):
                    # In this case there is no difference between label-based
                    #  and positional, so nothing will change.
                    pass
                elif (
                    self.dtype.kind in ["i", "u"]
                    and self._is_strictly_monotonic_increasing
                    and len(self) > 0
                    and self[0] == 0
                    and self[-1] == len(self) - 1
                ):
                    # We are range-like, e.g. created with Index(np.arange(N))
                    pass
                elif not is_index_slice:
                    # we're going to raise, so don't bother warning, e.g.
                    #  test_integer_positional_indexing
                    pass
                else:
                    warnings.warn(
                        "The behavior of `series[i:j]` with an integer-dtype index "
                        "is deprecated. In a future version, this will be treated "
                        "as *label-based* indexing, consistent with e.g. `series[i]` "
                        "lookups. To retain the old behavior, use `series.iloc[i:j]`. "
                        "To get the future behavior, use `series.loc[i:j]`.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
            if self.is_integer() or is_index_slice:
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                # Both bounds resolve as labels -> treat as label-based.
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                warnings.warn(
                    "Slicing a positional slice with .loc is not supported, "
                    "and will raise TypeError in a future version. "
                    "Use .loc with labels or .iloc with positions instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer
4331 @final
4332 def _invalid_indexer(self, form: str_t, key) -> TypeError:
4333 """
4334 Consistent invalid indexer message.
4335 """
4336 return TypeError(
4337 f"cannot do {form} indexing on {type(self).__name__} with these "
4338 f"indexers [{key}] of type {type(key).__name__}"
4339 )
4341 # --------------------------------------------------------------------
4342 # Reindex Methods
4344 @final
4345 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4346 """
4347 Check if we are allowing reindexing with this particular indexer.
4349 Parameters
4350 ----------
4351 indexer : an integer ndarray
4353 Raises
4354 ------
4355 ValueError if its a duplicate axis
4356 """
4357 # trying to reindex on an axis with duplicates
4358 if not self._index_as_unique and len(indexer):
4359 raise ValueError("cannot reindex on an axis with duplicate labels")
    def reindex(
        self, target, method=None, level=None, limit=None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Empty non-Index target: reindex against an empty slice of
            # ourselves (or of the requested level) so dtype/tz survive.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                # Identical labels need no indexer at all.
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                else:
                    if method is not None or limit is not None:
                        raise ValueError(
                            "cannot reindex a non-unique index "
                            "with a method or limit"
                        )
                    indexer, _ = self.get_indexer_non_unique(target)

            if not self.is_unique:
                # GH#42568
                warnings.warn(
                    "reindexing with a non-unique Index is deprecated and "
                    "will raise in a future version.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer
4480 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4481 target = self._maybe_preserve_names(target, preserve_names)
4482 return target
4484 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4485 if preserve_names and target.nlevels == 1 and target.name != self.name:
4486 target = target.copy(deep=False)
4487 target.name = self.name
4488 return target
    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None
            None when every target label was found in self.

        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1  # True at positions whose label was found
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]  # positions of unmatched labels
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]  # positions of matched labels

            # Interleave found and missing labels back into target order.
            # Index constructor below will do inference
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):

                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:

                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:

                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if isinstance(self, ABCMultiIndex):
            new_index = type(self).from_tuples(new_labels, names=self.names)
        else:
            new_index = Index._with_infer(new_labels, name=self.name)
        return new_index, indexer, new_indexer
4565 # --------------------------------------------------------------------
4566 # Join Methods
    # The three @overload stubs below only refine the static return type of
    # ``join`` based on the literal value of ``return_indexers``; the runtime
    # implementation follows them.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...
    @final
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "other"])
    @_maybe_return_indexers
    def join(
        self,
        other: Index,
        how: str_t = "left",
        level: Level = None,
        return_indexers: bool = False,
        sort: bool = False,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Compute join_index and indexers to conform data structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """
        other = ensure_index(other)

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (self.tz is None) ^ (other.tz is None):
                # Raise instead of casting to object below.
                raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if not self._is_multi and not other._is_multi:
            # We have specific handling for MultiIndex below
            pself, pother = self._maybe_promote(other)
            if pself is not self or pother is not other:
                # A side was promoted; redo the join on the promoted pair.
                return pself.join(
                    pother, how=how, level=level, return_indexers=True, sort=sort
                )

        lindexer: np.ndarray | None
        rindexer: np.ndarray | None

        # try to figure out the join level
        # GH3662
        if level is None and (self._is_multi or other._is_multi):

            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how)

        # join on the level
        if level is not None and (self._is_multi or other._is_multi):
            return self._join_level(other, level, how=how)

        # Degenerate cases: one side is empty.
        if len(other) == 0:
            if how in ("left", "outer"):
                join_index = self._view()
                rindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, None, rindexer
            elif how in ("right", "inner", "cross"):
                join_index = other._view()
                lindexer = np.array([])
                return join_index, lindexer, None

        if len(self) == 0:
            if how in ("right", "outer"):
                join_index = other._view()
                lindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, lindexer, None
            elif how in ("left", "inner", "cross"):
                join_index = self._view()
                rindexer = np.array([])
                return join_index, None, rindexer

        if self._join_precedence < other._join_precedence:
            # Let the higher-precedence index drive the join, then swap the
            # returned indexers back to match (self, other) order.
            how = {"right": "left", "left": "right"}.get(how, how)
            join_index, lidx, ridx = other.join(
                self, how=how, level=level, return_indexers=True
            )
            lidx, ridx = ridx, lidx
            return join_index, lidx, ridx

        if not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.join(other, how=how, return_indexers=True)

        _validate_join_method(how)

        # Dispatch to the appropriate strategy based on uniqueness and
        # monotonicity of the two sides.
        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(other, how=how)
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic_increasing and other.is_monotonic_increasing:
                if not is_interval_dtype(self.dtype):
                    # otherwise we will fall through to _join_via_get_indexer
                    # GH#39133
                    # go through object dtype for ea till engine is supported properly
                    return self._join_monotonic(other, how=how)
            else:
                return self._join_non_unique(other, how=how)
        elif (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and (
                not isinstance(self, ABCMultiIndex)
                or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
            )
            and not is_categorical_dtype(self.dtype)
        ):
            # Categorical is monotonic if data are ordered as categories, but join can
            # not handle this in case of not lexicographically monotonic GH#38502
            try:
                return self._join_monotonic(other, how=how)
            except TypeError:
                # object dtype; non-comparable objects
                pass

        return self._join_via_get_indexer(other, how, sort)
4731 @final
4732 def _join_via_get_indexer(
4733 self, other: Index, how: str_t, sort: bool
4734 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4735 # Fallback if we do not have any fastpaths available based on
4736 # uniqueness/monotonicity
4738 # Note: at this point we have checked matching dtypes
4740 if how == "left":
4741 join_index = self
4742 elif how == "right":
4743 join_index = other
4744 elif how == "inner":
4745 # TODO: sort=False here for backwards compat. It may
4746 # be better to use the sort parameter passed into join
4747 join_index = self.intersection(other, sort=False)
4748 elif how == "outer":
4749 # TODO: sort=True here for backwards compat. It may
4750 # be better to use the sort parameter passed into join
4751 join_index = self.union(other)
4753 if sort:
4754 join_index = join_index.sort_values()
4756 if join_index is self:
4757 lindexer = None
4758 else:
4759 lindexer = self.get_indexer_for(join_index)
4760 if join_index is other:
4761 rindexer = None
4762 else:
4763 rindexer = other.get_indexer_for(join_index)
4764 return join_index, lindexer, rindexer
    @final
    def _join_multi(self, other: Index, how: str_t):
        """
        Join when at least one side is a MultiIndex, matching on the
        overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        # ``.index`` used as a sort key below to preserve each side's
        # original level order.
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):

            # Drop the non-matching levels from left and right respectively
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        # Only one side is a MultiIndex: join on the single shared level.
        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            # Undo the operand swap in the returned indexers.
            return result[0], result[2], result[1]
        return result
4848 @final
4849 def _join_non_unique(
4850 self, other: Index, how: str_t = "left"
4851 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
4852 from pandas.core.reshape.merge import get_join_indexers
4854 # We only get here if dtypes match
4855 assert self.dtype == other.dtype
4857 left_idx, right_idx = get_join_indexers(
4858 [self._values], [other._values], how=how, sort=True
4859 )
4860 mask = left_idx == -1
4862 join_array = self._values.take(left_idx)
4863 right = other._values.take(right_idx)
4865 if isinstance(join_array, np.ndarray):
4866 # error: Argument 3 to "putmask" has incompatible type
4867 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
4868 # "Union[_SupportsArray[dtype[Any]], _NestedSequence[
4869 # _SupportsArray[dtype[Any]]], bool, int, float, complex,
4870 # str, bytes, _NestedSequence[Union[bool, int, float,
4871 # complex, str, bytes]]]"
4872 np.putmask(join_array, mask, right) # type: ignore[arg-type]
4873 else:
4874 join_array._putmask(mask, right)
4876 join_index = self._wrap_joined_index(join_array, other)
4878 return join_index, left_idx, right_idx
    @final
    def _join_level(
        self, other: Index, level, how: str_t = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so that ``left`` is always the MultiIndex side.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {"right": "left", "left": "right"}.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # The level values themselves are unchanged; at most the row
            # order needs adjusting.
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            # Remap the old level codes onto positions in the joined level.
            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            # Undo the operand swap performed above.
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer
5028 @final
5029 def _join_monotonic(
5030 self, other: Index, how: str_t = "left"
5031 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
5032 # We only get here with matching dtypes and both monotonic increasing
5033 assert other.dtype == self.dtype
5035 if self.equals(other):
5036 ret_index = other if how == "right" else self
5037 return ret_index, None, None
5039 ridx: np.ndarray | None
5040 lidx: np.ndarray | None
5042 if self.is_unique and other.is_unique:
5043 # We can perform much better than the general case
5044 if how == "left":
5045 join_index = self
5046 lidx = None
5047 ridx = self._left_indexer_unique(other)
5048 elif how == "right":
5049 join_index = other
5050 lidx = other._left_indexer_unique(self)
5051 ridx = None
5052 elif how == "inner":
5053 join_array, lidx, ridx = self._inner_indexer(other)
5054 join_index = self._wrap_joined_index(join_array, other)
5055 elif how == "outer":
5056 join_array, lidx, ridx = self._outer_indexer(other)
5057 join_index = self._wrap_joined_index(join_array, other)
5058 else:
5059 if how == "left":
5060 join_array, lidx, ridx = self._left_indexer(other)
5061 elif how == "right":
5062 join_array, ridx, lidx = other._left_indexer(self)
5063 elif how == "inner":
5064 join_array, lidx, ridx = self._inner_indexer(other)
5065 elif how == "outer":
5066 join_array, lidx, ridx = self._outer_indexer(other)
5068 join_index = self._wrap_joined_index(join_array, other)
5070 lidx = None if lidx is None else ensure_platform_int(lidx)
5071 ridx = None if ridx is None else ensure_platform_int(ridx)
5072 return join_index, lidx, ridx
5074 def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _IndexT:
5075 assert other.dtype == self.dtype
5077 if isinstance(self, ABCMultiIndex):
5078 name = self.names if self.names == other.names else None
5079 # error: Incompatible return value type (got "MultiIndex",
5080 # expected "_IndexT")
5081 return self._constructor(joined, name=name) # type: ignore[return-value]
5082 else:
5083 name = get_op_result_name(self, other)
5084 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
5086 @cache_readonly
5087 def _can_use_libjoin(self) -> bool:
5088 """
5089 Whether we can use the fastpaths implement in _libs.join
5090 """
5091 if type(self) is Index:
5092 # excludes EAs
5093 return isinstance(self.dtype, np.dtype)
5094 return not is_interval_dtype(self.dtype)
5096 # --------------------------------------------------------------------
5097 # Uncategorized Methods
    @property
    def values(self) -> ArrayLike:
        """
        Return an array representing the data in the Index.

        .. warning::

           We recommend using :attr:`Index.array` or
           :meth:`Index.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        # The raw backing store: an ndarray for plain indexes, an
        # ExtensionArray for extension-typed ones.
        return self._data
5121 # error: Decorated property not supported
5122 # https://github.com/python/mypy/issues/1362
5123 @cache_readonly # type: ignore[misc]
5124 @doc(IndexOpsMixin.array)
5125 def array(self) -> ExtensionArray:
5126 array = self._data
5127 if isinstance(array, np.ndarray):
5128 from pandas.core.arrays.numpy_ import PandasArray
5130 array = PandasArray(array)
5131 return array
    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        """
        The best array representation.

        This is an ndarray or ExtensionArray.

        ``_values`` are consistent between ``Series`` and ``Index``.

        It may differ from the public '.values' method.

        index             | values          | _values       |
        ----------------- | --------------- | ------------- |
        Index             | ndarray         | ndarray       |
        CategoricalIndex  | Categorical     | Categorical   |
        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
        PeriodIndex       | ndarray[object] | PeriodArray   |
        IntervalIndex     | IntervalArray   | IntervalArray |

        See Also
        --------
        values : Values
        """
        # On the base class this is identical to ``values``; subclasses in
        # the table above diverge.
        return self._data
5159 def _get_engine_target(self) -> ArrayLike:
5160 """
5161 Get the ndarray or ExtensionArray that we can pass to the IndexEngine
5162 constructor.
5163 """
5164 vals = self._values
5165 if isinstance(vals, StringArray):
5166 # GH#45652 much more performant than ExtensionEngine
5167 return vals._ndarray
5168 if type(self) is Index and isinstance(self._values, ExtensionArray):
5169 # TODO(ExtensionIndex): remove special-case, just use self._values
5170 return self._values.astype(object)
5171 return vals
    def _from_join_target(self, result: np.ndarray) -> ArrayLike:
        """
        Cast the ndarray returned from one of the libjoin.foo_indexer functions
        back to type(self)._data.
        """
        # Base Index is ndarray-backed, so this is the identity; EA-backed
        # subclasses override it to rebuild their array type.
        return result
5180 @doc(IndexOpsMixin._memory_usage)
5181 def memory_usage(self, deep: bool = False) -> int:
5182 result = self._memory_usage(deep=deep)
5184 # include our engine hashtable
5185 result += self._engine.sizeof(deep=deep)
5186 return result
5188 @final
5189 def where(self, cond, other=None) -> Index:
5190 """
5191 Replace values where the condition is False.
5193 The replacement is taken from other.
5195 Parameters
5196 ----------
5197 cond : bool array-like with the same length as self
5198 Condition to select the values on.
5199 other : scalar, or array-like, default None
5200 Replacement if the condition is False.
5202 Returns
5203 -------
5204 pandas.Index
5205 A copy of self with values replaced from other
5206 where the condition is False.
5208 See Also
5209 --------
5210 Series.where : Same method for Series.
5211 DataFrame.where : Same method for DataFrame.
5213 Examples
5214 --------
5215 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5216 >>> idx
5217 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5218 >>> idx.where(idx.isin(['car', 'train']), 'other')
5219 Index(['car', 'other', 'train', 'other'], dtype='object')
5220 """
5221 if isinstance(self, ABCMultiIndex):
5222 raise NotImplementedError(
5223 ".where is not supported for MultiIndex operations"
5224 )
5225 cond = np.asarray(cond, dtype=bool)
5226 return self.putmask(~cond, other)
5228 # construction helpers
5229 @final
5230 @classmethod
5231 def _scalar_data_error(cls, data):
5232 # We return the TypeError so that we can raise it from the constructor
5233 # in order to keep mypy happy
5234 return TypeError(
5235 f"{cls.__name__}(...) must be called with a collection of some "
5236 f"kind, {repr(data)} was passed"
5237 )
5239 @final
5240 @classmethod
5241 def _string_data_error(cls, data):
5242 raise TypeError(
5243 "String dtype not supported, you may need "
5244 "to explicitly cast to a numeric type"
5245 )
5247 def _validate_fill_value(self, value):
5248 """
5249 Check if the value can be inserted into our array without casting,
5250 and convert it to an appropriate native type if necessary.
5252 Raises
5253 ------
5254 TypeError
5255 If the value cannot be inserted into an array of this dtype.
5256 """
5257 dtype = self.dtype
5258 if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
5259 # return np_can_hold_element(dtype, value)
5260 try:
5261 return np_can_hold_element(dtype, value)
5262 except LossySetitemError as err:
5263 # re-raise as TypeError for consistency
5264 raise TypeError from err
5265 elif not can_hold_element(self._values, value):
5266 raise TypeError
5267 return value
5269 @final
5270 def _require_scalar(self, value):
5271 """
5272 Check that this is a scalar value that we can use for setitem-like
5273 operations without changing dtype.
5274 """
5275 if not is_scalar(value):
5276 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
5277 return value
    def _is_memory_usage_qualified(self) -> bool:
        """
        Return a boolean if we need a qualified .info display.
        """
        # Object dtype holds arbitrary Python objects, so reported memory is
        # only approximate — presumably shown with a "+" qualifier in
        # .info(); confirm against DataFrame.info's rendering.
        return self.is_object()
5285 def is_type_compatible(self, kind: str_t) -> bool:
5286 """
5287 Whether the index type is compatible with the provided type.
5288 """
5289 warnings.warn(
5290 "Index.is_type_compatible is deprecated and will be removed in a "
5291 "future version.",
5292 FutureWarning,
5293 stacklevel=find_stack_level(),
5294 )
5295 return kind == self.inferred_type
5297 def __contains__(self, key: Any) -> bool:
5298 """
5299 Return a boolean indicating whether the provided key is in the index.
5301 Parameters
5302 ----------
5303 key : label
5304 The key to check if it is present in the index.
5306 Returns
5307 -------
5308 bool
5309 Whether the key search is in the index.
5311 Raises
5312 ------
5313 TypeError
5314 If the key is not hashable.
5316 See Also
5317 --------
5318 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5319 list-like key is in the index.
5321 Examples
5322 --------
5323 >>> idx = pd.Index([1, 2, 3, 4])
5324 >>> idx
5325 Int64Index([1, 2, 3, 4], dtype='int64')
5327 >>> 2 in idx
5328 True
5329 >>> 6 in idx
5330 False
5331 """
5332 hash(key)
5333 try:
5334 return key in self._engine
5335 except (OverflowError, TypeError, ValueError):
5336 return False
    # Annotating __hash__ as None marks Index unhashable to type checkers
    # (the mutable-container convention); __contains__ above stays usable.
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    __hash__: ClassVar[None]  # type: ignore[assignment]
5343 @final
5344 def __setitem__(self, key, value):
5345 raise TypeError("Index does not support mutable operations")
    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        # Bind the backing array's getitem once; every path below calls it.
        getitem = self._data.__getitem__

        if is_integer(key) or is_float(key):
            # Scalar positional lookup: returns a scalar, not an Index.
            # GH#44051 exclude bool, which would return a 2d ndarray
            key = com.cast_scalar_indexer(key, warn_float=True)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization com.is_bool_indexer and ndim checks.
            result = getitem(key)
            # Going through simple_new for performance.
            return type(self)._simple_new(result, name=self._name)

        if com.is_bool_indexer(key):
            # if we have list[bools, length=1e5] then doing this check+convert
            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
            # time below from 3.8 ms to 496 µs
            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
            if is_extension_array_dtype(getattr(key, "dtype", None)):
                # Masked/EA boolean: NA positions are treated as False.
                key = key.to_numpy(dtype=bool, na_value=False)
            else:
                key = np.asarray(key, dtype=bool)

        result = getitem(key)
        # Because we ruled out integer above, we always get an arraylike here
        if result.ndim > 1:
            # Multi-dimensional results warn via deprecate_ndim_indexing and
            # are returned as plain arrays, not Index.
            deprecate_ndim_indexing(result)
            if hasattr(result, "_ndarray"):
                # i.e. NDArrayBackedExtensionArray
                # Unpack to ndarray for MPL compat
                # error: Item "ndarray[Any, Any]" of
                # "Union[ExtensionArray, ndarray[Any, Any]]"
                # has no attribute "_ndarray"
                return result._ndarray  # type: ignore[union-attr]
            return result

        # NB: Using _constructor._simple_new would break if MultiIndex
        # didn't override __getitem__
        return self._constructor._simple_new(result, name=self._name)
5399 def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
5400 """
5401 Fastpath for __getitem__ when we know we have a slice.
5402 """
5403 res = self._data[slobj]
5404 return type(self)._simple_new(res, name=self._name)
5406 @final
5407 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5408 """
5409 Faster check for ``name in self`` when we know `name` is a Python
5410 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5411 . key lookup). For indexes that can't hold identifiers (everything
5412 but object & categorical) we just return False.
5414 https://github.com/pandas-dev/pandas/issues/19764
5415 """
5416 if self.is_object() or is_string_dtype(self.dtype) or self.is_categorical():
5417 return name in self
5418 return False
5420 def append(self, other: Index | Sequence[Index]) -> Index:
5421 """
5422 Append a collection of Index options together.
5424 Parameters
5425 ----------
5426 other : Index or list/tuple of indices
5428 Returns
5429 -------
5430 Index
5431 """
5432 to_concat = [self]
5434 if isinstance(other, (list, tuple)):
5435 to_concat += list(other)
5436 else:
5437 # error: Argument 1 to "append" of "list" has incompatible type
5438 # "Union[Index, Sequence[Index]]"; expected "Index"
5439 to_concat.append(other) # type: ignore[arg-type]
5441 for obj in to_concat:
5442 if not isinstance(obj, Index):
5443 raise TypeError("all inputs must be Index")
5445 names = {obj.name for obj in to_concat}
5446 name = None if len(names) > 1 else self.name
5448 return self._concat(to_concat, name)
    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
        """
        Concatenate multiple Index objects.

        Parameters
        ----------
        to_concat : list of Index
            Indexes whose underlying values are concatenated in order.
        name : Hashable
            Name for the resulting Index.

        Returns
        -------
        Index
        """
        # Concatenate the raw backing arrays; dtype coercion is handled by
        # concat_compat.
        to_concat_vals = [x._values for x in to_concat]

        result = concat_compat(to_concat_vals)

        # For backward-compat public numeric Index subclasses, keep the
        # subclass when the concatenated result is still numeric.
        is_numeric = result.dtype.kind in ["i", "u", "f"]
        if self._is_backward_compat_public_numeric_index and is_numeric:
            return type(self)._simple_new(result, name=name)

        # Otherwise let Index infer the appropriate subclass/dtype.
        return Index._with_infer(result, name=name)
    @final
    def putmask(self, mask, value) -> Index:
        """
        Return a new Index of the values set with the mask.

        Parameters
        ----------
        mask : array-like
            Boolean mask (or something convertible to one) selecting the
            positions to replace.
        value : object
            Replacement value(s); broadcast against the selected positions
            as in ``numpy.putmask``.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.
        """
        # Normalize the mask; `noop` is True when nothing is selected.
        mask, noop = validate_putmask(self._values, mask)
        if noop:
            return self.copy()

        if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
            # e.g. None -> np.nan, see also Block._standardize_fill_value
            value = self._na_value
        try:
            converted = self._validate_fill_value(value)
        except (LossySetitemError, ValueError, TypeError) as err:
            if is_object_dtype(self):  # pragma: no cover
                raise err

            # The value does not fit this dtype: upcast to a common dtype
            # and retry there.
            dtype = self._find_common_type_compat(value)
            return self.astype(dtype).putmask(mask, value)

        values = self._values.copy()

        if isinstance(values, np.ndarray):
            converted = setitem_datetimelike_compat(values, mask.sum(), converted)
            np.putmask(values, mask, converted)

        else:
            # Note: we use the original value here, not converted, as
            # _validate_fill_value is not idempotent
            values._putmask(mask, value)

        return self._shallow_copy(values)
5507 def equals(self, other: Any) -> bool:
5508 """
5509 Determine if two Index object are equal.
5511 The things that are being compared are:
5513 * The elements inside the Index object.
5514 * The order of the elements inside the Index object.
5516 Parameters
5517 ----------
5518 other : Any
5519 The other object to compare against.
5521 Returns
5522 -------
5523 bool
5524 True if "other" is an Index and it has the same elements and order
5525 as the calling index; False otherwise.
5527 Examples
5528 --------
5529 >>> idx1 = pd.Index([1, 2, 3])
5530 >>> idx1
5531 Int64Index([1, 2, 3], dtype='int64')
5532 >>> idx1.equals(pd.Index([1, 2, 3]))
5533 True
5535 The elements inside are compared
5537 >>> idx2 = pd.Index(["1", "2", "3"])
5538 >>> idx2
5539 Index(['1', '2', '3'], dtype='object')
5541 >>> idx1.equals(idx2)
5542 False
5544 The order is compared
5546 >>> ascending_idx = pd.Index([1, 2, 3])
5547 >>> ascending_idx
5548 Int64Index([1, 2, 3], dtype='int64')
5549 >>> descending_idx = pd.Index([3, 2, 1])
5550 >>> descending_idx
5551 Int64Index([3, 2, 1], dtype='int64')
5552 >>> ascending_idx.equals(descending_idx)
5553 False
5555 The dtype is *not* compared
5557 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
5558 >>> int64_idx
5559 Int64Index([1, 2, 3], dtype='int64')
5560 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
5561 >>> uint64_idx
5562 UInt64Index([1, 2, 3], dtype='uint64')
5563 >>> int64_idx.equals(uint64_idx)
5564 True
5565 """
5566 if self.is_(other):
5567 return True
5569 if not isinstance(other, Index):
5570 return False
5572 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
5573 # if other is not object, use other's logic for coercion
5574 return other.equals(self)
5576 if isinstance(other, ABCMultiIndex):
5577 # d-level MultiIndex can equal d-tuple Index
5578 return other.equals(self)
5580 if isinstance(self._values, ExtensionArray):
5581 # Dispatch to the ExtensionArray's .equals method.
5582 if not isinstance(other, type(self)):
5583 return False
5585 earr = cast(ExtensionArray, self._data)
5586 return earr.equals(other._data)
5588 if is_extension_array_dtype(other.dtype):
5589 # All EA-backed Index subclasses override equals
5590 return other.equals(self)
5592 return array_equivalent(self._values, other._values)
5594 @final
5595 def identical(self, other) -> bool:
5596 """
5597 Similar to equals, but checks that object attributes and types are also equal.
5599 Returns
5600 -------
5601 bool
5602 If two Index objects have equal elements and same type True,
5603 otherwise False.
5604 """
5605 return (
5606 self.equals(other)
5607 and all(
5608 getattr(self, c, None) == getattr(other, c, None)
5609 for c in self._comparables
5610 )
5611 and type(self) == type(other)
5612 )
5614 @final
5615 def asof(self, label):
5616 """
5617 Return the label from the index, or, if not present, the previous one.
5619 Assuming that the index is sorted, return the passed index label if it
5620 is in the index, or return the previous index label if the passed one
5621 is not in the index.
5623 Parameters
5624 ----------
5625 label : object
5626 The label up to which the method returns the latest index label.
5628 Returns
5629 -------
5630 object
5631 The passed label if it is in the index. The previous label if the
5632 passed label is not in the sorted index or `NaN` if there is no
5633 such label.
5635 See Also
5636 --------
5637 Series.asof : Return the latest value in a Series up to the
5638 passed index.
5639 merge_asof : Perform an asof merge (similar to left join but it
5640 matches on nearest key rather than equal key).
5641 Index.get_loc : An `asof` is a thin wrapper around `get_loc`
5642 with method='pad'.
5644 Examples
5645 --------
5646 `Index.asof` returns the latest index label up to the passed label.
5648 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
5649 >>> idx.asof('2014-01-01')
5650 '2013-12-31'
5652 If the label is in the index, the method returns the passed label.
5654 >>> idx.asof('2014-01-02')
5655 '2014-01-02'
5657 If all of the labels in the index are later than the passed label,
5658 NaN is returned.
5660 >>> idx.asof('1999-01-02')
5661 nan
5663 If the index is not sorted, an error is raised.
5665 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
5666 ... '2014-01-03'])
5667 >>> idx_not_sorted.asof('2013-12-31')
5668 Traceback (most recent call last):
5669 ValueError: index must be monotonic increasing or decreasing
5670 """
5671 self._searchsorted_monotonic(label) # validate sortedness
5672 try:
5673 loc = self.get_loc(label)
5674 except (KeyError, TypeError):
5675 # KeyError -> No exact match, try for padded
5676 # TypeError -> passed e.g. non-hashable, fall through to get
5677 # the tested exception message
5678 indexer = self.get_indexer([label], method="pad")
5679 if indexer.ndim > 1 or indexer.size > 1:
5680 raise TypeError("asof requires scalar valued input")
5681 loc = indexer.item()
5682 if loc == -1:
5683 return self._na_value
5684 else:
5685 if isinstance(loc, slice):
5686 loc = loc.indices(len(self))[-1]
5688 return self[loc]
5690 def asof_locs(
5691 self, where: Index, mask: npt.NDArray[np.bool_]
5692 ) -> npt.NDArray[np.intp]:
5693 """
5694 Return the locations (indices) of labels in the index.
5696 As in the `asof` function, if the label (a particular entry in
5697 `where`) is not in the index, the latest index label up to the
5698 passed label is chosen and its index returned.
5700 If all of the labels in the index are later than a label in `where`,
5701 -1 is returned.
5703 `mask` is used to ignore NA values in the index during calculation.
5705 Parameters
5706 ----------
5707 where : Index
5708 An Index consisting of an array of timestamps.
5709 mask : np.ndarray[bool]
5710 Array of booleans denoting where values in the original
5711 data are not NA.
5713 Returns
5714 -------
5715 np.ndarray[np.intp]
5716 An array of locations (indices) of the labels from the Index
5717 which correspond to the return values of the `asof` function
5718 for every element in `where`.
5719 """
5720 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5721 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5722 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5723 locs = self._values[mask].searchsorted(
5724 where._values, side="right" # type: ignore[call-overload]
5725 )
5726 locs = np.where(locs > 0, locs - 1, 0)
5728 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5730 first_value = self._values[mask.argmax()]
5731 result[(locs == 0) & (where._values < first_value)] = -1
5733 return result
5735 def sort_values(
5736 self,
5737 return_indexer: bool = False,
5738 ascending: bool = True,
5739 na_position: str_t = "last",
5740 key: Callable | None = None,
5741 ):
5742 """
5743 Return a sorted copy of the index.
5745 Return a sorted copy of the index, and optionally return the indices
5746 that sorted the index itself.
5748 Parameters
5749 ----------
5750 return_indexer : bool, default False
5751 Should the indices that would sort the index be returned.
5752 ascending : bool, default True
5753 Should the index values be sorted in an ascending order.
5754 na_position : {'first' or 'last'}, default 'last'
5755 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
5756 the end.
5758 .. versionadded:: 1.2.0
5760 key : callable, optional
5761 If not None, apply the key function to the index values
5762 before sorting. This is similar to the `key` argument in the
5763 builtin :meth:`sorted` function, with the notable difference that
5764 this `key` function should be *vectorized*. It should expect an
5765 ``Index`` and return an ``Index`` of the same shape.
5767 .. versionadded:: 1.1.0
5769 Returns
5770 -------
5771 sorted_index : pandas.Index
5772 Sorted copy of the index.
5773 indexer : numpy.ndarray, optional
5774 The indices that the index itself was sorted by.
5776 See Also
5777 --------
5778 Series.sort_values : Sort values of a Series.
5779 DataFrame.sort_values : Sort values in a DataFrame.
5781 Examples
5782 --------
5783 >>> idx = pd.Index([10, 100, 1, 1000])
5784 >>> idx
5785 Int64Index([10, 100, 1, 1000], dtype='int64')
5787 Sort values in ascending order (default behavior).
5789 >>> idx.sort_values()
5790 Int64Index([1, 10, 100, 1000], dtype='int64')
5792 Sort values in descending order, and also get the indices `idx` was
5793 sorted by.
5795 >>> idx.sort_values(ascending=False, return_indexer=True)
5796 (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5797 """
5798 idx = ensure_key_mapped(self, key)
5800 # GH 35584. Sort missing values according to na_position kwarg
5801 # ignore na_position for MultiIndex
5802 if not isinstance(self, ABCMultiIndex):
5803 _as = nargsort(
5804 items=idx, ascending=ascending, na_position=na_position, key=key
5805 )
5806 else:
5807 _as = idx.argsort()
5808 if not ascending:
5809 _as = _as[::-1]
5811 sorted_index = self.take(_as)
5813 if return_indexer:
5814 return sorted_index, _as
5815 else:
5816 return sorted_index
5818 @final
5819 def sort(self, *args, **kwargs):
5820 """
5821 Use sort_values instead.
5822 """
5823 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5825 def shift(self, periods=1, freq=None):
5826 """
5827 Shift index by desired number of time frequency increments.
5829 This method is for shifting the values of datetime-like indexes
5830 by a specified time increment a given number of times.
5832 Parameters
5833 ----------
5834 periods : int, default 1
5835 Number of periods (or increments) to shift by,
5836 can be positive or negative.
5837 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5838 Frequency increment to shift by.
5839 If None, the index is shifted by its own `freq` attribute.
5840 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5842 Returns
5843 -------
5844 pandas.Index
5845 Shifted index.
5847 See Also
5848 --------
5849 Series.shift : Shift values of Series.
5851 Notes
5852 -----
5853 This method is only implemented for datetime-like index classes,
5854 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5856 Examples
5857 --------
5858 Put the first 5 month starts of 2011 into an index.
5860 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5861 >>> month_starts
5862 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5863 '2011-05-01'],
5864 dtype='datetime64[ns]', freq='MS')
5866 Shift the index by 10 days.
5868 >>> month_starts.shift(10, freq='D')
5869 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
5870 '2011-05-11'],
5871 dtype='datetime64[ns]', freq=None)
5873 The default value of `freq` is the `freq` attribute of the index,
5874 which is 'MS' (month start) in this example.
5876 >>> month_starts.shift(10)
5877 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
5878 '2012-03-01'],
5879 dtype='datetime64[ns]', freq='MS')
5880 """
5881 raise NotImplementedError(
5882 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
5883 f"TimedeltaIndex; Got type {type(self).__name__}"
5884 )
5886 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
5887 """
5888 Return the integer indices that would sort the index.
5890 Parameters
5891 ----------
5892 *args
5893 Passed to `numpy.ndarray.argsort`.
5894 **kwargs
5895 Passed to `numpy.ndarray.argsort`.
5897 Returns
5898 -------
5899 np.ndarray[np.intp]
5900 Integer indices that would sort the index if used as
5901 an indexer.
5903 See Also
5904 --------
5905 numpy.argsort : Similar method for NumPy arrays.
5906 Index.sort_values : Return sorted copy of Index.
5908 Examples
5909 --------
5910 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
5911 >>> idx
5912 Index(['b', 'a', 'd', 'c'], dtype='object')
5914 >>> order = idx.argsort()
5915 >>> order
5916 array([1, 0, 3, 2])
5918 >>> idx[order]
5919 Index(['a', 'b', 'c', 'd'], dtype='object')
5920 """
5921 # This works for either ndarray or EA, is overridden
5922 # by RangeIndex, MultIIndex
5923 return self._data.argsort(*args, **kwargs)
5925 @final
5926 def get_value(self, series: Series, key):
5927 """
5928 Fast lookup of value from 1-dimensional ndarray.
5930 Only use this if you know what you're doing.
5932 Returns
5933 -------
5934 scalar or Series
5935 """
5936 warnings.warn(
5937 "get_value is deprecated and will be removed in a future version. "
5938 "Use Series[key] instead.",
5939 FutureWarning,
5940 stacklevel=find_stack_level(),
5941 )
5943 self._check_indexing_error(key)
5945 try:
5946 # GH 20882, 21257
5947 # First try to convert the key to a location
5948 # If that fails, raise a KeyError if an integer
5949 # index, otherwise, see if key is an integer, and
5950 # try that
5951 loc = self.get_loc(key)
5952 except KeyError:
5953 if not self._should_fallback_to_positional:
5954 raise
5955 elif is_integer(key):
5956 # If the Index cannot hold integer, then this is unambiguously
5957 # a locational lookup.
5958 loc = key
5959 else:
5960 raise
5962 return self._get_values_for_loc(series, loc, key)
5964 def _check_indexing_error(self, key):
5965 if not is_scalar(key):
5966 # if key is not a scalar, directly raise an error (the code below
5967 # would convert to numpy arrays and raise later any way) - GH29926
5968 raise InvalidIndexError(key)
5970 @cache_readonly
5971 def _should_fallback_to_positional(self) -> bool:
5972 """
5973 Should an integer key be treated as positional?
5974 """
5975 return not self.holds_integer()
5977 def _get_values_for_loc(self, series: Series, loc, key):
5978 """
5979 Do a positional lookup on the given Series, returning either a scalar
5980 or a Series.
5982 Assumes that `series.index is self`
5984 key is included for MultiIndex compat.
5985 """
5986 if is_integer(loc):
5987 return series._values[loc]
5989 return series.iloc[loc]
5991 @final
5992 def set_value(self, arr, key, value) -> None:
5993 """
5994 Fast lookup of value from 1-dimensional ndarray.
5996 .. deprecated:: 1.0
5998 Notes
5999 -----
6000 Only use this if you know what you're doing.
6001 """
6002 warnings.warn(
6003 (
6004 "The 'set_value' method is deprecated, and "
6005 "will be removed in a future version."
6006 ),
6007 FutureWarning,
6008 stacklevel=find_stack_level(),
6009 )
6010 loc = self._engine.get_loc(key)
6011 if not can_hold_element(arr, value):
6012 raise ValueError
6013 arr[loc] = value
    # Shared docstring template for get_indexer_non_unique; attached to the
    # method below via @Appender with per-class %-substitutions.
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.
        """
    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Docstring is supplied by the @Appender template above.
        target = ensure_index(target)
        # Give subclasses a chance to cast list-likes (e.g. to Categorical).
        target = self._maybe_cast_listlike_indexer(target)

        if not self._should_compare(target) and not is_interval_dtype(self.dtype):
            # IntervalIndex get special treatment bc numeric scalars can be
            # matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        # Possibly upcast an object-dtype side to the other's dtype and retry.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer_non_unique(ptarget)

        if not is_dtype_equal(self.dtype, target.dtype):
            # Cast both sides to a common dtype and retry there.
            # TODO: if object, could use infer_dtype to preempt costly
            # conversion if still non-comparable?
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        # Note: _maybe_promote ensures we never get here with MultiIndex
        # self and non-Multi target
        tgt_values = target._get_engine_target()
        if self._is_multi and target._is_multi:
            engine = self._engine
            # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
            # no attribute "_extract_level_codes"
            tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), ensure_platform_int(missing)
6075 @final
6076 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
6077 """
6078 Guaranteed return of an indexer even when non-unique.
6080 This dispatches to get_indexer or get_indexer_non_unique
6081 as appropriate.
6083 Returns
6084 -------
6085 np.ndarray[np.intp]
6086 List of indices.
6088 Examples
6089 --------
6090 >>> idx = pd.Index([np.nan, 'var1', np.nan])
6091 >>> idx.get_indexer_for([np.nan])
6092 array([0, 2])
6093 """
6094 if self._index_as_unique:
6095 return self.get_indexer(target)
6096 indexer, _ = self.get_indexer_non_unique(target)
6097 return indexer
    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
        """
        Analogue to get_indexer that raises if any elements are missing.

        Parameters
        ----------
        key : list-like or Index
            Labels to look up.
        axis_name : str
            Axis name used in the KeyError message when labels are missing.

        Returns
        -------
        tuple[Index, np.ndarray]
            The matched labels (taken from self) and their integer positions.

        Raises
        ------
        KeyError
            If any requested label is absent (via _raise_if_missing).
        """
        keyarr = key
        if not isinstance(keyarr, Index):
            # Preserve tuples as scalars while converting to an array.
            keyarr = com.asarray_tuplesafe(keyarr)

        if self._index_as_unique:
            indexer = self.get_indexer_for(keyarr)
            keyarr = self.reindex(keyarr)[0]
        else:
            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

        # Raises KeyError if anything requested was not found.
        self._raise_if_missing(keyarr, indexer, axis_name)

        keyarr = self.take(indexer)
        if isinstance(key, Index):
            # GH 42790 - Preserve name from an Index
            keyarr.name = key.name
        if keyarr.dtype.kind in ["m", "M"]:
            # DTI/TDI.take can infer a freq in some cases when we dont want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

        return keyarr, indexer
6130 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
6131 """
6132 Check that indexer can be used to return a result.
6134 e.g. at least one element was found,
6135 unless the list of keys was actually empty.
6137 Parameters
6138 ----------
6139 key : list-like
6140 Targeted labels (only used to show correct error message).
6141 indexer: array-like of booleans
6142 Indices corresponding to the key,
6143 (with -1 indicating not found).
6144 axis_name : str
6146 Raises
6147 ------
6148 KeyError
6149 If at least one key was requested but none was found.
6150 """
6151 if len(key) == 0:
6152 return
6154 # Count missing values
6155 missing_mask = indexer < 0
6156 nmissing = missing_mask.sum()
6158 if nmissing:
6160 # TODO: remove special-case; this is just to keep exception
6161 # message tests from raising while debugging
6162 use_interval_msg = is_interval_dtype(self.dtype) or (
6163 is_categorical_dtype(self.dtype)
6164 # "Index" has no attribute "categories" [attr-defined]
6165 and is_interval_dtype(
6166 self.categories.dtype # type: ignore[attr-defined]
6167 )
6168 )
6170 if nmissing == len(indexer):
6171 if use_interval_msg:
6172 key = list(key)
6173 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6175 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6176 raise KeyError(f"{not_found} not in index")
6178 @overload
6179 def _get_indexer_non_comparable(
6180 self, target: Index, method, unique: Literal[True] = ...
6181 ) -> npt.NDArray[np.intp]:
6182 ...
6184 @overload
6185 def _get_indexer_non_comparable(
6186 self, target: Index, method, unique: Literal[False]
6187 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6188 ...
6190 @overload
6191 def _get_indexer_non_comparable(
6192 self, target: Index, method, unique: bool = True
6193 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6194 ...
6196 @final
6197 def _get_indexer_non_comparable(
6198 self, target: Index, method, unique: bool = True
6199 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6200 """
6201 Called from get_indexer or get_indexer_non_unique when the target
6202 is of a non-comparable dtype.
6204 For get_indexer lookups with method=None, get_indexer is an _equality_
6205 check, so non-comparable dtypes mean we will always have no matches.
6207 For get_indexer lookups with a method, get_indexer is an _inequality_
6208 check, so non-comparable dtypes mean we will always raise TypeError.
6210 Parameters
6211 ----------
6212 target : Index
6213 method : str or None
6214 unique : bool, default True
6215 * True if called from get_indexer.
6216 * False if called from get_indexer_non_unique.
6218 Raises
6219 ------
6220 TypeError
6221 If doing an inequality check, i.e. method is not None.
6222 """
6223 if method is not None:
6224 other = unpack_nested_dtype(target)
6225 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
6227 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
6228 if unique:
6229 # This is for get_indexer
6230 return no_matches
6231 else:
6232 # This is for get_indexer_non_unique
6233 missing = np.arange(len(target), dtype=np.intp)
6234 return no_matches, missing
6236 @property
6237 def _index_as_unique(self) -> bool:
6238 """
6239 Whether we should treat this as unique for the sake of
6240 get_indexer vs get_indexer_non_unique.
6242 For IntervalIndex compat.
6243 """
6244 return self.is_unique
    # Error message raised when reindexing needs a unique index; overridden
    # by subclasses (message text varies per Index type).
    _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
    @final
    def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.

        Returns
        -------
        tuple[Index, Index]
            Possibly-promoted versions of (self, other); the originals are
            returned unchanged when no promotion applies.
        """

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (
                self.tz is not None
                and other.tz is not None
                and not tz_compare(self.tz, other.tz)
            ):
                # standardize on UTC
                return self.tz_convert("UTC"), other.tz_convert("UTC")

        elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            try:
                # Object-dtype index of dates: try converting to DatetimeIndex.
                return type(other)(self), other
            except OutOfBoundsDatetime:
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we dont have tests that get here
            return type(other)(self), other

        elif self.dtype.kind == "u" and other.dtype.kind == "i":
            # GH#41873
            if other.min() >= 0:
                # All values non-negative: safe to view the signed ints as
                # unsigned so the two sides share a dtype.
                # lookup min as it may be cached
                # TODO: may need itemsize check if we have non-64-bit Indexes
                return self, other.astype(self.dtype)

        elif self._is_multi and not other._is_multi:
            try:
                # "Type[Index]" has no attribute "from_tuples"
                other = type(self).from_tuples(other)  # type: ignore[attr-defined]
            except (TypeError, ValueError):
                # let's instead try with a straight Index
                self = Index(self._values)

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we dont need to re-implement on the subclasses
            other, self = other._maybe_promote(self)

        return self, other
    @final
    def _find_common_type_compat(self, target) -> DtypeObj:
        """
        Implementation of find_common_type that adjusts for Index-specific
        special cases.

        Parameters
        ----------
        target : object
            Scalar, array-like, or Index to find a common dtype with.

        Returns
        -------
        DtypeObj
            A dtype that can hold both self's values and target.
        """
        if is_valid_na_for_dtype(target, self.dtype):
            # e.g. setting NA value into IntervalArray[int64]
            dtype = ensure_dtype_can_hold_na(self.dtype)
            if is_dtype_equal(self.dtype, dtype):
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas"
                )
            return dtype

        target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

        # special case: if one dtype is uint64 and the other a signed int, return object
        # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
        # Now it's:
        # * float | [u]int -> float
        # * uint64 | signed int -> object
        # We may change union(float | [u]int) to go to object.
        if self.dtype == "uint64" or target_dtype == "uint64":
            if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
                target_dtype
            ):
                return _dtype_obj

        dtype = find_common_type([self.dtype, target_dtype])
        dtype = common_dtype_categorical_compat([self, target], dtype)
        return dtype
6328 @final
6329 def _should_compare(self, other: Index) -> bool:
6330 """
6331 Check if `self == other` can ever have non-False entries.
6332 """
6334 if (other.is_boolean() and self.is_numeric()) or (
6335 self.is_boolean() and other.is_numeric()
6336 ):
6337 # GH#16877 Treat boolean labels passed to a numeric index as not
6338 # found. Without this fix False and True would be treated as 0 and 1
6339 # respectively.
6340 return False
6342 other = unpack_nested_dtype(other)
6343 dtype = other.dtype
6344 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6346 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6347 """
6348 Can we compare values of the given dtype to our own?
6349 """
6350 if self.dtype.kind == "b":
6351 return dtype.kind == "b"
6352 elif is_numeric_dtype(self.dtype):
6353 return is_numeric_dtype(dtype)
6354 return True
6356 @final
6357 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6358 """
6359 Group the index labels by a given array of values.
6361 Parameters
6362 ----------
6363 values : array
6364 Values used to determine the groups.
6366 Returns
6367 -------
6368 dict
6369 {group name -> group labels}
6370 """
6371 # TODO: if we are a MultiIndex, we can do better
6372 # that converting to tuples
6373 if isinstance(values, ABCMultiIndex):
6374 values = values._values
6375 values = Categorical(values)
6376 result = values._reverse_indexer()
6378 # map to the label
6379 result = {k: self.take(v) for k, v in result.items()}
6381 return PrettyDict(result)
    def map(self, mapper, na_action=None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # Tuples in the result -> return a MultiIndex.
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                # Replicate the single name across all resulting levels.
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty: preserve the original dtype
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        # don't want to cast back to floating. But if we are UInt64
        # and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
            new_values.dtype
        ):
            # Keep the numeric Index subclass when the result stays numeric.
            return self._constructor(
                new_values, dtype=dtype, copy=False, name=self.name
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6439 # TODO: De-duplicate with map, xref GH#32349
6440 @final
6441 def _transform_index(self, func, *, level=None) -> Index:
6442 """
6443 Apply function to all values found in index.
6445 This includes transforming multiindex entries separately.
6446 Only apply function to one level of the MultiIndex if level is specified.
6447 """
6448 if isinstance(self, ABCMultiIndex):
6449 if level is not None:
6450 # Caller is responsible for ensuring level is positional.
6451 items = [
6452 tuple(func(y) if i == level else y for i, y in enumerate(x))
6453 for x in self
6454 ]
6455 else:
6456 items = [tuple(func(y) for y in x) for x in self]
6457 return type(self).from_tuples(items, names=self.names)
6458 else:
6459 items = [func(x) for x in self]
6460 return Index(items, name=self.name, tupleize_cols=False)
    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        np.ndarray[bool]
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Int64Index([1, 2, 3], dtype='int64')

        Check whether each index value in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                  ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1, 'red'),
                    (2, 'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            # validates that `level` refers to this index; the membership
            # test itself is always computed on self._values below
            self._validate_index_level(level)
        return algos.isin(self._values, values)
    def _get_string_slice(self, key: str_t):
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex;
        # the base Index has no partial-string semantics, so always raise
        raise NotImplementedError
6552 def slice_indexer(
6553 self,
6554 start: Hashable | None = None,
6555 end: Hashable | None = None,
6556 step: int | None = None,
6557 kind=no_default,
6558 ) -> slice:
6559 """
6560 Compute the slice indexer for input labels and step.
6562 Index needs to be ordered and unique.
6564 Parameters
6565 ----------
6566 start : label, default None
6567 If None, defaults to the beginning.
6568 end : label, default None
6569 If None, defaults to the end.
6570 step : int, default None
6571 kind : str, default None
6573 .. deprecated:: 1.4.0
6575 Returns
6576 -------
6577 indexer : slice
6579 Raises
6580 ------
6581 KeyError : If key does not exist, or key is not unique and index is
6582 not ordered.
6584 Notes
6585 -----
6586 This function assumes that the data is sorted, so use at your own peril
6588 Examples
6589 --------
6590 This is a method on all index types. For example you can do:
6592 >>> idx = pd.Index(list('abcd'))
6593 >>> idx.slice_indexer(start='b', end='c')
6594 slice(1, 3, None)
6596 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6597 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6598 slice(1, 3, None)
6599 """
6600 self._deprecated_arg(kind, "kind", "slice_indexer")
6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
6604 # return a slice
6605 if not is_scalar(start_slice):
6606 raise AssertionError("Start slice bound is non-scalar")
6607 if not is_scalar(end_slice):
6608 raise AssertionError("End slice bound is non-scalar")
6610 return slice(start_slice, end_slice, step)
6612 def _maybe_cast_indexer(self, key):
6613 """
6614 If we have a float key and are not a floating index, then try to cast
6615 to an int if equivalent.
6616 """
6617 return key
6619 def _maybe_cast_listlike_indexer(self, target) -> Index:
6620 """
6621 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6622 """
6623 return ensure_index(target)
6625 @final
6626 def _validate_indexer(self, form: str_t, key, kind: str_t):
6627 """
6628 If we are positional indexer, validate that we have appropriate
6629 typed bounds must be an integer.
6630 """
6631 assert kind in ["getitem", "iloc"]
6633 if key is not None and not is_integer(key):
6634 raise self._invalid_indexer(form, key)
6636 def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
6637 """
6638 This function should be overloaded in subclasses that allow non-trivial
6639 casting on label-slice bounds, e.g. datetime-like indices allowing
6640 strings containing formatted datetimes.
6642 Parameters
6643 ----------
6644 label : object
6645 side : {'left', 'right'}
6646 kind : {'loc', 'getitem'} or None
6648 .. deprecated:: 1.3.0
6650 Returns
6651 -------
6652 label : object
6654 Notes
6655 -----
6656 Value of `side` parameter should be validated in caller.
6657 """
6658 assert kind in ["loc", "getitem", None, no_default]
6659 self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
6661 # We are a plain index here (sub-class override this method if they
6662 # wish to have special treatment for floats/ints, e.g. Float64Index and
6663 # datetimelike Indexes
6664 # reject them, if index does not contain label
6665 if (is_float(label) or is_integer(label)) and label not in self:
6666 raise self._invalid_indexer("slice", label)
6668 return label
6670 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6671 if self.is_monotonic_increasing:
6672 return self.searchsorted(label, side=side)
6673 elif self.is_monotonic_decreasing:
6674 # np.searchsorted expects ascending sort order, have to reverse
6675 # everything for it to work (element ordering, search side and
6676 # resulting value).
6677 pos = self[::-1].searchsorted(
6678 label, side="right" if side == "left" else "left"
6679 )
6680 return len(self) - pos
6682 raise ValueError("index must be monotonic increasing or decreasing")
6684 def get_slice_bound(
6685 self, label, side: Literal["left", "right"], kind=no_default
6686 ) -> int:
6687 """
6688 Calculate slice bound that corresponds to given label.
6690 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
6691 of given label.
6693 Parameters
6694 ----------
6695 label : object
6696 side : {'left', 'right'}
6697 kind : {'loc', 'getitem'} or None
6699 .. deprecated:: 1.4.0
6701 Returns
6702 -------
6703 int
6704 Index of label.
6705 """
6706 assert kind in ["loc", "getitem", None, no_default]
6707 self._deprecated_arg(kind, "kind", "get_slice_bound")
6709 if side not in ("left", "right"):
6710 raise ValueError(
6711 "Invalid value for side kwarg, must be either "
6712 f"'left' or 'right': {side}"
6713 )
6715 original_label = label
6717 # For datetime indices label may be a string that has to be converted
6718 # to datetime boundary according to its resolution.
6719 label = self._maybe_cast_slice_bound(label, side)
6721 # we need to look up the label
6722 try:
6723 slc = self.get_loc(label)
6724 except KeyError as err:
6725 try:
6726 return self._searchsorted_monotonic(label, side)
6727 except ValueError:
6728 # raise the original KeyError
6729 raise err
6731 if isinstance(slc, np.ndarray):
6732 # get_loc may return a boolean array, which
6733 # is OK as long as they are representable by a slice.
6734 assert is_bool_dtype(slc.dtype)
6735 slc = lib.maybe_booleans_to_slice(slc.view("u1"))
6736 if isinstance(slc, np.ndarray):
6737 raise KeyError(
6738 f"Cannot get {side} slice bound for non-unique "
6739 f"label: {repr(original_label)}"
6740 )
6742 if isinstance(slc, slice):
6743 if side == "left":
6744 return slc.start
6745 else:
6746 return slc.stop
6747 else:
6748 if side == "right":
6749 return slc + 1
6750 else:
6751 return slc
    def slice_locs(
        self, start=None, end=None, step=None, kind=no_default
    ) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.
        kind : {'loc', 'getitem'} or None

            .. deprecated:: 1.4.0

        Returns
        -------
        start, end : int

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        self._deprecated_arg(kind, "kind", "slice_locs")
        # a negative step means the labels were given in reverse order
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as dates: skip the offset consistency check
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        # None bounds default to the full extent of the index
        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
6847 def delete(self: _IndexT, loc) -> _IndexT:
6848 """
6849 Make new Index with passed location(-s) deleted.
6851 Parameters
6852 ----------
6853 loc : int or list of int
6854 Location of item(-s) which will be deleted.
6855 Use a list of locations to delete more than one value at the same time.
6857 Returns
6858 -------
6859 Index
6860 Will be same type as self, except for RangeIndex.
6862 See Also
6863 --------
6864 numpy.delete : Delete any rows and column from NumPy array (ndarray).
6866 Examples
6867 --------
6868 >>> idx = pd.Index(['a', 'b', 'c'])
6869 >>> idx.delete(1)
6870 Index(['a', 'c'], dtype='object')
6872 >>> idx = pd.Index(['a', 'b', 'c'])
6873 >>> idx.delete([0, 2])
6874 Index(['b'], dtype='object')
6875 """
6876 values = self._values
6877 res_values: ArrayLike
6878 if isinstance(values, np.ndarray):
6879 # TODO(__array_function__): special casing will be unnecessary
6880 res_values = np.delete(values, loc)
6881 else:
6882 res_values = values.delete(loc)
6884 # _constructor so RangeIndex->Int64Index
6885 return self._constructor._simple_new(res_values, name=self.name)
6887 def insert(self, loc: int, item) -> Index:
6888 """
6889 Make new Index inserting new item at location.
6891 Follows Python numpy.insert semantics for negative values.
6893 Parameters
6894 ----------
6895 loc : int
6896 item : object
6898 Returns
6899 -------
6900 new_index : Index
6901 """
6902 item = lib.item_from_zerodim(item)
6903 if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
6904 item = self._na_value
6906 arr = self._values
6908 try:
6909 if isinstance(arr, ExtensionArray):
6910 res_values = arr.insert(loc, item)
6911 return type(self)._simple_new(res_values, name=self.name)
6912 else:
6913 item = self._validate_fill_value(item)
6914 except (TypeError, ValueError, LossySetitemError):
6915 # e.g. trying to insert an integer into a DatetimeIndex
6916 # We cannot keep the same dtype, so cast to the (often object)
6917 # minimal shared dtype before doing the insert.
6918 dtype = self._find_common_type_compat(item)
6919 return self.astype(dtype).insert(loc, item)
6921 if arr.dtype != object or not isinstance(
6922 item, (tuple, np.datetime64, np.timedelta64)
6923 ):
6924 # with object-dtype we need to worry about numpy incorrectly casting
6925 # dt64/td64 to integer, also about treating tuples as sequences
6926 # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
6927 casted = arr.dtype.type(item)
6928 new_values = np.insert(arr, loc, casted)
6930 else:
6931 # error: No overload variant of "insert" matches argument types
6932 # "ndarray[Any, Any]", "int", "None"
6933 new_values = np.insert(arr, loc, None) # type: ignore[call-overload]
6934 loc = loc if loc >= 0 else loc - 1
6935 new_values[loc] = item
6937 if self._typ == "numericindex":
6938 # Use self._constructor instead of Index to retain NumericIndex GH#43921
6939 # TODO(2.0) can use Index instead of self._constructor
6940 return self._constructor._with_infer(new_values, name=self.name)
6941 else:
6942 return Index._with_infer(new_values, name=self.name)
6944 def drop(
6945 self,
6946 labels: Index | np.ndarray | Iterable[Hashable],
6947 errors: IgnoreRaise = "raise",
6948 ) -> Index:
6949 """
6950 Make new Index with passed list of labels deleted.
6952 Parameters
6953 ----------
6954 labels : array-like or scalar
6955 errors : {'ignore', 'raise'}, default 'raise'
6956 If 'ignore', suppress error and existing labels are dropped.
6958 Returns
6959 -------
6960 dropped : Index
6961 Will be same type as self, except for RangeIndex.
6963 Raises
6964 ------
6965 KeyError
6966 If not all of the labels are found in the selected axis
6967 """
6968 if not isinstance(labels, Index):
6969 # avoid materializing e.g. RangeIndex
6970 arr_dtype = "object" if self.dtype == "object" else None
6971 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
6973 indexer = self.get_indexer_for(labels)
6974 mask = indexer == -1
6975 if mask.any():
6976 if errors != "ignore":
6977 raise KeyError(f"{list(labels[mask])} not found in axis")
6978 indexer = indexer[~mask]
6979 return self.delete(indexer)
6981 # --------------------------------------------------------------------
6982 # Generated Arithmetic, Comparison, and Unary Methods
    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.

        Parameters
        ----------
        other : object
            Right-hand operand of the comparison.
        op : callable
            Comparison operator from the ``operator`` module.
        """
        if self.is_(other):
            # fastpath: comparing an index with itself
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    # NA != NA, so reflexive comparisons are False at NA slots
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        # elementwise comparison requires equal lengths
        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            other = extract_array(other, extract_numpy=True)
        else:
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
7031 def _construct_result(self, result, name):
7032 if isinstance(result, tuple):
7033 return (
7034 Index._with_infer(result[0], name=name),
7035 Index._with_infer(result[1], name=name),
7036 )
7037 return Index._with_infer(result, name=name)
7039 def _arith_method(self, other, op):
7040 if (
7041 isinstance(other, Index)
7042 and is_object_dtype(other.dtype)
7043 and type(other) is not Index
7044 ):
7045 # We return NotImplemented for object-dtype index *subclasses* so they have
7046 # a chance to implement ops before we unwrap them.
7047 # See https://github.com/pandas-dev/pandas/issues/31109
7048 return NotImplemented
7050 return super()._arith_method(other, op)
7052 @final
7053 def _unary_method(self, op):
7054 result = op(self._values)
7055 return Index(result, name=self.name)
    def __abs__(self) -> Index:
        # support for builtin abs(): elementwise absolute value
        return self._unary_method(operator.abs)
    def __neg__(self) -> Index:
        # support for unary minus: elementwise negation
        return self._unary_method(operator.neg)
    def __pos__(self) -> Index:
        # support for unary plus: elementwise identity via operator.pos
        return self._unary_method(operator.pos)
    def __invert__(self) -> Index:
        # GH#8875 support for the ~ operator: elementwise bitwise inversion
        return self._unary_method(operator.inv)
7070 # --------------------------------------------------------------------
7071 # Reductions
    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        any : bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        nv.validate_any(args, kwargs)
        # raises for index types that don't support logical reductions
        self._maybe_disable_logical_methods("any")
        # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.any(self.values)  # type: ignore[arg-type]
    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        all : bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False
        """
        nv.validate_all(args, kwargs)
        # raises for index types that don't support logical reductions
        self._maybe_disable_logical_methods("all")
        # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.all(self.values)  # type: ignore[arg-type]
7164 @final
7165 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
7166 """
7167 raise if this Index subclass does not support any or all.
7168 """
7169 if (
7170 isinstance(self, ABCMultiIndex)
7171 or needs_i8_conversion(self.dtype)
7172 or is_interval_dtype(self.dtype)
7173 or is_categorical_dtype(self.dtype)
7174 or is_float_dtype(self.dtype)
7175 ):
7176 # This call will raise
7177 make_invalid_op(opname)(self)
7179 @Appender(IndexOpsMixin.argmin.__doc__)
7180 def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int:
7181 nv.validate_argmin(args, kwargs)
7182 nv.validate_minmax_axis(axis)
7184 if not self._is_multi and self.hasnans:
7185 # Take advantage of cache
7186 mask = self._isnan
7187 if not skipna or mask.all():
7188 return -1
7189 return super().argmin(skipna=skipna)
7191 @Appender(IndexOpsMixin.argmax.__doc__)
7192 def argmax(self, axis=None, skipna=True, *args, **kwargs) -> int:
7193 nv.validate_argmax(args, kwargs)
7194 nv.validate_minmax_axis(axis)
7196 if not self._is_multi and self.hasnans:
7197 # Take advantage of cache
7198 mask = self._isnan
7199 if not skipna or mask.all():
7200 return -1
7201 return super().argmax(skipna=skipna)
7203 @doc(IndexOpsMixin.min)
7204 def min(self, axis=None, skipna=True, *args, **kwargs):
7205 nv.validate_min(args, kwargs)
7206 nv.validate_minmax_axis(axis)
7208 if not len(self):
7209 return self._na_value
7211 if len(self) and self.is_monotonic_increasing:
7212 # quick check
7213 first = self[0]
7214 if not isna(first):
7215 return first
7217 if not self._is_multi and self.hasnans:
7218 # Take advantage of cache
7219 mask = self._isnan
7220 if not skipna or mask.all():
7221 return self._na_value
7223 if not self._is_multi and not isinstance(self._values, np.ndarray):
7224 # "ExtensionArray" has no attribute "min"
7225 return self._values.min(skipna=skipna) # type: ignore[attr-defined]
7227 return super().min(skipna=skipna)
7229 @doc(IndexOpsMixin.max)
7230 def max(self, axis=None, skipna=True, *args, **kwargs):
7231 nv.validate_max(args, kwargs)
7232 nv.validate_minmax_axis(axis)
7234 if not len(self):
7235 return self._na_value
7237 if len(self) and self.is_monotonic_increasing:
7238 # quick check
7239 last = self[-1]
7240 if not isna(last):
7241 return last
7243 if not self._is_multi and self.hasnans:
7244 # Take advantage of cache
7245 mask = self._isnan
7246 if not skipna or mask.all():
7247 return self._na_value
7249 if not self._is_multi and not isinstance(self._values, np.ndarray):
7250 # "ExtensionArray" has no attribute "max"
7251 return self._values.max(skipna=skipna) # type: ignore[attr-defined]
7253 return super().max(skipna=skipna)
7255 # --------------------------------------------------------------------
    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        # An Index is always 1-dimensional; use len() to avoid materializing
        # the underlying values.
        return (len(self),)
7266 @final
7267 def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None:
7268 """
7269 Issue a FutureWarning if the arg/kwarg is not no_default.
7270 """
7271 if value is not no_default:
7272 warnings.warn(
7273 f"'{name}' argument in {methodname} is deprecated "
7274 "and will be removed in a future version. Do not pass it.",
7275 FutureWarning,
7276 stacklevel=find_stack_level(),
7277 )
def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)
    # single sequence: a flat Index with at most one name
    name = names[0] if names is not None else None
    return Index._with_infer(sequences[0], name=name)
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if isinstance(index_like, ABCSeries):
        return Index._with_infer(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index._with_infer(index_like, copy=copy)

    if type(index_like) is not list:
        # must be exactly ``list`` because of the strict type
        # check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        # a list of array-likes describes one level per entry
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)
    return Index._with_infer(index_like, copy=copy, tupleize_cols=False)
def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        # no __len__: materialize the iterator
        return list(seq)
    return seq
def trim_front(strings: list[str]) -> list[str]:
    """
    Strip the longest common run of leading spaces from each string.

    Stops as soon as any string becomes empty or any string no longer
    starts with a space, so relative alignment beyond the common prefix
    is preserved.  (The previous docstring incorrectly described this as
    trimming zeros and decimal points.)

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    # peel one leading space off every string while all are non-empty
    # and all still start with a space
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings
7410def _validate_join_method(method: str) -> None:
7411 if method not in ["left", "right", "inner", "outer"]:
7412 raise ValueError(f"do not recognize join method {method}")
def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # deliberately not getattr(obj, "name", None): that could pick up
        # e.g. dtype.name from non-Index objects
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name
# Module-level template for the deprecation warning issued when an
# object-dtype array-like is implicitly inferred to a numeric dtype.
_cast_depr_msg = (
    "In a future version, passing an object-dtype arraylike to pd.Index will "
    "not infer numeric values to numeric dtype (matching the Series behavior). "
    "To retain the old behavior, explicitly pass the desired dtype or use the "
    "desired Index subclass"
)
7439def _maybe_cast_data_without_dtype(
7440 subarr: np.ndarray, cast_numeric_deprecated: bool = True
7441) -> ArrayLike:
7442 """
7443 If we have an arraylike input but no passed dtype, try to infer
7444 a supported dtype.
7446 Parameters
7447 ----------
7448 subarr : np.ndarray[object]
7449 cast_numeric_deprecated : bool, default True
7450 Whether to issue a FutureWarning when inferring numeric dtypes.
7452 Returns
7453 -------
7454 np.ndarray or ExtensionArray
7455 """
7457 result = lib.maybe_convert_objects(
7458 subarr,
7459 convert_datetime=True,
7460 convert_timedelta=True,
7461 convert_period=True,
7462 convert_interval=True,
7463 dtype_if_all_nat=np.dtype("datetime64[ns]"),
7464 )
7465 if result.dtype.kind in ["i", "u", "f"]:
7466 if not cast_numeric_deprecated:
7467 # i.e. we started with a list, not an ndarray[object]
7468 return result
7470 warnings.warn(
7471 "In a future version, the Index constructor will not infer numeric "
7472 "dtypes when passed object-dtype sequences (matching Series behavior)",
7473 FutureWarning,
7474 stacklevel=find_stack_level(),
7475 )
7476 result = ensure_wrapped_if_datetimelike(result)
7477 return result
def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the unanimous 'names' found.
    """
    # transpose per-index name tuples into per-level groups
    per_level = zip_longest(*(tuple(idx.names) for idx in indexes))
    return tuple(
        next(iter(group)) if len(group) == 1 else None
        for group in (set(names) for names in per_level)
    )
def unpack_nested_dtype(other: _IndexT) -> _IndexT:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    dtype = other.dtype
    if not is_categorical_dtype(dtype):
        return other
    # A SparseIndex, if it ever exists, would also be dispatched here.
    # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any],
    # ExtensionDtype]" has no attribute "categories"
    return dtype.categories  # type: ignore[union-attr]
7522def _maybe_try_sort(result, sort):
7523 if sort is None:
7524 try:
7525 result = algos.safe_sort(result)
7526 except TypeError as err:
7527 warnings.warn(
7528 f"{err}, sort order is undefined for incomparable objects.",
7529 RuntimeWarning,
7530 stacklevel=find_stack_level(),
7531 )
7532 return result