Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/base.py: 14%
2370 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import datetime
4import functools
5from itertools import zip_longest
6import operator
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 ClassVar,
12 Hashable,
13 Iterable,
14 Literal,
15 NoReturn,
16 Sequence,
17 TypeVar,
18 cast,
19 final,
20 overload,
21)
22import warnings
24import numpy as np
26from pandas._config import get_option
28from pandas._libs import (
29 NaT,
30 algos as libalgos,
31 index as libindex,
32 lib,
33)
34import pandas._libs.join as libjoin
35from pandas._libs.lib import (
36 is_datetime_array,
37 no_default,
38)
39from pandas._libs.missing import is_float_nan
40from pandas._libs.tslibs import (
41 IncompatibleFrequency,
42 OutOfBoundsDatetime,
43 Timestamp,
44 is_unitless,
45 tz_compare,
46)
47from pandas._typing import (
48 ArrayLike,
49 Axes,
50 Dtype,
51 DtypeObj,
52 F,
53 IgnoreRaise,
54 Level,
55 Shape,
56 npt,
57)
58from pandas.compat.numpy import function as nv
59from pandas.errors import (
60 DuplicateLabelError,
61 IntCastingNaNError,
62 InvalidIndexError,
63)
64from pandas.util._decorators import (
65 Appender,
66 cache_readonly,
67 deprecate_nonkeyword_arguments,
68 doc,
69)
70from pandas.util._exceptions import (
71 find_stack_level,
72 rewrite_exception,
73)
75from pandas.core.dtypes.astype import astype_nansafe
76from pandas.core.dtypes.cast import (
77 LossySetitemError,
78 can_hold_element,
79 common_dtype_categorical_compat,
80 ensure_dtype_can_hold_na,
81 find_common_type,
82 infer_dtype_from,
83 maybe_cast_pointwise_result,
84 np_can_hold_element,
85)
86from pandas.core.dtypes.common import (
87 ensure_int64,
88 ensure_object,
89 ensure_platform_int,
90 is_bool_dtype,
91 is_categorical_dtype,
92 is_dtype_equal,
93 is_ea_or_datetimelike_dtype,
94 is_extension_array_dtype,
95 is_float,
96 is_float_dtype,
97 is_hashable,
98 is_integer,
99 is_interval_dtype,
100 is_iterator,
101 is_list_like,
102 is_numeric_dtype,
103 is_object_dtype,
104 is_scalar,
105 is_signed_integer_dtype,
106 is_string_dtype,
107 is_unsigned_integer_dtype,
108 needs_i8_conversion,
109 pandas_dtype,
110 validate_all_hashable,
111)
112from pandas.core.dtypes.concat import concat_compat
113from pandas.core.dtypes.dtypes import (
114 CategoricalDtype,
115 DatetimeTZDtype,
116 ExtensionDtype,
117 IntervalDtype,
118 PandasDtype,
119 PeriodDtype,
120)
121from pandas.core.dtypes.generic import (
122 ABCDataFrame,
123 ABCDatetimeIndex,
124 ABCMultiIndex,
125 ABCPeriodIndex,
126 ABCRangeIndex,
127 ABCSeries,
128 ABCTimedeltaIndex,
129)
130from pandas.core.dtypes.inference import is_dict_like
131from pandas.core.dtypes.missing import (
132 array_equivalent,
133 is_valid_na_for_dtype,
134 isna,
135)
137from pandas.core import (
138 arraylike,
139 missing,
140 ops,
141)
142from pandas.core.accessor import CachedAccessor
143import pandas.core.algorithms as algos
144from pandas.core.array_algos.putmask import (
145 setitem_datetimelike_compat,
146 validate_putmask,
147)
148from pandas.core.arrays import (
149 Categorical,
150 ExtensionArray,
151)
152from pandas.core.arrays.datetimes import (
153 tz_to_dtype,
154 validate_tz_from_dtype,
155)
156from pandas.core.arrays.sparse import SparseDtype
157from pandas.core.arrays.string_ import StringArray
158from pandas.core.base import (
159 IndexOpsMixin,
160 PandasObject,
161)
162import pandas.core.common as com
163from pandas.core.construction import (
164 ensure_wrapped_if_datetimelike,
165 extract_array,
166 sanitize_array,
167)
168from pandas.core.indexers import deprecate_ndim_indexing
169from pandas.core.indexes.frozen import FrozenList
170from pandas.core.ops import get_op_result_name
171from pandas.core.ops.invalid import make_invalid_op
172from pandas.core.sorting import (
173 ensure_key_mapped,
174 get_group_index_sorter,
175 nargsort,
176)
177from pandas.core.strings import StringMethods
179from pandas.io.formats.printing import (
180 PrettyDict,
181 default_pprint,
182 format_object_summary,
183 pprint_thing,
184)
186if TYPE_CHECKING: 186 ↛ 187line 186 didn't jump to line 187, because the condition on line 186 was never true
187 from pandas import (
188 CategoricalIndex,
189 DataFrame,
190 MultiIndex,
191 Series,
192 )
193 from pandas.core.arrays import PeriodArray
# Public API of this module.
__all__ = ["Index"]

# lib.infer_dtype results for which sorting is not well-defined.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Substitutions applied to docstrings shared across Index subclasses.
_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
# Filled in further down with docstring templates (e.g. "take").
_index_shared_docs: dict[str, str] = {}

# Alias for the builtin ``str``: the class body defines a ``str`` accessor
# attribute, so annotations below need an untouched alias.
str_t = str

# Cached object-dtype singleton; avoids repeated np.dtype("object") calls.
_dtype_obj = np.dtype("object")
215def _maybe_return_indexers(meth: F) -> F:
216 """
217 Decorator to simplify 'return_indexers' checks in Index.join.
218 """
220 @functools.wraps(meth)
221 def join(
222 self,
223 other,
224 how: str_t = "left",
225 level=None,
226 return_indexers: bool = False,
227 sort: bool = False,
228 ):
229 join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
230 if not return_indexers:
231 return join_index
233 if lidx is not None:
234 lidx = ensure_platform_int(lidx)
235 if ridx is not None:
236 ridx = ensure_platform_int(ridx)
237 return join_index, lidx, ridx
239 return cast(F, join)
def disallow_kwargs(kwargs: dict[str, Any]) -> None:
    """Raise ``TypeError`` if any unexpected keyword arguments remain."""
    if not kwargs:
        return
    raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        The Index subclass recorded in the pickle.
    d : dict
        The pickled state, passed as keyword arguments to the constructor.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)
275_IndexT = TypeVar("_IndexT", bound="Index")
278class Index(IndexOpsMixin, PandasObject):
279 """
280 Immutable sequence used for indexing and alignment.
282 The basic object storing axis labels for all pandas objects.
284 Parameters
285 ----------
286 data : array-like (1-dimensional)
287 dtype : NumPy dtype (default: object)
288 If dtype is None, we find the dtype that best fits the data.
289 If an actual dtype is provided, we coerce to that dtype if it's safe.
290 Otherwise, an error will be raised.
291 copy : bool
292 Make a copy of input ndarray.
293 name : object
294 Name to be stored in the index.
295 tupleize_cols : bool (default: True)
296 When True, attempt to create a MultiIndex if possible.
298 See Also
299 --------
300 RangeIndex : Index implementing a monotonic integer range.
301 CategoricalIndex : Index of :class:`Categorical` s.
302 MultiIndex : A multi-level, or hierarchical Index.
303 IntervalIndex : An Index of :class:`Interval` s.
304 DatetimeIndex : Index of datetime64 data.
305 TimedeltaIndex : Index of timedelta64 data.
306 PeriodIndex : Index of Period data.
307 NumericIndex : Index of numpy int/uint/float data.
308 Int64Index : Index of purely int64 labels (deprecated).
309 UInt64Index : Index of purely uint64 labels (deprecated).
310 Float64Index : Index of purely float64 labels (deprecated).
312 Notes
313 -----
314 An Index instance can **only** contain hashable objects
316 Examples
317 --------
318 >>> pd.Index([1, 2, 3])
319 Int64Index([1, 2, 3], dtype='int64')
321 >>> pd.Index(list('abc'))
322 Index(['a', 'b', 'c'], dtype='object')
323 """
    # tolist is not actually deprecated, just suppressed in the __dir__
    _hidden_attrs: frozenset[str] = (
        PandasObject._hidden_attrs
        | IndexOpsMixin._hidden_attrs
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses: when joining two Index objects,
    # the side with the higher precedence drives the join.
    _join_precedence = 1
340 @final
341 def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
342 # Caller is responsible for ensuring other.dtype == self.dtype
343 sv = self._get_engine_target()
344 ov = other._get_engine_target()
345 # can_use_libjoin assures sv and ov are ndarrays
346 sv = cast(np.ndarray, sv)
347 ov = cast(np.ndarray, ov)
348 return libjoin.left_join_indexer_unique(sv, ov)
350 @final
351 def _left_indexer(
352 self: _IndexT, other: _IndexT
353 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
354 # Caller is responsible for ensuring other.dtype == self.dtype
355 sv = self._get_engine_target()
356 ov = other._get_engine_target()
357 # can_use_libjoin assures sv and ov are ndarrays
358 sv = cast(np.ndarray, sv)
359 ov = cast(np.ndarray, ov)
360 joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
361 joined = self._from_join_target(joined_ndarray)
362 return joined, lidx, ridx
364 @final
365 def _inner_indexer(
366 self: _IndexT, other: _IndexT
367 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
368 # Caller is responsible for ensuring other.dtype == self.dtype
369 sv = self._get_engine_target()
370 ov = other._get_engine_target()
371 # can_use_libjoin assures sv and ov are ndarrays
372 sv = cast(np.ndarray, sv)
373 ov = cast(np.ndarray, ov)
374 joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
375 joined = self._from_join_target(joined_ndarray)
376 return joined, lidx, ridx
378 @final
379 def _outer_indexer(
380 self: _IndexT, other: _IndexT
381 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
382 # Caller is responsible for ensuring other.dtype == self.dtype
383 sv = self._get_engine_target()
384 ov = other._get_engine_target()
385 # can_use_libjoin assures sv and ov are ndarrays
386 sv = cast(np.ndarray, sv)
387 ov = cast(np.ndarray, ov)
388 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
389 joined = self._from_join_target(joined_ndarray)
390 return joined, lidx, ridx
    # Sentinel used by ABC* generic-type checks (pandas.core.dtypes.generic).
    _typ: str = "index"
    # Backing store for the index values.
    _data: ExtensionArray | np.ndarray
    # Acceptable type(s) for ``_data``; asserted in _simple_new. Subclasses
    # may narrow this to a single array class.
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    # Identity token shared between views; see Index.is_ / _reset_identity.
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]
    _is_numeric_dtype: bool = False
    _can_hold_strings: bool = True

    # Whether this index is a NumericIndex, but not a Int64Index, Float64Index,
    # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and
    # associated code in pandas 2.0.
    _is_backward_compat_public_numeric_index: bool = False

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
        # Engine class backing label lookups; object-dtype base class uses
        # ObjectEngine. Subclasses override with dtype-specific engines.
        return libindex.ObjectEngine

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    # ``.str`` accessor for vectorized string methods, e.g. idx.str.upper().
    str = CachedAccessor("str", StringMethods)
428 # --------------------------------------------------------------------
429 # Constructors
    def __new__(
        cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
    ) -> Index:
        # Main Index constructor: dispatches to the appropriate subclass for
        # the requested/inferred dtype, or builds a base Index.

        if kwargs:
            # All keywords beyond the documented five are deprecated.
            warnings.warn(
                "Passing keywords other than 'data', 'dtype', 'copy', 'name', "
                "'tupleize_cols' is deprecated and will raise TypeError in a "
                "future version. Use the specific Index subclass directly instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        from pandas.core.arrays import PandasArray
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)
        if "tz" in kwargs:
            # Legacy 'tz' keyword: fold it into a DatetimeTZDtype.
            tz = kwargs.pop("tz")
            validate_tz_from_dtype(dtype, tz)
            dtype = tz_to_dtype(tz)

        if type(data) is PandasArray:
            # ensure users don't accidentally put a PandasArray in an index,
            # but don't unpack StringArray
            data = data.to_numpy()
        if isinstance(dtype, PandasDtype):
            dtype = dtype.numpy_dtype

        data_dtype = getattr(data, "dtype", None)

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            klass = cls._dtype_to_subclass(dtype)
            if klass is not Index:
                return klass(data, dtype=dtype, copy=copy, name=name, **kwargs)

            # EA dtype that still maps to base Index: build the EA directly.
            ea_cls = dtype.construct_array_type()
            data = ea_cls._from_sequence(data, dtype=dtype, copy=copy)
            disallow_kwargs(kwargs)
            return Index._simple_new(data, name=name)

        elif is_ea_or_datetimelike_dtype(data_dtype):
            # dtype not requested but the data itself is EA/datetimelike.
            data_dtype = cast(DtypeObj, data_dtype)
            klass = cls._dtype_to_subclass(data_dtype)
            if klass is not Index:
                result = klass(data, copy=copy, name=name, **kwargs)
                if dtype is not None:
                    return result.astype(dtype, copy=False)
                return result
            elif dtype is not None:
                # GH#45206
                data = data.astype(dtype, copy=False)

            disallow_kwargs(kwargs)
            data = extract_array(data, extract_numpy=True)
            return Index._simple_new(data, name=name)

        # index-like
        elif (
            isinstance(data, Index)
            and data._is_backward_compat_public_numeric_index
            and dtype is None
        ):
            # Preserve the NumericIndex subclass (backwards-compat path).
            return data._constructor(data, name=name, copy=copy)
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):

            if isinstance(data, ABCMultiIndex):
                data = data._values

            if dtype is not None:
                # we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                # GH 11836
                data = sanitize_array(data, None, dtype=dtype, copy=copy)

                dtype = data.dtype

            if data.dtype.kind in ["i", "u", "f"]:
                # maybe coerce to a sub-class
                arr = data
            elif data.dtype.kind in ["b", "c"]:
                # No special subclass, and Index._ensure_array won't do this
                # for us.
                arr = np.asarray(data)
            else:
                arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)

            if dtype is None:
                # Infer a better dtype from object data (deprecated behavior
                # for ndarray input; see _maybe_cast_data_without_dtype).
                arr = _maybe_cast_data_without_dtype(
                    arr, cast_numeric_deprecated=True
                )
                dtype = arr.dtype

                if kwargs:
                    # Re-dispatch so deprecated extra keywords are handled.
                    return cls(arr, dtype, copy=copy, name=name, **kwargs)

            klass = cls._dtype_to_subclass(arr.dtype)
            arr = klass._ensure_array(arr, dtype, copy)
            disallow_kwargs(kwargs)
            return klass._simple_new(arr, name)

        elif is_scalar(data):
            # Scalars are never valid Index data.
            raise cls._scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Anything exposing the array protocol: convert and retry.
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
        else:

            if tupleize_cols and is_list_like(data):
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(
                        data, names=name or kwargs.get("names")
                    )
            # other iterable of some kind

            subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
            if dtype is None:
                # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
                subarr = _maybe_cast_data_without_dtype(
                    subarr, cast_numeric_deprecated=False
                )
                dtype = subarr.dtype
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
575 @classmethod
576 def _ensure_array(cls, data, dtype, copy: bool):
577 """
578 Ensure we have a valid array to pass to _simple_new.
579 """
580 if data.ndim > 1:
581 # GH#13601, GH#20285, GH#27125
582 raise ValueError("Index data must be 1-dimensional")
583 if copy:
584 # asarray_tuplesafe does not always copy underlying data,
585 # so need to make sure that this happens
586 data = data.copy()
587 return data
    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Map a dtype to the Index subclass that should hold it.
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex

                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex

                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex

                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex

                return PeriodIndex

            elif isinstance(dtype, SparseDtype):
                warnings.warn(
                    "In a future version, passing a SparseArray to pd.Index "
                    "will store that array directly instead of converting to a "
                    "dense numpy ndarray. To retain the old behavior, use "
                    "pd.Index(arr.to_numpy()) instead",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                # Recurse on the dense subtype (current, deprecated behavior).
                return cls._dtype_to_subclass(dtype.subtype)

            # Other extension dtypes are held in a base Index.
            return Index

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        elif is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_signed_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index

        elif dtype == _dtype_obj:
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(
            dtype.type, (str, bool, np.bool_, complex, np.complex64, np.complex128)
        ):
            return Index

        raise NotImplementedError(dtype)
659 """
660 NOTE for new Index creation:
662 - _simple_new: It returns new Index with the same type as the caller.
663 All metadata (such as name) must be provided by caller's responsibility.
664 Using _shallow_copy is recommended because it fills these metadata
665 otherwise specified.
667 - _shallow_copy: It returns new Index with the same type (using
668 _simple_new), but fills caller's metadata otherwise specified. Passed
669 kwargs will overwrite corresponding metadata.
671 See each method's docstring.
672 """
674 @property
675 def asi8(self):
676 """
677 Integer representation of the values.
679 Returns
680 -------
681 ndarray
682 An ndarray with int64 dtype.
683 """
684 warnings.warn(
685 "Index.asi8 is deprecated and will be removed in a future version.",
686 FutureWarning,
687 stacklevel=find_stack_level(),
688 )
689 return None
691 @classmethod
692 def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT:
693 """
694 We require that we have a dtype compat for the values. If we are passed
695 a non-dtype compat, then coerce using the constructor.
697 Must be careful not to recurse.
698 """
699 assert isinstance(values, cls._data_cls), type(values)
701 result = object.__new__(cls)
702 result._data = values
703 result._name = name
704 result._cache = {}
705 result._reset_identity()
707 return result
    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        with warnings.catch_warnings():
            # This path deliberately keeps the old inference behavior, so
            # silence the constructor's deprecation warning about it.
            warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning)
            result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in ["i", "u", "f", "b"]:
                # Inference found a numeric/bool dtype: rebuild accordingly.
                return Index(values, name=result.name)

        return result
729 @cache_readonly
730 def _constructor(self: _IndexT) -> type[_IndexT]:
731 return type(self)
733 @final
734 def _maybe_check_unique(self) -> None:
735 """
736 Check that an Index has no duplicates.
738 This is typically only called via
739 `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
740 True (duplicates aren't allowed).
742 Raises
743 ------
744 DuplicateLabelError
745 When the index is not unique.
746 """
747 if not self.is_unique:
748 msg = """Index has duplicates."""
749 duplicates = self._format_duplicate_message()
750 msg += f"\n{duplicates}"
752 raise DuplicateLabelError(msg)
    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
              positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        # Group the positions 0..n-1 by label, then select only the labels
        # that are duplicated.
        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")
786 # --------------------------------------------------------------------
787 # Index Internals Methods
789 @final
790 def _get_attributes_dict(self) -> dict[str_t, Any]:
791 """
792 Return an attributes dict for my class.
794 Temporarily added back for compatibility issue in dask, see
795 https://github.com/pandas-dev/pandas/pull/43895
796 """
797 warnings.warn(
798 "The Index._get_attributes_dict method is deprecated, and will be "
799 "removed in a future version",
800 DeprecationWarning,
801 stacklevel=find_stack_level(),
802 )
803 return {k: getattr(self, k, None) for k in self._attributes}
805 def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
806 """
807 Create a new Index with the same class as the caller, don't copy the
808 data, use the same object attributes with passed in attributes taking
809 precedence.
811 *this is an internal non-public method*
813 Parameters
814 ----------
815 values : the values to create the new Index, optional
816 name : Label, defaults to self.name
817 """
818 name = self._name if name is no_default else name
820 return self._simple_new(values, name=name)
822 def _view(self: _IndexT) -> _IndexT:
823 """
824 fastpath to make a shallow copy, i.e. new object with same data.
825 """
826 result = self._simple_new(self._values, name=self._name)
828 result._cache = self._cache
829 return result
831 @final
832 def _rename(self: _IndexT, name: Hashable) -> _IndexT:
833 """
834 fastpath for rename if new name is already validated.
835 """
836 result = self._view()
837 result._name = name
838 return result
840 @final
841 def is_(self, other) -> bool:
842 """
843 More flexible, faster check like ``is`` but that works through views.
845 Note: this is *not* the same as ``Index.identical()``, which checks
846 that metadata is also the same.
848 Parameters
849 ----------
850 other : object
851 Other object to compare against.
853 Returns
854 -------
855 bool
856 True if both have same underlying data, False otherwise.
858 See Also
859 --------
860 Index.identical : Works like ``Index.is_`` but also checks metadata.
861 """
862 if self is other:
863 return True
864 elif not hasattr(other, "_id"):
865 return False
866 elif self._id is None or other._id is None:
867 return False
868 else:
869 return self._id is other._id
    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Fresh sentinel; views copy this reference so ``is_`` can detect
        # shared underlying data.
        self._id = object()
    @final
    def _cleanup(self) -> None:
        # Release the engine's hash-table mapping to free memory.
        self._engine.clear_mapping()
    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine:
        # Build (and cache) the lookup engine for this index's values.
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if (
            isinstance(target_values, ExtensionArray)
            and self._engine_type is libindex.ObjectEngine
        ):
            # EA-backed index without a specialized engine type.
            return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]
908 @final
909 @cache_readonly
910 def _dir_additions_for_owner(self) -> set[str_t]:
911 """
912 Add the string-like labels to the owner dataframe/series dir output.
914 If this is a MultiIndex, it's first level values are used.
915 """
916 return {
917 c
918 for c in self.unique(level=0)[: get_option("display.max_dir_items")]
919 if isinstance(c, str) and c.isidentifier()
920 }
922 # --------------------------------------------------------------------
923 # Array-Like Methods
925 # ndarray compat
926 def __len__(self) -> int:
927 """
928 Return the length of the Index.
929 """
930 return len(self._data)
932 def __array__(self, dtype=None) -> np.ndarray:
933 """
934 The array interface, return my values.
935 """
936 return np.asarray(self._data, dtype=dtype)
    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        # Let Series/DataFrame handle mixed operations (they have priority).
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
        # operations and not logical operations, so don't dispatch
        # This is deprecated, so this full 'if' clause can be removed once
        # deprecation is enforced in 2.0
        if not (
            method == "__call__"
            and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
        ):
            # Try to reuse the corresponding dunder op (e.g. np.add -> __add__).
            result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Unwrap self to its values, apply the ufunc, then re-wrap.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        return self.__array_wrap__(result)
977 def __array_wrap__(self, result, context=None):
978 """
979 Gets called after a ufunc and other functions e.g. np.split.
980 """
981 result = lib.item_from_zerodim(result)
982 if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
983 return result
985 return Index(result, name=self.name)
    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # Cached: the backing array (and hence its dtype) never changes.
        return self._data.dtype
    @final
    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        .. deprecated::
           Returning an ndarray is deprecated; this will return a view on self.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        warnings.warn(
            "Index.ravel returning ndarray is deprecated; in a future version "
            "this will return a view on self.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        if needs_i8_conversion(self.dtype):
            # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]"
            # has no attribute "_ndarray"
            values = self._data._ndarray  # type: ignore[union-attr]
        elif is_interval_dtype(self.dtype):
            # IntervalArray has no engine target; densify to object ndarray.
            values = np.asarray(self._data)
        else:
            values = self._get_engine_target()
        return values.ravel(order=order)
    def view(self, cls=None):
        # Return a view of the index, optionally reinterpreted as ``cls``
        # (either an Index subclass or a dtype to view the data as).

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is a dtype-like, not an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                # Reinterpret the underlying i8 values as the target dtype.
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            # Views share identity so ``is_`` recognizes the shared data.
            result._id = self._id
        return result
    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            if isinstance(dtype, np.dtype) and dtype.kind == "M" and is_unitless(dtype):
                # TODO(2.0): remove this special-casing once this is enforced
                # in DTA.astype
                raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif is_float_dtype(self.dtype) and needs_i8_conversion(dtype):
            # NB: this must come before the ExtensionDtype check below
            # TODO: this differs from Series behavior; can/should we align them?
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            try:
                if dtype == str:
                    # GH#38607
                    new_values = values.astype(dtype, copy=copy)
                else:
                    # GH#13149 specifically use astype_nansafe instead of astype
                    new_values = astype_nansafe(values, dtype=dtype, copy=copy)
            except IntCastingNaNError:
                # Propagate unchanged: the message already explains the failure.
                raise
            except (TypeError, ValueError) as err:
                if dtype.kind == "u" and "losslessly" in str(err):
                    # keep the message from _astype_float_to_int_nansafe
                    raise
                raise TypeError(
                    f"Cannot cast {type(self).__name__} to dtype {dtype}"
                ) from err

        # pass copy=False because any copying will be done in the astype above
        if self._is_backward_compat_public_numeric_index:
            # this block is needed so e.g. NumericIndex[int8].astype("int32") returns
            # NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, removed this also.
            if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
                return self._constructor(
                    new_values, name=self.name, dtype=dtype, copy=False
                )
        return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
    # Docstring template shared across Index subclasses; ``%(klass)s`` is
    # substituted per class and attached via the ``Appender`` decorator on
    # each ``take`` definition.
    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.
        """
1169 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
1170 def take(
1171 self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs
1172 ):
1173 if kwargs:
1174 nv.validate_take((), kwargs)
1175 if is_scalar(indices):
1176 raise TypeError("Expected indices to be array-like")
1177 indices = ensure_platform_int(indices)
1178 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)
1180 # Note: we discard fill_value and use self._na_value, only relevant
1181 # in the case where allow_fill is True and fill_value is not None
1182 values = self._values
1183 if isinstance(values, np.ndarray):
1184 taken = algos.take(
1185 values, indices, allow_fill=allow_fill, fill_value=self._na_value
1186 )
1187 else:
1188 # algos.take passes 'axis' keyword which not all EAs accept
1189 taken = values.take(
1190 indices, allow_fill=allow_fill, fill_value=self._na_value
1191 )
1192 # _constructor so RangeIndex->Int64Index
1193 return self._constructor._simple_new(taken, name=self.name)
1195 @final
1196 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
1197 """
1198 We only use pandas-style take when allow_fill is True _and_
1199 fill_value is not None.
1200 """
1201 if allow_fill and fill_value is not None:
1202 # only fill if we are passing a non-None fill_value
1203 if self._can_hold_na:
1204 if (indices < -1).any():
1205 raise ValueError(
1206 "When allow_fill=True and fill_value is not None, "
1207 "all indices must be >= -1"
1208 )
1209 else:
1210 cls_name = type(self).__name__
1211 raise ValueError(
1212 f"Unable to fill values because {cls_name} cannot contain NA"
1213 )
1214 else:
1215 allow_fill = False
1216 return allow_fill
    # Docstring template shared across Index subclasses; ``%(klass)s`` is
    # substituted per class and attached via the ``Appender`` decorator on
    # each ``repeat`` definition.
    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        repeated_index : %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """
1257 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
1258 def repeat(self, repeats, axis=None):
1259 repeats = ensure_platform_int(repeats)
1260 nv.validate_repeat((), {"axis": axis})
1261 res_values = self._values.repeat(repeats)
1263 # _constructor so RangeIndex->Int64Index
1264 return self._constructor._simple_new(res_values, name=self.name)
1266 # --------------------------------------------------------------------
1267 # Copying Methods
1269 def copy(
1270 self: _IndexT,
1271 name: Hashable | None = None,
1272 deep: bool = False,
1273 dtype: Dtype | None = None,
1274 names: Sequence[Hashable] | None = None,
1275 ) -> _IndexT:
1276 """
1277 Make a copy of this object.
1279 Name and dtype sets those attributes on the new object.
1281 Parameters
1282 ----------
1283 name : Label, optional
1284 Set name for new object.
1285 deep : bool, default False
1286 dtype : numpy dtype or pandas type, optional
1287 Set dtype for new object.
1289 .. deprecated:: 1.2.0
1290 use ``astype`` method instead.
1291 names : list-like, optional
1292 Kept for compatibility with MultiIndex. Should not be used.
1294 .. deprecated:: 1.4.0
1295 use ``name`` instead.
1297 Returns
1298 -------
1299 Index
1300 Index refer to new object which is a copy of this object.
1302 Notes
1303 -----
1304 In most cases, there should be no functional difference from using
1305 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1306 """
1307 if names is not None:
1308 warnings.warn(
1309 "parameter names is deprecated and will be removed in a future "
1310 "version. Use the name parameter instead.",
1311 FutureWarning,
1312 stacklevel=find_stack_level(),
1313 )
1315 name = self._validate_names(name=name, names=names, deep=deep)[0]
1316 if deep:
1317 new_data = self._data.copy()
1318 new_index = type(self)._simple_new(new_data, name=name)
1319 else:
1320 new_index = self._rename(name=name)
1322 if dtype:
1323 warnings.warn(
1324 "parameter dtype is deprecated and will be removed in a future "
1325 "version. Use the astype method instead.",
1326 FutureWarning,
1327 stacklevel=find_stack_level(),
1328 )
1329 new_index = new_index.astype(dtype)
1330 return new_index
1332 @final
1333 def __copy__(self: _IndexT, **kwargs) -> _IndexT:
1334 return self.copy(**kwargs)
1336 @final
1337 def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
1338 """
1339 Parameters
1340 ----------
1341 memo, default None
1342 Standard signature. Unused
1343 """
1344 return self.copy(deep=True)
1346 # --------------------------------------------------------------------
1347 # Rendering Methods
1349 @final
1350 def __repr__(self) -> str_t:
1351 """
1352 Return a string representation for this object.
1353 """
1354 klass_name = type(self).__name__
1355 data = self._format_data()
1356 attrs = self._format_attrs()
1357 space = self._format_space()
1358 attrs_str = [f"{k}={v}" for k, v in attrs]
1359 prepr = f",{space}".join(attrs_str)
1361 # no data provided, just attributes
1362 if data is None:
1363 data = ""
1365 return f"{klass_name}({data}{prepr})"
1367 def _format_space(self) -> str_t:
1369 # using space here controls if the attributes
1370 # are line separated or not (the default)
1372 # max_seq_items = get_option('display.max_seq_items')
1373 # if len(self) > max_seq_items:
1374 # space = "\n%s" % (' ' * (len(klass) + 1))
1375 return " "
1377 @property
1378 def _formatter_func(self):
1379 """
1380 Return the formatter function.
1381 """
1382 return default_pprint
1384 def _format_data(self, name=None) -> str_t:
1385 """
1386 Return the formatted data as a unicode string.
1387 """
1388 # do we want to justify (only do so for non-objects)
1389 is_justify = True
1391 if self.inferred_type == "string":
1392 is_justify = False
1393 elif self.inferred_type == "categorical":
1394 self = cast("CategoricalIndex", self)
1395 if is_object_dtype(self.categories):
1396 is_justify = False
1398 return format_object_summary(
1399 self,
1400 self._formatter_func,
1401 is_justify=is_justify,
1402 name=name,
1403 line_break_each_value=self._is_multi,
1404 )
1406 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
1407 """
1408 Return a list of tuples of the (attr,formatted_value).
1409 """
1410 attrs: list[tuple[str_t, str_t | int | bool | None]] = []
1412 if not self._is_multi:
1413 attrs.append(("dtype", f"'{self.dtype}'"))
1415 if self.name is not None:
1416 attrs.append(("name", default_pprint(self.name)))
1417 elif self._is_multi and any(x is not None for x in self.names):
1418 attrs.append(("names", default_pprint(self.names)))
1420 max_seq_items = get_option("display.max_seq_items") or len(self)
1421 if len(self) > max_seq_items:
1422 attrs.append(("length", len(self)))
1423 return attrs
1425 @final
1426 def _get_level_names(self) -> Hashable | Sequence[Hashable]:
1427 """
1428 Return a name or list of names with None replaced by the level number.
1429 """
1430 if self._is_multi:
1431 return [
1432 level if name is None else name for level, name in enumerate(self.names)
1433 ]
1434 else:
1435 return 0 if self.name is None else self.name
1437 @final
1438 def _mpl_repr(self) -> np.ndarray:
1439 # how to represent ourselves to matplotlib
1440 if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
1441 return cast(np.ndarray, self.values)
1442 return self.astype(object, copy=False)._values
1444 def format(
1445 self,
1446 name: bool = False,
1447 formatter: Callable | None = None,
1448 na_rep: str_t = "NaN",
1449 ) -> list[str_t]:
1450 """
1451 Render a string representation of the Index.
1452 """
1453 header = []
1454 if name:
1455 header.append(
1456 pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
1457 if self.name is not None
1458 else ""
1459 )
1461 if formatter is not None:
1462 return header + list(self.map(formatter))
1464 return self._format_with_header(header, na_rep=na_rep)
1466 def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
1467 from pandas.io.formats.format import format_array
1469 values = self._values
1471 if is_object_dtype(values.dtype):
1472 values = cast(np.ndarray, values)
1473 values = lib.maybe_convert_objects(values, safe=True)
1475 result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
1477 # could have nans
1478 mask = is_float_nan(values)
1479 if mask.any():
1480 result_arr = np.array(result)
1481 result_arr[mask] = na_rep
1482 result = result_arr.tolist()
1483 else:
1484 result = trim_front(format_array(values, None, justify="left"))
1485 return header + result
1487 @final
1488 def to_native_types(self, slicer=None, **kwargs) -> np.ndarray:
1489 """
1490 Format specified values of `self` and return them.
1492 .. deprecated:: 1.2.0
1494 Parameters
1495 ----------
1496 slicer : int, array-like
1497 An indexer into `self` that specifies which values
1498 are used in the formatting process.
1499 kwargs : dict
1500 Options for specifying how the values should be formatted.
1501 These options include the following:
1503 1) na_rep : str
1504 The value that serves as a placeholder for NULL values
1505 2) quoting : bool or None
1506 Whether or not there are quoted values in `self`
1507 3) date_format : str
1508 The format used to represent date-like values.
1510 Returns
1511 -------
1512 numpy.ndarray
1513 Formatted values.
1514 """
1515 warnings.warn(
1516 "The 'to_native_types' method is deprecated and will be removed in "
1517 "a future version. Use 'astype(str)' instead.",
1518 FutureWarning,
1519 stacklevel=find_stack_level(),
1520 )
1521 values = self
1522 if slicer is not None:
1523 values = values[slicer]
1524 return values._format_native_types(**kwargs)
1526 def _format_native_types(
1527 self, *, na_rep="", quoting=None, **kwargs
1528 ) -> npt.NDArray[np.object_]:
1529 """
1530 Actually format specific types of the index.
1531 """
1532 mask = isna(self)
1533 if not self.is_object() and not quoting:
1534 values = np.asarray(self).astype(str)
1535 else:
1536 values = np.array(self, dtype=object, copy=True)
1538 values[mask] = na_rep
1539 return values
1541 def _summary(self, name=None) -> str_t:
1542 """
1543 Return a summarized representation.
1545 Parameters
1546 ----------
1547 name : str
1548 name to use in the summary representation
1550 Returns
1551 -------
1552 String with a summarized representation of the index
1553 """
1554 if len(self) > 0:
1555 head = self[0]
1556 if hasattr(head, "format") and not isinstance(head, str):
1557 head = head.format()
1558 elif needs_i8_conversion(self.dtype):
1559 # e.g. Timedelta, display as values, not quoted
1560 head = self._formatter_func(head).replace("'", "")
1561 tail = self[-1]
1562 if hasattr(tail, "format") and not isinstance(tail, str):
1563 tail = tail.format()
1564 elif needs_i8_conversion(self.dtype):
1565 # e.g. Timedelta, display as values, not quoted
1566 tail = self._formatter_func(tail).replace("'", "")
1568 index_summary = f", {head} to {tail}"
1569 else:
1570 index_summary = ""
1572 if name is None:
1573 name = type(self).__name__
1574 return f"{name}: {len(self)} entries{index_summary}"
1576 # --------------------------------------------------------------------
1577 # Conversion Methods
1579 def to_flat_index(self: _IndexT) -> _IndexT:
1580 """
1581 Identity method.
1583 This is implemented for compatibility with subclass implementations
1584 when chaining.
1586 Returns
1587 -------
1588 pd.Index
1589 Caller.
1591 See Also
1592 --------
1593 MultiIndex.to_flat_index : Subclass implementation.
1594 """
1595 return self
1597 def to_series(self, index=None, name: Hashable = None) -> Series:
1598 """
1599 Create a Series with both index and values equal to the index keys.
1601 Useful with map for returning an indexer based on an index.
1603 Parameters
1604 ----------
1605 index : Index, optional
1606 Index of resulting Series. If None, defaults to original index.
1607 name : str, optional
1608 Name of resulting Series. If None, defaults to name of original
1609 index.
1611 Returns
1612 -------
1613 Series
1614 The dtype will be based on the type of the Index values.
1616 See Also
1617 --------
1618 Index.to_frame : Convert an Index to a DataFrame.
1619 Series.to_frame : Convert Series to DataFrame.
1621 Examples
1622 --------
1623 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1625 By default, the original Index and original name is reused.
1627 >>> idx.to_series()
1628 animal
1629 Ant Ant
1630 Bear Bear
1631 Cow Cow
1632 Name: animal, dtype: object
1634 To enforce a new Index, specify new labels to ``index``:
1636 >>> idx.to_series(index=[0, 1, 2])
1637 0 Ant
1638 1 Bear
1639 2 Cow
1640 Name: animal, dtype: object
1642 To override the name of the resulting column, specify `name`:
1644 >>> idx.to_series(name='zoo')
1645 animal
1646 Ant Ant
1647 Bear Bear
1648 Cow Cow
1649 Name: zoo, dtype: object
1650 """
1651 from pandas import Series
1653 if index is None:
1654 index = self._view()
1655 if name is None:
1656 name = self.name
1658 return Series(self._values.copy(), index=index, name=name)
1660 def to_frame(
1661 self, index: bool = True, name: Hashable = lib.no_default
1662 ) -> DataFrame:
1663 """
1664 Create a DataFrame with a column containing the Index.
1666 Parameters
1667 ----------
1668 index : bool, default True
1669 Set the index of the returned DataFrame as the original Index.
1671 name : object, default None
1672 The passed name should substitute for the index name (if it has
1673 one).
1675 Returns
1676 -------
1677 DataFrame
1678 DataFrame containing the original Index data.
1680 See Also
1681 --------
1682 Index.to_series : Convert an Index to a Series.
1683 Series.to_frame : Convert Series to DataFrame.
1685 Examples
1686 --------
1687 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1688 >>> idx.to_frame()
1689 animal
1690 animal
1691 Ant Ant
1692 Bear Bear
1693 Cow Cow
1695 By default, the original Index is reused. To enforce a new Index:
1697 >>> idx.to_frame(index=False)
1698 animal
1699 0 Ant
1700 1 Bear
1701 2 Cow
1703 To override the name of the resulting column, specify `name`:
1705 >>> idx.to_frame(index=False, name='zoo')
1706 zoo
1707 0 Ant
1708 1 Bear
1709 2 Cow
1710 """
1711 from pandas import DataFrame
1713 if name is None:
1714 warnings.warn(
1715 "Explicitly passing `name=None` currently preserves the Index's name "
1716 "or uses a default name of 0. This behaviour is deprecated, and in "
1717 "the future `None` will be used as the name of the resulting "
1718 "DataFrame column.",
1719 FutureWarning,
1720 stacklevel=find_stack_level(),
1721 )
1722 name = lib.no_default
1724 if name is lib.no_default:
1725 name = self._get_level_names()
1726 result = DataFrame({name: self._values.copy()})
1728 if index:
1729 result.index = self
1730 return result
1732 # --------------------------------------------------------------------
1733 # Name-Centric Methods
1735 @property
1736 def name(self) -> Hashable:
1737 """
1738 Return Index or MultiIndex name.
1739 """
1740 return self._name
1742 @name.setter
1743 def name(self, value: Hashable) -> None:
1744 if self._no_setting_name:
1745 # Used in MultiIndex.levels to avoid silently ignoring name updates.
1746 raise RuntimeError(
1747 "Cannot set name on a level of a MultiIndex. Use "
1748 "'MultiIndex.set_names' instead."
1749 )
1750 maybe_extract_name(value, None, type(self))
1751 self._name = value
1753 @final
1754 def _validate_names(
1755 self, name=None, names=None, deep: bool = False
1756 ) -> list[Hashable]:
1757 """
1758 Handles the quirks of having a singular 'name' parameter for general
1759 Index and plural 'names' parameter for MultiIndex.
1760 """
1761 from copy import deepcopy
1763 if names is not None and name is not None:
1764 raise TypeError("Can only provide one of `names` and `name`")
1765 elif names is None and name is None:
1766 new_names = deepcopy(self.names) if deep else self.names
1767 elif names is not None:
1768 if not is_list_like(names):
1769 raise TypeError("Must pass list-like as `names`.")
1770 new_names = names
1771 elif not is_list_like(name):
1772 new_names = [name]
1773 else:
1774 new_names = name
1776 if len(new_names) != len(self.names):
1777 raise ValueError(
1778 f"Length of new names must be {len(self.names)}, got {len(new_names)}"
1779 )
1781 # All items in 'new_names' need to be hashable
1782 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
1784 return new_names
1786 def _get_default_index_names(
1787 self, names: Hashable | Sequence[Hashable] | None = None, default=None
1788 ) -> list[Hashable]:
1789 """
1790 Get names of index.
1792 Parameters
1793 ----------
1794 names : int, str or 1-dimensional list, default None
1795 Index names to set.
1796 default : str
1797 Default name of index.
1799 Raises
1800 ------
1801 TypeError
1802 if names not str or list-like
1803 """
1804 from pandas.core.indexes.multi import MultiIndex
1806 if names is not None:
1807 if isinstance(names, str) or isinstance(names, int):
1808 names = [names]
1810 if not isinstance(names, list) and names is not None:
1811 raise ValueError("Index names must be str or 1-dimensional list")
1813 if not names:
1814 if isinstance(self, MultiIndex):
1815 names = com.fill_missing_names(self.names)
1816 else:
1817 names = [default] if self.name is None else [self.name]
1819 return names
1821 def _get_names(self) -> FrozenList:
1822 return FrozenList((self.name,))
1824 def _set_names(self, values, *, level=None) -> None:
1825 """
1826 Set new names on index. Each name has to be a hashable type.
1828 Parameters
1829 ----------
1830 values : str or sequence
1831 name(s) to set
1832 level : int, level name, or sequence of int/level names (default None)
1833 If the index is a MultiIndex (hierarchical), level(s) to set (None
1834 for all levels). Otherwise level must be None
1836 Raises
1837 ------
1838 TypeError if each name is not hashable.
1839 """
1840 if not is_list_like(values):
1841 raise ValueError("Names must be a list-like")
1842 if len(values) != 1:
1843 raise ValueError(f"Length of new names must be 1, got {len(values)}")
1845 # GH 20527
1846 # All items in 'name' need to be hashable:
1847 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
1849 self._name = values[0]
1851 names = property(fset=_set_names, fget=_get_names)
1853 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"])
1854 def set_names(self, names, level=None, inplace: bool = False):
1855 """
1856 Set Index or MultiIndex name.
1858 Able to set new names partially and by level.
1860 Parameters
1861 ----------
1863 names : label or list of label or dict-like for MultiIndex
1864 Name(s) to set.
1866 .. versionchanged:: 1.3.0
1868 level : int, label or list of int or label, optional
1869 If the index is a MultiIndex and names is not dict-like, level(s) to set
1870 (None for all levels). Otherwise level must be None.
1872 .. versionchanged:: 1.3.0
1874 inplace : bool, default False
1875 Modifies the object directly, instead of creating a new Index or
1876 MultiIndex.
1878 Returns
1879 -------
1880 Index or None
1881 The same type as the caller or None if ``inplace=True``.
1883 See Also
1884 --------
1885 Index.rename : Able to set new names without level.
1887 Examples
1888 --------
1889 >>> idx = pd.Index([1, 2, 3, 4])
1890 >>> idx
1891 Int64Index([1, 2, 3, 4], dtype='int64')
1892 >>> idx.set_names('quarter')
1893 Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
1895 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1896 ... [2018, 2019]])
1897 >>> idx
1898 MultiIndex([('python', 2018),
1899 ('python', 2019),
1900 ( 'cobra', 2018),
1901 ( 'cobra', 2019)],
1902 )
1903 >>> idx.set_names(['kind', 'year'], inplace=True)
1904 >>> idx
1905 MultiIndex([('python', 2018),
1906 ('python', 2019),
1907 ( 'cobra', 2018),
1908 ( 'cobra', 2019)],
1909 names=['kind', 'year'])
1910 >>> idx.set_names('species', level=0)
1911 MultiIndex([('python', 2018),
1912 ('python', 2019),
1913 ( 'cobra', 2018),
1914 ( 'cobra', 2019)],
1915 names=['species', 'year'])
1917 When renaming levels with a dict, levels can not be passed.
1919 >>> idx.set_names({'kind': 'snake'})
1920 MultiIndex([('python', 2018),
1921 ('python', 2019),
1922 ( 'cobra', 2018),
1923 ( 'cobra', 2019)],
1924 names=['snake', 'year'])
1925 """
1926 if level is not None and not isinstance(self, ABCMultiIndex):
1927 raise ValueError("Level must be None for non-MultiIndex")
1929 elif level is not None and not is_list_like(level) and is_list_like(names):
1930 raise TypeError("Names must be a string when a single level is provided.")
1932 elif not is_list_like(names) and level is None and self.nlevels > 1:
1933 raise TypeError("Must pass list-like as `names`.")
1935 elif is_dict_like(names) and not isinstance(self, ABCMultiIndex):
1936 raise TypeError("Can only pass dict-like as `names` for MultiIndex.")
1938 elif is_dict_like(names) and level is not None:
1939 raise TypeError("Can not pass level for dictlike `names`.")
1941 if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
1942 # Transform dict to list of new names and corresponding levels
1943 level, names_adjusted = [], []
1944 for i, name in enumerate(self.names):
1945 if name in names.keys():
1946 level.append(i)
1947 names_adjusted.append(names[name])
1948 names = names_adjusted
1950 if not is_list_like(names):
1951 names = [names]
1952 if level is not None and not is_list_like(level):
1953 level = [level]
1955 if inplace:
1956 idx = self
1957 else:
1958 idx = self._view()
1960 idx._set_names(names, level=level)
1961 if not inplace:
1962 return idx
1964 def rename(self, name, inplace=False):
1965 """
1966 Alter Index or MultiIndex name.
1968 Able to set new names without level. Defaults to returning new index.
1969 Length of names must match number of levels in MultiIndex.
1971 Parameters
1972 ----------
1973 name : label or list of labels
1974 Name(s) to set.
1975 inplace : bool, default False
1976 Modifies the object directly, instead of creating a new Index or
1977 MultiIndex.
1979 Returns
1980 -------
1981 Index or None
1982 The same type as the caller or None if ``inplace=True``.
1984 See Also
1985 --------
1986 Index.set_names : Able to set new names partially and by level.
1988 Examples
1989 --------
1990 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
1991 >>> idx.rename('grade')
1992 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
1994 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1995 ... [2018, 2019]],
1996 ... names=['kind', 'year'])
1997 >>> idx
1998 MultiIndex([('python', 2018),
1999 ('python', 2019),
2000 ( 'cobra', 2018),
2001 ( 'cobra', 2019)],
2002 names=['kind', 'year'])
2003 >>> idx.rename(['species', 'year'])
2004 MultiIndex([('python', 2018),
2005 ('python', 2019),
2006 ( 'cobra', 2018),
2007 ( 'cobra', 2019)],
2008 names=['species', 'year'])
2009 >>> idx.rename('species')
2010 Traceback (most recent call last):
2011 TypeError: Must pass list-like as `names`.
2012 """
2013 return self.set_names([name], inplace=inplace)
2015 # --------------------------------------------------------------------
2016 # Level-Centric Methods
2018 @property
2019 def nlevels(self) -> int:
2020 """
2021 Number of levels.
2022 """
2023 return 1
2025 def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
2026 """
2027 Compat with MultiIndex.
2028 """
2029 return self
2031 @final
2032 def _validate_index_level(self, level) -> None:
2033 """
2034 Validate index level.
2036 For single-level Index getting level number is a no-op, but some
2037 verification must be done like in MultiIndex.
2039 """
2040 if isinstance(level, int):
2041 if level < 0 and level != -1:
2042 raise IndexError(
2043 "Too many levels: Index has only 1 level, "
2044 f"{level} is not a valid level number"
2045 )
2046 elif level > 0:
2047 raise IndexError(
2048 f"Too many levels: Index has only 1 level, not {level + 1}"
2049 )
2050 elif level != self.name:
2051 raise KeyError(
2052 f"Requested level ({level}) does not match index name ({self.name})"
2053 )
2055 def _get_level_number(self, level) -> int:
2056 self._validate_index_level(level)
2057 return 0
2059 def sortlevel(self, level=None, ascending=True, sort_remaining=None):
2060 """
2061 For internal compatibility with the Index API.
2063 Sort the Index. This is for compat with MultiIndex
2065 Parameters
2066 ----------
2067 ascending : bool, default True
2068 False to sort in descending order
2070 level, sort_remaining are compat parameters
2072 Returns
2073 -------
2074 Index
2075 """
2076 if not isinstance(ascending, (list, bool)):
2077 raise TypeError(
2078 "ascending must be a single bool value or"
2079 "a list of bool values of length 1"
2080 )
2082 if isinstance(ascending, list):
2083 if len(ascending) != 1:
2084 raise TypeError("ascending must be a list of bool values of length 1")
2085 ascending = ascending[0]
2087 if not isinstance(ascending, bool):
2088 raise TypeError("ascending must be a bool value")
2090 return self.sort_values(return_indexer=True, ascending=ascending)
2092 def _get_level_values(self, level) -> Index:
2093 """
2094 Return an Index of values for requested level.
2096 This is primarily useful to get an individual level of values from a
2097 MultiIndex, but is provided on Index as well for compatibility.
2099 Parameters
2100 ----------
2101 level : int or str
2102 It is either the integer position or the name of the level.
2104 Returns
2105 -------
2106 Index
2107 Calling object, as there is only one level in the Index.
2109 See Also
2110 --------
2111 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
2113 Notes
2114 -----
2115 For Index, level should be 0, since there are no multiple levels.
2117 Examples
2118 --------
2119 >>> idx = pd.Index(list('abc'))
2120 >>> idx
2121 Index(['a', 'b', 'c'], dtype='object')
2123 Get level values by supplying `level` as integer:
2125 >>> idx.get_level_values(0)
2126 Index(['a', 'b', 'c'], dtype='object')
2127 """
2128 self._validate_index_level(level)
2129 return self
2131 get_level_values = _get_level_values
2133 @final
2134 def droplevel(self, level=0):
2135 """
2136 Return index with requested level(s) removed.
2138 If resulting index has only 1 level left, the result will be
2139 of Index type, not MultiIndex.
2141 Parameters
2142 ----------
2143 level : int, str, or list-like, default 0
2144 If a string is given, must be the name of a level
2145 If list-like, elements must be names or indexes of levels.
2147 Returns
2148 -------
2149 Index or MultiIndex
2151 Examples
2152 --------
2153 >>> mi = pd.MultiIndex.from_arrays(
2154 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2155 >>> mi
2156 MultiIndex([(1, 3, 5),
2157 (2, 4, 6)],
2158 names=['x', 'y', 'z'])
2160 >>> mi.droplevel()
2161 MultiIndex([(3, 5),
2162 (4, 6)],
2163 names=['y', 'z'])
2165 >>> mi.droplevel(2)
2166 MultiIndex([(1, 3),
2167 (2, 4)],
2168 names=['x', 'y'])
2170 >>> mi.droplevel('z')
2171 MultiIndex([(1, 3),
2172 (2, 4)],
2173 names=['x', 'y'])
2175 >>> mi.droplevel(['x', 'y'])
2176 Int64Index([5, 6], dtype='int64', name='z')
2177 """
2178 if not isinstance(level, (tuple, list)):
2179 level = [level]
2181 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2183 return self._drop_level_numbers(levnums)
    @final
    def _drop_level_numbers(self, levnums: list[int]):
        """
        Drop MultiIndex levels by level _number_, not name.

        Parameters
        ----------
        levnums : list of int
            Level numbers to drop. Callers (see ``droplevel``) pass these in
            decreasing order so popping one level does not shift the rest.

        Returns
        -------
        Index or MultiIndex
            ``self`` unchanged when ``levnums`` is empty on a non-MultiIndex;
            a flat Index when exactly one level remains; otherwise a new
            MultiIndex over the remaining levels.
        """

        if not levnums and not isinstance(self, ABCMultiIndex):
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:
            lev = new_levels[0]

            if len(lev) == 0:
                # If lev is empty, lev.take will fail GH#42055
                if len(new_codes[0]) == 0:
                    # GH#45230 preserve RangeIndex here
                    #  see test_reset_index_empty_rangeindex
                    result = lev[:0]
                else:
                    res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                    # _constructor instead of type(lev) for RangeIndex compat GH#35230
                    result = lev._constructor._simple_new(res_values, name=new_names[0])
            else:
                # set nan if needed
                mask = new_codes[0] == -1
                result = new_levels[0].take(new_codes[0])
                if mask.any():
                    result = result.putmask(mask, np.nan)

                result._name = new_names[0]

            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )
2243 def _get_grouper_for_level(
2244 self,
2245 mapper,
2246 *,
2247 level=None,
2248 dropna: bool = True,
2249 ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
2250 """
2251 Get index grouper corresponding to an index level
2253 Parameters
2254 ----------
2255 mapper: Group mapping function or None
2256 Function mapping index values to groups
2257 level : int or None
2258 Index level, positional
2259 dropna : bool
2260 dropna from groupby
2262 Returns
2263 -------
2264 grouper : Index
2265 Index of values to group on.
2266 labels : ndarray of int or None
2267 Array of locations in level_index.
2268 uniques : Index or None
2269 Index of unique values for level.
2270 """
2271 assert level is None or level == 0
2272 if mapper is None:
2273 grouper = self
2274 else:
2275 grouper = self.map(mapper)
2277 return grouper, None, None
2279 # --------------------------------------------------------------------
2280 # Introspection Methods
2282 @cache_readonly
2283 @final
2284 def _can_hold_na(self) -> bool:
2285 if isinstance(self.dtype, ExtensionDtype):
2286 if isinstance(self.dtype, IntervalDtype):
2287 # FIXME(GH#45720): this is inaccurate for integer-backed
2288 # IntervalArray, but without it other.categories.take raises
2289 # in IntervalArray._cmp_method
2290 return True
2291 return self.dtype._can_hold_na
2292 if self.dtype.kind in ["i", "u", "b"]:
2293 return False
2294 return True
2296 @final
2297 @property
2298 def is_monotonic(self) -> bool:
2299 """
2300 Alias for is_monotonic_increasing.
2302 .. deprecated:: 1.5.0
2303 is_monotonic is deprecated and will be removed in a future version.
2304 Use is_monotonic_increasing instead.
2305 """
2306 warnings.warn(
2307 "is_monotonic is deprecated and will be removed in a future version. "
2308 "Use is_monotonic_increasing instead.",
2309 FutureWarning,
2310 stacklevel=find_stack_level(),
2311 )
2312 return self.is_monotonic_increasing
2314 @property
2315 def is_monotonic_increasing(self) -> bool:
2316 """
2317 Return a boolean if the values are equal or increasing.
2319 Examples
2320 --------
2321 >>> Index([1, 2, 3]).is_monotonic_increasing
2322 True
2323 >>> Index([1, 2, 2]).is_monotonic_increasing
2324 True
2325 >>> Index([1, 3, 2]).is_monotonic_increasing
2326 False
2327 """
2328 return self._engine.is_monotonic_increasing
2330 @property
2331 def is_monotonic_decreasing(self) -> bool:
2332 """
2333 Return a boolean if the values are equal or decreasing.
2335 Examples
2336 --------
2337 >>> Index([3, 2, 1]).is_monotonic_decreasing
2338 True
2339 >>> Index([3, 2, 2]).is_monotonic_decreasing
2340 True
2341 >>> Index([3, 1, 2]).is_monotonic_decreasing
2342 False
2343 """
2344 return self._engine.is_monotonic_decreasing
2346 @final
2347 @property
2348 def _is_strictly_monotonic_increasing(self) -> bool:
2349 """
2350 Return if the index is strictly monotonic increasing
2351 (only increasing) values.
2353 Examples
2354 --------
2355 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
2356 True
2357 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
2358 False
2359 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
2360 False
2361 """
2362 return self.is_unique and self.is_monotonic_increasing
2364 @final
2365 @property
2366 def _is_strictly_monotonic_decreasing(self) -> bool:
2367 """
2368 Return if the index is strictly monotonic decreasing
2369 (only decreasing) values.
2371 Examples
2372 --------
2373 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
2374 True
2375 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
2376 False
2377 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
2378 False
2379 """
2380 return self.is_unique and self.is_monotonic_decreasing
2382 @cache_readonly
2383 def is_unique(self) -> bool:
2384 """
2385 Return if the index has unique values.
2386 """
2387 return self._engine.is_unique
2389 @final
2390 @property
2391 def has_duplicates(self) -> bool:
2392 """
2393 Check if the Index has duplicate values.
2395 Returns
2396 -------
2397 bool
2398 Whether or not the Index has duplicate values.
2400 Examples
2401 --------
2402 >>> idx = pd.Index([1, 5, 7, 7])
2403 >>> idx.has_duplicates
2404 True
2406 >>> idx = pd.Index([1, 5, 7])
2407 >>> idx.has_duplicates
2408 False
2410 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2411 ... "Watermelon"]).astype("category")
2412 >>> idx.has_duplicates
2413 True
2415 >>> idx = pd.Index(["Orange", "Apple",
2416 ... "Watermelon"]).astype("category")
2417 >>> idx.has_duplicates
2418 False
2419 """
2420 return not self.is_unique
2422 @final
2423 def is_boolean(self) -> bool:
2424 """
2425 Check if the Index only consists of booleans.
2427 Returns
2428 -------
2429 bool
2430 Whether or not the Index only consists of booleans.
2432 See Also
2433 --------
2434 is_integer : Check if the Index only consists of integers.
2435 is_floating : Check if the Index is a floating type.
2436 is_numeric : Check if the Index only consists of numeric data.
2437 is_object : Check if the Index is of the object dtype.
2438 is_categorical : Check if the Index holds categorical data.
2439 is_interval : Check if the Index holds Interval objects.
2440 is_mixed : Check if the Index holds data with mixed data types.
2442 Examples
2443 --------
2444 >>> idx = pd.Index([True, False, True])
2445 >>> idx.is_boolean()
2446 True
2448 >>> idx = pd.Index(["True", "False", "True"])
2449 >>> idx.is_boolean()
2450 False
2452 >>> idx = pd.Index([True, False, "True"])
2453 >>> idx.is_boolean()
2454 False
2455 """
2456 return self.inferred_type in ["boolean"]
2458 @final
2459 def is_integer(self) -> bool:
2460 """
2461 Check if the Index only consists of integers.
2463 Returns
2464 -------
2465 bool
2466 Whether or not the Index only consists of integers.
2468 See Also
2469 --------
2470 is_boolean : Check if the Index only consists of booleans.
2471 is_floating : Check if the Index is a floating type.
2472 is_numeric : Check if the Index only consists of numeric data.
2473 is_object : Check if the Index is of the object dtype.
2474 is_categorical : Check if the Index holds categorical data.
2475 is_interval : Check if the Index holds Interval objects.
2476 is_mixed : Check if the Index holds data with mixed data types.
2478 Examples
2479 --------
2480 >>> idx = pd.Index([1, 2, 3, 4])
2481 >>> idx.is_integer()
2482 True
2484 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2485 >>> idx.is_integer()
2486 False
2488 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2489 >>> idx.is_integer()
2490 False
2491 """
2492 return self.inferred_type in ["integer"]
2494 @final
2495 def is_floating(self) -> bool:
2496 """
2497 Check if the Index is a floating type.
2499 The Index may consist of only floats, NaNs, or a mix of floats,
2500 integers, or NaNs.
2502 Returns
2503 -------
2504 bool
2505 Whether or not the Index only consists of only consists of floats, NaNs, or
2506 a mix of floats, integers, or NaNs.
2508 See Also
2509 --------
2510 is_boolean : Check if the Index only consists of booleans.
2511 is_integer : Check if the Index only consists of integers.
2512 is_numeric : Check if the Index only consists of numeric data.
2513 is_object : Check if the Index is of the object dtype.
2514 is_categorical : Check if the Index holds categorical data.
2515 is_interval : Check if the Index holds Interval objects.
2516 is_mixed : Check if the Index holds data with mixed data types.
2518 Examples
2519 --------
2520 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2521 >>> idx.is_floating()
2522 True
2524 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2525 >>> idx.is_floating()
2526 True
2528 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2529 >>> idx.is_floating()
2530 True
2532 >>> idx = pd.Index([1, 2, 3, 4])
2533 >>> idx.is_floating()
2534 False
2535 """
2536 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2538 @final
2539 def is_numeric(self) -> bool:
2540 """
2541 Check if the Index only consists of numeric data.
2543 Returns
2544 -------
2545 bool
2546 Whether or not the Index only consists of numeric data.
2548 See Also
2549 --------
2550 is_boolean : Check if the Index only consists of booleans.
2551 is_integer : Check if the Index only consists of integers.
2552 is_floating : Check if the Index is a floating type.
2553 is_object : Check if the Index is of the object dtype.
2554 is_categorical : Check if the Index holds categorical data.
2555 is_interval : Check if the Index holds Interval objects.
2556 is_mixed : Check if the Index holds data with mixed data types.
2558 Examples
2559 --------
2560 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2561 >>> idx.is_numeric()
2562 True
2564 >>> idx = pd.Index([1, 2, 3, 4.0])
2565 >>> idx.is_numeric()
2566 True
2568 >>> idx = pd.Index([1, 2, 3, 4])
2569 >>> idx.is_numeric()
2570 True
2572 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2573 >>> idx.is_numeric()
2574 True
2576 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2577 >>> idx.is_numeric()
2578 False
2579 """
2580 return self.inferred_type in ["integer", "floating"]
2582 @final
2583 def is_object(self) -> bool:
2584 """
2585 Check if the Index is of the object dtype.
2587 Returns
2588 -------
2589 bool
2590 Whether or not the Index is of the object dtype.
2592 See Also
2593 --------
2594 is_boolean : Check if the Index only consists of booleans.
2595 is_integer : Check if the Index only consists of integers.
2596 is_floating : Check if the Index is a floating type.
2597 is_numeric : Check if the Index only consists of numeric data.
2598 is_categorical : Check if the Index holds categorical data.
2599 is_interval : Check if the Index holds Interval objects.
2600 is_mixed : Check if the Index holds data with mixed data types.
2602 Examples
2603 --------
2604 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2605 >>> idx.is_object()
2606 True
2608 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2609 >>> idx.is_object()
2610 True
2612 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2613 ... "Watermelon"]).astype("category")
2614 >>> idx.is_object()
2615 False
2617 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2618 >>> idx.is_object()
2619 False
2620 """
2621 return is_object_dtype(self.dtype)
2623 @final
2624 def is_categorical(self) -> bool:
2625 """
2626 Check if the Index holds categorical data.
2628 Returns
2629 -------
2630 bool
2631 True if the Index is categorical.
2633 See Also
2634 --------
2635 CategoricalIndex : Index for categorical data.
2636 is_boolean : Check if the Index only consists of booleans.
2637 is_integer : Check if the Index only consists of integers.
2638 is_floating : Check if the Index is a floating type.
2639 is_numeric : Check if the Index only consists of numeric data.
2640 is_object : Check if the Index is of the object dtype.
2641 is_interval : Check if the Index holds Interval objects.
2642 is_mixed : Check if the Index holds data with mixed data types.
2644 Examples
2645 --------
2646 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2647 ... "Watermelon"]).astype("category")
2648 >>> idx.is_categorical()
2649 True
2651 >>> idx = pd.Index([1, 3, 5, 7])
2652 >>> idx.is_categorical()
2653 False
2655 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2656 >>> s
2657 0 Peter
2658 1 Victor
2659 2 Elisabeth
2660 3 Mar
2661 dtype: object
2662 >>> s.index.is_categorical()
2663 False
2664 """
2665 return self.inferred_type in ["categorical"]
2667 @final
2668 def is_interval(self) -> bool:
2669 """
2670 Check if the Index holds Interval objects.
2672 Returns
2673 -------
2674 bool
2675 Whether or not the Index holds Interval objects.
2677 See Also
2678 --------
2679 IntervalIndex : Index for Interval objects.
2680 is_boolean : Check if the Index only consists of booleans.
2681 is_integer : Check if the Index only consists of integers.
2682 is_floating : Check if the Index is a floating type.
2683 is_numeric : Check if the Index only consists of numeric data.
2684 is_object : Check if the Index is of the object dtype.
2685 is_categorical : Check if the Index holds categorical data.
2686 is_mixed : Check if the Index holds data with mixed data types.
2688 Examples
2689 --------
2690 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2691 ... pd.Interval(left=5, right=10)])
2692 >>> idx.is_interval()
2693 True
2695 >>> idx = pd.Index([1, 3, 5, 7])
2696 >>> idx.is_interval()
2697 False
2698 """
2699 return self.inferred_type in ["interval"]
2701 @final
2702 def is_mixed(self) -> bool:
2703 """
2704 Check if the Index holds data with mixed data types.
2706 Returns
2707 -------
2708 bool
2709 Whether or not the Index holds data with mixed data types.
2711 See Also
2712 --------
2713 is_boolean : Check if the Index only consists of booleans.
2714 is_integer : Check if the Index only consists of integers.
2715 is_floating : Check if the Index is a floating type.
2716 is_numeric : Check if the Index only consists of numeric data.
2717 is_object : Check if the Index is of the object dtype.
2718 is_categorical : Check if the Index holds categorical data.
2719 is_interval : Check if the Index holds Interval objects.
2721 Examples
2722 --------
2723 >>> idx = pd.Index(['a', np.nan, 'b'])
2724 >>> idx.is_mixed()
2725 True
2727 >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0])
2728 >>> idx.is_mixed()
2729 False
2730 """
2731 warnings.warn(
2732 "Index.is_mixed is deprecated and will be removed in a future version. "
2733 "Check index.inferred_type directly instead.",
2734 FutureWarning,
2735 stacklevel=find_stack_level(),
2736 )
2737 return self.inferred_type in ["mixed"]
2739 @final
2740 def holds_integer(self) -> bool:
2741 """
2742 Whether the type is an integer type.
2743 """
2744 return self.inferred_type in ["integer", "mixed-integer"]
2746 @cache_readonly
2747 def inferred_type(self) -> str_t:
2748 """
2749 Return a string of the type inferred from the values.
2750 """
2751 return lib.infer_dtype(self._values, skipna=False)
2753 @cache_readonly
2754 @final
2755 def _is_all_dates(self) -> bool:
2756 """
2757 Whether or not the index values only consist of dates.
2758 """
2759 if needs_i8_conversion(self.dtype):
2760 return True
2761 elif self.dtype != _dtype_obj:
2762 # TODO(ExtensionIndex): 3rd party EA might override?
2763 # Note: this includes IntervalIndex, even when the left/right
2764 # contain datetime-like objects.
2765 return False
2766 elif self._is_multi:
2767 return False
2768 return is_datetime_array(ensure_object(self._values))
2770 @cache_readonly
2771 @final
2772 def is_all_dates(self) -> bool:
2773 """
2774 Whether or not the index values only consist of dates.
2775 """
2776 warnings.warn(
2777 "Index.is_all_dates is deprecated, will be removed in a future version. "
2778 "check index.inferred_type instead.",
2779 FutureWarning,
2780 stacklevel=find_stack_level(),
2781 )
2782 return self._is_all_dates
2784 @final
2785 @cache_readonly
2786 def _is_multi(self) -> bool:
2787 """
2788 Cached check equivalent to isinstance(self, MultiIndex)
2789 """
2790 return isinstance(self, ABCMultiIndex)
2792 # --------------------------------------------------------------------
2793 # Pickle Methods
2795 def __reduce__(self):
2796 d = {"data": self._data, "name": self.name}
2797 return _new_Index, (type(self), d), None
2799 # --------------------------------------------------------------------
2800 # Null Handling Methods
2802 @cache_readonly
2803 def _na_value(self):
2804 """The expected NA value to use with this index."""
2805 dtype = self.dtype
2806 if isinstance(dtype, np.dtype):
2807 if dtype.kind in ["m", "M"]:
2808 return NaT
2809 return np.nan
2810 return dtype.na_value
2812 @cache_readonly
2813 def _isnan(self) -> npt.NDArray[np.bool_]:
2814 """
2815 Return if each value is NaN.
2816 """
2817 if self._can_hold_na:
2818 return isna(self)
2819 else:
2820 # shouldn't reach to this condition by checking hasnans beforehand
2821 values = np.empty(len(self), dtype=np.bool_)
2822 values.fill(False)
2823 return values
2825 @cache_readonly
2826 def hasnans(self) -> bool:
2827 """
2828 Return True if there are any NaNs.
2830 Enables various performance speedups.
2831 """
2832 if self._can_hold_na:
2833 return bool(self._isnan.any())
2834 else:
2835 return False
2837 @final
2838 def isna(self) -> npt.NDArray[np.bool_]:
2839 """
2840 Detect missing values.
2842 Return a boolean same-sized object indicating if the values are NA.
2843 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2844 mapped to ``True`` values.
2845 Everything else get mapped to ``False`` values. Characters such as
2846 empty strings `''` or :attr:`numpy.inf` are not considered NA values
2847 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2849 Returns
2850 -------
2851 numpy.ndarray[bool]
2852 A boolean array of whether my values are NA.
2854 See Also
2855 --------
2856 Index.notna : Boolean inverse of isna.
2857 Index.dropna : Omit entries with missing values.
2858 isna : Top-level isna.
2859 Series.isna : Detect missing values in Series object.
2861 Examples
2862 --------
2863 Show which entries in a pandas.Index are NA. The result is an
2864 array.
2866 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2867 >>> idx
2868 Float64Index([5.2, 6.0, nan], dtype='float64')
2869 >>> idx.isna()
2870 array([False, False, True])
2872 Empty strings are not considered NA values. None is considered an NA
2873 value.
2875 >>> idx = pd.Index(['black', '', 'red', None])
2876 >>> idx
2877 Index(['black', '', 'red', None], dtype='object')
2878 >>> idx.isna()
2879 array([False, False, False, True])
2881 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2883 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2884 ... pd.Timestamp(''), None, pd.NaT])
2885 >>> idx
2886 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2887 dtype='datetime64[ns]', freq=None)
2888 >>> idx.isna()
2889 array([False, True, True, True])
2890 """
2891 return self._isnan
2893 isnull = isna
2895 @final
2896 def notna(self) -> npt.NDArray[np.bool_]:
2897 """
2898 Detect existing (non-missing) values.
2900 Return a boolean same-sized object indicating if the values are not NA.
2901 Non-missing values get mapped to ``True``. Characters such as empty
2902 strings ``''`` or :attr:`numpy.inf` are not considered NA values
2903 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2904 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2905 values.
2907 Returns
2908 -------
2909 numpy.ndarray[bool]
2910 Boolean array to indicate which entries are not NA.
2912 See Also
2913 --------
2914 Index.notnull : Alias of notna.
2915 Index.isna: Inverse of notna.
2916 notna : Top-level notna.
2918 Examples
2919 --------
2920 Show which entries in an Index are not NA. The result is an
2921 array.
2923 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2924 >>> idx
2925 Float64Index([5.2, 6.0, nan], dtype='float64')
2926 >>> idx.notna()
2927 array([ True, True, False])
2929 Empty strings are not considered NA values. None is considered a NA
2930 value.
2932 >>> idx = pd.Index(['black', '', 'red', None])
2933 >>> idx
2934 Index(['black', '', 'red', None], dtype='object')
2935 >>> idx.notna()
2936 array([ True, True, True, False])
2937 """
2938 return ~self.isna()
2940 notnull = notna
2942 def fillna(self, value=None, downcast=None):
2943 """
2944 Fill NA/NaN values with the specified value.
2946 Parameters
2947 ----------
2948 value : scalar
2949 Scalar value to use to fill holes (e.g. 0).
2950 This value cannot be a list-likes.
2951 downcast : dict, default is None
2952 A dict of item->dtype of what to downcast if possible,
2953 or the string 'infer' which will try to downcast to an appropriate
2954 equal type (e.g. float64 to int64 if possible).
2956 Returns
2957 -------
2958 Index
2960 See Also
2961 --------
2962 DataFrame.fillna : Fill NaN values of a DataFrame.
2963 Series.fillna : Fill NaN Values of a Series.
2964 """
2966 value = self._require_scalar(value)
2967 if self.hasnans:
2968 result = self.putmask(self._isnan, value)
2969 if downcast is None:
2970 # no need to care metadata other than name
2971 # because it can't have freq if it has NaTs
2972 return Index._with_infer(result, name=self.name)
2973 raise NotImplementedError(
2974 f"{type(self).__name__}.fillna does not support 'downcast' "
2975 "argument values other than 'None'."
2976 )
2977 return self._view()
2979 def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
2980 """
2981 Return Index without NA/NaN values.
2983 Parameters
2984 ----------
2985 how : {'any', 'all'}, default 'any'
2986 If the Index is a MultiIndex, drop the value when any or all levels
2987 are NaN.
2989 Returns
2990 -------
2991 Index
2992 """
2993 if how not in ("any", "all"):
2994 raise ValueError(f"invalid how option: {how}")
2996 if self.hasnans:
2997 res_values = self._values[~self._isnan]
2998 return type(self)._simple_new(res_values, name=self.name)
2999 return self._view()
3001 # --------------------------------------------------------------------
3002 # Uniqueness Methods
3004 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
3005 """
3006 Return unique values in the index.
3008 Unique values are returned in order of appearance, this does NOT sort.
3010 Parameters
3011 ----------
3012 level : int or hashable, optional
3013 Only return values from specified level (for MultiIndex).
3014 If int, gets the level by integer position, else by level name.
3016 Returns
3017 -------
3018 Index
3020 See Also
3021 --------
3022 unique : Numpy array of unique values in that column.
3023 Series.unique : Return unique values of Series object.
3024 """
3025 if level is not None:
3026 self._validate_index_level(level)
3028 if self.is_unique:
3029 return self._view()
3031 result = super().unique()
3032 return self._shallow_copy(result)
3034 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
3035 def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
3036 """
3037 Return Index with duplicate values removed.
3039 Parameters
3040 ----------
3041 keep : {'first', 'last', ``False``}, default 'first'
3042 - 'first' : Drop duplicates except for the first occurrence.
3043 - 'last' : Drop duplicates except for the last occurrence.
3044 - ``False`` : Drop all duplicates.
3046 Returns
3047 -------
3048 deduplicated : Index
3050 See Also
3051 --------
3052 Series.drop_duplicates : Equivalent method on Series.
3053 DataFrame.drop_duplicates : Equivalent method on DataFrame.
3054 Index.duplicated : Related method on Index, indicating duplicate
3055 Index values.
3057 Examples
3058 --------
3059 Generate an pandas.Index with duplicate values.
3061 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
3063 The `keep` parameter controls which duplicate values are removed.
3064 The value 'first' keeps the first occurrence for each
3065 set of duplicated entries. The default value of keep is 'first'.
3067 >>> idx.drop_duplicates(keep='first')
3068 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
3070 The value 'last' keeps the last occurrence for each set of duplicated
3071 entries.
3073 >>> idx.drop_duplicates(keep='last')
3074 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
3076 The value ``False`` discards all sets of duplicated entries.
3078 >>> idx.drop_duplicates(keep=False)
3079 Index(['cow', 'beetle', 'hippo'], dtype='object')
3080 """
3081 if self.is_unique:
3082 return self._view()
3084 return super().drop_duplicates(keep=keep)
3086 def duplicated(
3087 self, keep: Literal["first", "last", False] = "first"
3088 ) -> npt.NDArray[np.bool_]:
3089 """
3090 Indicate duplicate index values.
3092 Duplicated values are indicated as ``True`` values in the resulting
3093 array. Either all duplicates, all except the first, or all except the
3094 last occurrence of duplicates can be indicated.
3096 Parameters
3097 ----------
3098 keep : {'first', 'last', False}, default 'first'
3099 The value or values in a set of duplicates to mark as missing.
3101 - 'first' : Mark duplicates as ``True`` except for the first
3102 occurrence.
3103 - 'last' : Mark duplicates as ``True`` except for the last
3104 occurrence.
3105 - ``False`` : Mark all duplicates as ``True``.
3107 Returns
3108 -------
3109 np.ndarray[bool]
3111 See Also
3112 --------
3113 Series.duplicated : Equivalent method on pandas.Series.
3114 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
3115 Index.drop_duplicates : Remove duplicate values from Index.
3117 Examples
3118 --------
3119 By default, for each set of duplicated values, the first occurrence is
3120 set to False and all others to True:
3122 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3123 >>> idx.duplicated()
3124 array([False, False, True, False, True])
3126 which is equivalent to
3128 >>> idx.duplicated(keep='first')
3129 array([False, False, True, False, True])
3131 By using 'last', the last occurrence of each set of duplicated values
3132 is set on False and all others on True:
3134 >>> idx.duplicated(keep='last')
3135 array([ True, False, True, False, False])
3137 By setting keep on ``False``, all duplicates are True:
3139 >>> idx.duplicated(keep=False)
3140 array([ True, False, True, False, True])
3141 """
3142 if self.is_unique:
3143 # fastpath available bc we are immutable
3144 return np.zeros(len(self), dtype=bool)
3145 return self._duplicated(keep=keep)
3147 # --------------------------------------------------------------------
3148 # Arithmetic & Logical Methods
3150 def __iadd__(self, other):
3151 # alias for __add__
3152 return self + other
3154 @final
3155 def __and__(self, other):
3156 warnings.warn(
3157 "Index.__and__ operating as a set operation is deprecated, "
3158 "in the future this will be a logical operation matching "
3159 "Series.__and__. Use index.intersection(other) instead.",
3160 FutureWarning,
3161 stacklevel=find_stack_level(),
3162 )
3163 return self.intersection(other)
3165 @final
3166 def __or__(self, other):
3167 warnings.warn(
3168 "Index.__or__ operating as a set operation is deprecated, "
3169 "in the future this will be a logical operation matching "
3170 "Series.__or__. Use index.union(other) instead.",
3171 FutureWarning,
3172 stacklevel=find_stack_level(),
3173 )
3174 return self.union(other)
3176 @final
3177 def __xor__(self, other):
3178 warnings.warn(
3179 "Index.__xor__ operating as a set operation is deprecated, "
3180 "in the future this will be a logical operation matching "
3181 "Series.__xor__. Use index.symmetric_difference(other) instead.",
3182 FutureWarning,
3183 stacklevel=find_stack_level(),
3184 )
3185 return self.symmetric_difference(other)
3187 @final
3188 def __nonzero__(self) -> NoReturn:
3189 raise ValueError(
3190 f"The truth value of a {type(self).__name__} is ambiguous. "
3191 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3192 )
3194 __bool__ = __nonzero__
3196 # --------------------------------------------------------------------
3197 # Set Operation Methods
3199 def _get_reconciled_name_object(self, other):
3200 """
3201 If the result of a set operation will be self,
3202 return self, unless the name changes, in which
3203 case make a shallow copy of self.
3204 """
3205 name = get_op_result_name(self, other)
3206 if self.name is not name:
3207 return self.rename(name)
3208 return self
3210 @final
3211 def _validate_sort_keyword(self, sort):
3212 if sort not in [None, False]:
3213 raise ValueError(
3214 "The 'sort' keyword only takes the values of "
3215 f"None or False; {sort} was passed."
3216 )
3218 @final
3219 def _deprecate_dti_setop(self, other: Index, setop: str_t):
3220 """
3221 Deprecate setop behavior between timezone-aware DatetimeIndexes with
3222 mismatched timezones.
3223 """
3224 # Caller is responsibelf or checking
3225 # `not is_dtype_equal(self.dtype, other.dtype)`
3226 if (
3227 isinstance(self, ABCDatetimeIndex)
3228 and isinstance(other, ABCDatetimeIndex)
3229 and self.tz is not None
3230 and other.tz is not None
3231 ):
3232 # GH#39328, GH#45357
3233 warnings.warn(
3234 f"In a future version, the {setop} of DatetimeIndex objects "
3235 "with mismatched timezones will cast both to UTC instead of "
3236 "object dtype. To retain the old behavior, "
3237 f"use `index.astype(object).{setop}(other)`",
3238 FutureWarning,
3239 stacklevel=find_stack_level(),
3240 )
    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        .. versionchanged:: 0.25.0

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.

        Returns
        -------
        union : Index

        Raises
        ------
        NotImplementedError
            When unioning a MultiIndex with a non-object, non-empty
            flat Index.
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        # normalize array-likes to Index and reconcile the result name
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mismatched dtypes: find a common dtype, cast both sides,
            # and retry the union on the cast operands.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            # warn for tz-aware DatetimeIndexes with mismatched timezones
            self._deprecate_dti_setop(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            # or after the is_dtype_equal check above affects the returned dtype
            return self._get_reconciled_name_object(other)

        elif not len(self):
            return other._get_reconciled_name_object(self)

        # general case: delegate to subclass-overridable _union
        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)
    def _union(self, other: Index, sort):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
            May be an Index (libjoin fastpath) or a raw array; the caller
            wraps the result via ``_wrap_setop_result``.
        """
        lvals = self._values
        rvals = other._values

        if (
            sort is None
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            # (actually don't need either unique, but without this restriction
            # test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates
            result = algos.union_with_duplicates(lvals, rvals)
            return _maybe_try_sort(result, sort)

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            # -1 marks values of `other` not found in `self`
            missing = (indexer == -1).nonzero()[0]
        else:
            # non-unique self: second element of get_indexer_non_unique is
            # the positions in `other` that were not matched
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        if len(missing) > 0:
            # append the genuinely new values from `other` onto `self`
            other_diff = rvals.take(missing)
            result = concat_compat((lvals, other_diff))
        else:
            result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result
3428 @final
3429 def _wrap_setop_result(self, other: Index, result) -> Index:
3430 name = get_op_result_name(self, other)
3431 if isinstance(result, Index):
3432 if result.name != name:
3433 result = result.rename(name)
3434 else:
3435 result = self._shallow_copy(result, name=name)
3436 return result
    @final
    def intersection(self, other, sort=False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        intersection : Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            self._deprecate_dti_setop(other, "intersection")

        if self.equals(other):
            # Fast path for identical indexes; duplicates are dropped because
            # the intersection is defined on unique values.
            if self.has_duplicates:
                return self.unique()._get_reconciled_name_object(other)
            return self._get_reconciled_name_object(other)

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if is_dtype_equal(self.dtype, dtype):
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                #  1) which index's `freq` we get in DTI/TDI cases
                #     This may be a historical artifact, i.e. no documented
                #     reason for this choice.
                #  2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry with matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)
    def _intersection(self, other: Index, sort=False):
        """
        intersection specialized to the case with matching dtypes.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
        ):
            # Fast path: libjoin inner join on monotonic values.
            try:
                result = self._inner_indexer(other)[0]
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                res = algos.unique1d(result)
                return ensure_wrapped_if_datetimelike(res)

        # Fallback: hash-table based intersection via get_indexer.
        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values
3540 def _wrap_intersection_result(self, other, result):
3541 # We will override for MultiIndex to handle empty results
3542 return self._wrap_setop_result(other, result)
    @final
    def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike:
        """
        Find the intersection of two Indexes using get_indexer.

        Returns
        -------
        np.ndarray or ExtensionArray
            The returned array will be unique.
        """
        left_unique = self.unique()
        right_unique = other.unique()

        # even though we are unique, we need get_indexer_for for IntervalIndex
        indexer = left_unique.get_indexer_for(right_unique)

        # -1 marks values of `other` not found in `self`.
        mask = indexer != -1

        taker = indexer.take(mask.nonzero()[0])
        if sort is False:
            # sort bc we want the elements in the same order they are in self
            # unnecessary in the case with sort=None bc we will sort later
            taker = np.sort(taker)

        result = left_unique.take(taker)._values
        return result
    @final
    def difference(self, other, sort=None):
        """
        Return a new Index with elements of index not in `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

        Returns
        -------
        difference : Index

        Examples
        --------
        >>> idx1 = pd.Index([2, 1, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Int64Index([2, 1], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        # Note: we do NOT call _deprecate_dti_setop here, as there
        #  is no requirement that .difference be commutative, so it does
        #  not cast to object.

        if self.equals(other):
            # Note: we do not (yet) sort even if sort=None GH#24959
            return self[:0].rename(result_name)

        if len(other) == 0:
            # Note: we do not (yet) sort even if sort=None GH#24959
            return self.rename(result_name)

        if not self._should_compare(other):
            # Nothing matches -> difference is everything
            return self.rename(result_name)

        result = self._difference(other, sort=sort)
        return self._wrap_difference_result(other, result)
    def _difference(self, other, sort):
        """
        Set-difference specialized to comparable dtypes; overridden by
        RangeIndex.
        """
        # Work on unique values so positions map 1-1 to values.
        this = self.unique()

        # Positions in `this` that also appear in `other`.
        indexer = this.get_indexer_for(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        # Keep the positions NOT matched by `other`.
        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
        the_diff = this._values.take(label_diff)
        the_diff = _maybe_try_sort(the_diff, sort)

        return the_diff
3640 def _wrap_difference_result(self, other, result):
3641 # We will override for MultiIndex to handle empty results
3642 return self._wrap_setop_result(other, result)
    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            # Fall back to the name reconciled by _convert_can_do_setop.
            result_name = result_name_update

        if not is_dtype_equal(self.dtype, other.dtype):
            self._deprecate_dti_setop(other, "symmetric_difference")

        if not self._should_compare(other):
            # Nothing can match -> the symmetric difference is the union.
            return self.union(other, sort=sort).rename(result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry with matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this._values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other._values.take(right_indexer)

        res_values = concat_compat([left_diff, right_diff])
        res_values = _maybe_try_sort(res_values, sort)

        # pass dtype so we retain object dtype
        result = Index(res_values, name=result_name, dtype=res_values.dtype)

        if self._is_multi:
            self = cast("MultiIndex", self)
            if len(result) == 0:
                # On equal symmetric_difference MultiIndexes the difference is empty.
                # Therefore, an empty MultiIndex is returned GH#13490
                return type(self)(
                    levels=[[] for _ in range(self.nlevels)],
                    codes=[[] for _ in range(self.nlevels)],
                    names=result.name,
                )
            return type(self).from_tuples(result, names=result.name)

        return result
3732 @final
3733 def _assert_can_do_setop(self, other) -> bool:
3734 if not is_list_like(other):
3735 raise TypeError("Input must be Index or array-like")
3736 return True
3738 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3739 if not isinstance(other, Index):
3740 # TODO(2.0): no need to special-case here once _with_infer
3741 # deprecation is enforced
3742 if hasattr(other, "dtype"):
3743 other = Index(other, name=self.name, dtype=other.dtype)
3744 else:
3745 # e.g. list
3746 other = Index(other, name=self.name)
3747 result_name = self.name
3748 else:
3749 result_name = get_op_result_name(self, other)
3750 return other, result_name
3752 # --------------------------------------------------------------------
3753 # Indexing Methods
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.

            .. deprecated:: 1.4
                Use index.get_indexer([item], method=...) instead.

        tolerance : int or float, optional
            Maximum distance from index value for inexact matches. The value of
            the index at the matching location must satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True])
        """
        if method is None:
            # Exact-match path: delegate to the engine.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if using pad, "
                    "backfill or nearest lookups"
                )
            casted_key = self._maybe_cast_indexer(key)
            try:
                return self._engine.get_loc(casted_key)
            except KeyError as err:
                # Re-raise with the original (un-cast) key for the message.
                raise KeyError(key) from err
            except TypeError:
                # If we have a listlike key, _check_indexing_error will raise
                #  InvalidIndexError. Otherwise we fall through and re-raise
                #  the TypeError.
                self._check_indexing_error(key)
                raise

        # GH#42269
        warnings.warn(
            f"Passing method to {type(self).__name__}.get_loc is deprecated "
            "and will raise in a future version. Use "
            "index.get_indexer([item], method=...) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        if is_scalar(key) and isna(key) and not self.hasnans:
            raise KeyError(key)

        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        # Deprecated inexact-match path, implemented via get_indexer.
        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("get_loc requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            raise KeyError(key)
        return loc
    # Shared docstring template for get_indexer; formatted with
    # _index_doc_kwargs and attached to methods via @Appender.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    @final
    def get_indexer(
        self,
        target,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        method = missing.clean_reindex_fill_method(method)
        # Keep a reference to the un-cast target for the NaN bookkeeping below.
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            #  matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if is_categorical_dtype(self.dtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            #  (could improve perf by doing _should_compare check earlier?)
            assert is_dtype_equal(self.dtype, target.dtype)

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if is_categorical_dtype(target.dtype):
            # potential fastpath
            #  get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            #  e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            # A promotion happened (e.g. object <-> datetime); retry on the
            # promoted objects.
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if not is_dtype_equal(self.dtype, target.dtype) and not is_interval_dtype(
            self.dtype
        ):
            # IntervalIndex gets special treatment for partial-indexing
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)
    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Dtype-compatible core of get_indexer; dispatches on `method`.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching via the engine.
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes(  # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)
4005 @final
4006 def _should_partial_index(self, target: Index) -> bool:
4007 """
4008 Should we attempt partial-matching indexing?
4009 """
4010 if is_interval_dtype(self.dtype):
4011 if is_interval_dtype(target.dtype):
4012 return False
4013 # See https://github.com/pandas-dev/pandas/issues/47772 the commented
4014 # out code can be restored (instead of hardcoding `return True`)
4015 # once that issue if fixed
4016 # "Index" has no attribute "left"
4017 # return self.left._should_compare(target) # type: ignore[attr-defined]
4018 return True
4019 return False
    @final
    def _check_indexing_method(
        self,
        method: str_t | None,
        limit: int | None = None,
        tolerance=None,
    ) -> None:
        """
        Raise if we have a get_indexer `method` that is not supported or valid.
        """
        if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
            # in practice the clean_reindex_fill_method call would raise
            #  before we get here
            raise ValueError("Invalid fill method")  # pragma: no cover

        if self._is_multi:
            # MultiIndex supports only a subset of the fill methods.
            if method == "nearest":
                raise NotImplementedError(
                    "method='nearest' not implemented yet "
                    "for MultiIndex; see GitHub issue 9365"
                )
            elif method == "pad" or method == "backfill":
                if tolerance is not None:
                    raise NotImplementedError(
                        "tolerance not implemented yet for MultiIndex"
                    )

        if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
            # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
            if method is not None:
                raise NotImplementedError(
                    f"method {method} not yet implemented for {type(self).__name__}"
                )

        if method is None:
            # limit/tolerance only make sense together with a fill method.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
            if limit is not None:
                raise ValueError(
                    "limit argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
4067 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
4068 # override this method on subclasses
4069 tolerance = np.asarray(tolerance)
4070 if target.size != tolerance.size and tolerance.size > 1:
4071 raise ValueError("list-like tolerance size must match target index size")
4072 return tolerance
    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        pad/backfill indexer for `target`, optionally filtered by `tolerance`.
        """
        if self._is_multi:
            # TODO: get_indexer_with_fill docstring says values must be _sorted_
            #  but that doesn't appear to be enforced
            # error: "IndexEngine" has no attribute "get_indexer_with_fill"
            engine = self._engine
            return engine.get_indexer_with_fill(  # type: ignore[union-attr]
                target=target._values, values=self._values, method=method, limit=limit
            )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Fast path: C-level pad/backfill on monotonic ndarrays.
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            # Non-monotonic fallback based on searchsorted.
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
    @final
    def _get_fill_indexer_searchsorted(
        self, target: Index, method: str_t, limit: int | None = None
    ) -> npt.NDArray[np.intp]:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side: Literal["left", "right"] = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer
    @final
    def _get_nearest_indexer(
        self, target: Index, limit: int | None, tolerance
    ) -> npt.NDArray[np.intp]:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        if not len(self):
            # Empty index: every target position is unmatched (-1).
            return self._get_fill_indexer(target, "pad")

        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        left_distances = self._difference_compat(target, left_indexer)
        right_distances = self._difference_compat(target, right_indexer)

        # Strict `lt` when monotonic increasing so ties prefer the larger
        # (right) index value, per the documented tie-breaking rule.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
            # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
            # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
            op(left_distances, right_distances)  # type: ignore[arg-type]
            | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
4173 @final
4174 def _filter_indexer_tolerance(
4175 self,
4176 target: Index,
4177 indexer: npt.NDArray[np.intp],
4178 tolerance,
4179 ) -> npt.NDArray[np.intp]:
4181 distance = self._difference_compat(target, indexer)
4183 return np.where(distance <= tolerance, indexer, -1)
    @final
    def _difference_compat(
        self, target: Index, indexer: npt.NDArray[np.intp]
    ) -> ArrayLike:
        """
        Absolute distance between our values taken at `indexer` and `target`.
        """
        # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
        #  of DateOffset objects, which do not support __abs__ (and would be slow
        #  if they did)

        if isinstance(self.dtype, PeriodDtype):
            # Note: we only get here with matching dtypes
            own_values = cast("PeriodArray", self._data)._ndarray
            target_values = cast("PeriodArray", target._data)._ndarray
            diff = own_values[indexer] - target_values
        else:
            # error: Unsupported left operand type for - ("ExtensionArray")
            diff = self._values[indexer] - target._values  # type: ignore[operator]
        return abs(diff)
4203 # --------------------------------------------------------------------
4204 # Indexer Conversion Methods
4206 @final
4207 def _validate_positional_slice(self, key: slice) -> None:
4208 """
4209 For positional indexing, a slice must have either int or None
4210 for each of start, stop, and step.
4211 """
4212 self._validate_indexer("positional", key.start, "iloc")
4213 self._validate_indexer("positional", key.stop, "iloc")
4214 self._validate_indexer("positional", key.step, "iloc")
    def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        is_frame : bool, default False
            Whether this is a slice called on DataFrame.__getitem__
            as opposed to Series.__getitem__
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)

        # special case for interval_dtype bc we do not do partial-indexing
        #  on integer Intervals when slicing
        # TODO: write this in terms of e.g. should_partial_index?
        ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
            self.dtype
        )
        is_positional = is_index_slice and ints_are_positional

        if kind == "getitem":
            """
            called from the getitem slicers, validate that we are in fact
            integers
            """
            if self.is_integer():
                # The elif-chain below decides whether the integer-slice
                # deprecation warning applies; each `pass` branch is a case
                # where positional and label-based slicing agree (or where we
                # will raise anyway).
                if is_frame:
                    # unambiguously positional, no deprecation
                    pass
                elif start is None and stop is None:
                    # label-based vs positional is irrelevant
                    pass
                elif isinstance(self, ABCRangeIndex) and self._range == range(
                    len(self)
                ):
                    # In this case there is no difference between label-based
                    #  and positional, so nothing will change.
                    pass
                elif (
                    self.dtype.kind in ["i", "u"]
                    and self._is_strictly_monotonic_increasing
                    and len(self) > 0
                    and self[0] == 0
                    and self[-1] == len(self) - 1
                ):
                    # We are range-like, e.g. created with Index(np.arange(N))
                    pass
                elif not is_index_slice:
                    # we're going to raise, so don't bother warning, e.g.
                    #  test_integer_positional_indexing
                    pass
                else:
                    warnings.warn(
                        "The behavior of `series[i:j]` with an integer-dtype index "
                        "is deprecated. In a future version, this will be treated "
                        "as *label-based* indexing, consistent with e.g. `series[i]` "
                        "lookups. To retain the old behavior, use `series.iloc[i:j]`. "
                        "To get the future behavior, use `series.loc[i:j]`.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
            if self.is_integer() or is_index_slice:
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                # Both bounds resolve as labels -> treat as label-based.
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                warnings.warn(
                    "Slicing a positional slice with .loc is not supported, "
                    "and will raise TypeError in a future version. "
                    "Use .loc with labels or .iloc with positions instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer
4331 @final
4332 def _invalid_indexer(self, form: str_t, key) -> TypeError:
4333 """
4334 Consistent invalid indexer message.
4335 """
4336 return TypeError(
4337 f"cannot do {form} indexing on {type(self).__name__} with these "
4338 f"indexers [{key}] of type {type(key).__name__}"
4339 )
4341 # --------------------------------------------------------------------
4342 # Reindex Methods
4344 @final
4345 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4346 """
4347 Check if we are allowing reindexing with this particular indexer.
4349 Parameters
4350 ----------
4351 indexer : an integer ndarray
4353 Raises
4354 ------
4355 ValueError if its a duplicate axis
4356 """
4357 # trying to reindex on an axis with duplicates
4358 if not self._index_as_unique and len(indexer):
4359 raise ValueError("cannot reindex on an axis with duplicate labels")
    def reindex(
        self, target, method=None, level=None, limit=None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Empty non-Index target: reindex against an empty slice of
            # ourselves (or of the requested level) so dtype/tz survive.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                # Identical labels need no indexer at all.
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                else:
                    if method is not None or limit is not None:
                        raise ValueError(
                            "cannot reindex a non-unique index "
                            "with a method or limit"
                        )
                    indexer, _ = self.get_indexer_non_unique(target)

            if not self.is_unique:
                # GH#42568
                warnings.warn(
                    "reindexing with a non-unique Index is deprecated and "
                    "will raise in a future version.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer
4480 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4481 target = self._maybe_preserve_names(target, preserve_names)
4482 return target
4484 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4485 if preserve_names and target.nlevels == 1 and target.name != self.name:
4486 target = target.copy(deep=False)
4487 target.name = self.name
4488 return target
    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None
            None when every target label was found in self.

        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1  # True at positions whose label was found
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]  # positions of unmatched labels
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]  # positions of matched labels

            # Interleave found and missing labels back into target order.
            # Index constructor below will do inference
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):

                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:

                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:

                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if isinstance(self, ABCMultiIndex):
            new_index = type(self).from_tuples(new_labels, names=self.names)
        else:
            new_index = Index._with_infer(new_labels, name=self.name)
        return new_index, indexer, new_indexer
4565 # --------------------------------------------------------------------
4566 # Join Methods
    # The three @overload stubs below only refine the static return type of
    # ``join`` based on the literal value of ``return_indexers``; the runtime
    # implementation follows them.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: str_t = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...
    @final
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "other"])
    @_maybe_return_indexers
    def join(
        self,
        other: Index,
        how: str_t = "left",
        level: Level = None,
        return_indexers: bool = False,
        sort: bool = False,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Compute join_index and indexers to conform data structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """
        other = ensure_index(other)

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (self.tz is None) ^ (other.tz is None):
                # Raise instead of casting to object below.
                raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if not self._is_multi and not other._is_multi:
            # We have specific handling for MultiIndex below
            pself, pother = self._maybe_promote(other)
            if pself is not self or pother is not other:
                # A side was promoted; redo the join on the promoted pair.
                return pself.join(
                    pother, how=how, level=level, return_indexers=True, sort=sort
                )

        lindexer: np.ndarray | None
        rindexer: np.ndarray | None

        # try to figure out the join level
        # GH3662
        if level is None and (self._is_multi or other._is_multi):

            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how)

        # join on the level
        if level is not None and (self._is_multi or other._is_multi):
            return self._join_level(other, level, how=how)

        # Degenerate cases: one side is empty.
        if len(other) == 0:
            if how in ("left", "outer"):
                join_index = self._view()
                rindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, None, rindexer
            elif how in ("right", "inner", "cross"):
                join_index = other._view()
                lindexer = np.array([])
                return join_index, lindexer, None

        if len(self) == 0:
            if how in ("right", "outer"):
                join_index = other._view()
                lindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, lindexer, None
            elif how in ("left", "inner", "cross"):
                join_index = self._view()
                rindexer = np.array([])
                return join_index, None, rindexer

        if self._join_precedence < other._join_precedence:
            # Let the higher-precedence index drive the join, then swap the
            # returned indexers back to match (self, other) order.
            how = {"right": "left", "left": "right"}.get(how, how)
            join_index, lidx, ridx = other.join(
                self, how=how, level=level, return_indexers=True
            )
            lidx, ridx = ridx, lidx
            return join_index, lidx, ridx

        if not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype and retry.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.join(other, how=how, return_indexers=True)

        _validate_join_method(how)

        # Dispatch to the appropriate strategy based on uniqueness and
        # monotonicity of the two sides.
        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(other, how=how)
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic_increasing and other.is_monotonic_increasing:
                if not is_interval_dtype(self.dtype):
                    # otherwise we will fall through to _join_via_get_indexer
                    # GH#39133
                    # go through object dtype for ea till engine is supported properly
                    return self._join_monotonic(other, how=how)
            else:
                return self._join_non_unique(other, how=how)
        elif (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and (
                not isinstance(self, ABCMultiIndex)
                or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
            )
            and not is_categorical_dtype(self.dtype)
        ):
            # Categorical is monotonic if data are ordered as categories, but join can
            # not handle this in case of not lexicographically monotonic GH#38502
            try:
                return self._join_monotonic(other, how=how)
            except TypeError:
                # object dtype; non-comparable objects
                pass

        return self._join_via_get_indexer(other, how, sort)
4731 @final
4732 def _join_via_get_indexer(
4733 self, other: Index, how: str_t, sort: bool
4734 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4735 # Fallback if we do not have any fastpaths available based on
4736 # uniqueness/monotonicity
4738 # Note: at this point we have checked matching dtypes
4740 if how == "left":
4741 join_index = self
4742 elif how == "right":
4743 join_index = other
4744 elif how == "inner":
4745 # TODO: sort=False here for backwards compat. It may
4746 # be better to use the sort parameter passed into join
4747 join_index = self.intersection(other, sort=False)
4748 elif how == "outer":
4749 # TODO: sort=True here for backwards compat. It may
4750 # be better to use the sort parameter passed into join
4751 join_index = self.union(other)
4753 if sort:
4754 join_index = join_index.sort_values()
4756 if join_index is self:
4757 lindexer = None
4758 else:
4759 lindexer = self.get_indexer_for(join_index)
4760 if join_index is other:
4761 rindexer = None
4762 else:
4763 rindexer = other.get_indexer_for(join_index)
4764 return join_index, lindexer, rindexer
    @final
    def _join_multi(self, other: Index, how: str_t):
        """
        Join when at least one side is a MultiIndex, matching on the
        overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        # ``.index`` used as a sort key below to preserve each side's
        # original level order.
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):

            # Drop the non-matching levels from left and right respectively
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        # Only one side is a MultiIndex: join on the single shared level.
        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            # Undo the operand swap in the returned indexers.
            return result[0], result[2], result[1]
        return result
4848 @final
4849 def _join_non_unique(
4850 self, other: Index, how: str_t = "left"
4851 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
4852 from pandas.core.reshape.merge import get_join_indexers
4854 # We only get here if dtypes match
4855 assert self.dtype == other.dtype
4857 left_idx, right_idx = get_join_indexers(
4858 [self._values], [other._values], how=how, sort=True
4859 )
4860 mask = left_idx == -1
4862 join_array = self._values.take(left_idx)
4863 right = other._values.take(right_idx)
4865 if isinstance(join_array, np.ndarray):
4866 # error: Argument 3 to "putmask" has incompatible type
4867 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
4868 # "Union[_SupportsArray[dtype[Any]], _NestedSequence[
4869 # _SupportsArray[dtype[Any]]], bool, int, float, complex,
4870 # str, bytes, _NestedSequence[Union[bool, int, float,
4871 # complex, str, bytes]]]"
4872 np.putmask(join_array, mask, right) # type: ignore[arg-type]
4873 else:
4874 join_array._putmask(mask, right)
4876 join_index = self._wrap_joined_index(join_array, other)
4878 return join_index, left_idx, right_idx
    @final
    def _join_level(
        self, other: Index, level, how: str_t = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so that ``left`` is always the MultiIndex side.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {"right": "left", "left": "right"}.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # The level values themselves are unchanged; at most the row
            # order needs adjusting.
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            # Remap the old level codes onto positions in the joined level.
            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            # Undo the operand swap performed above.
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer
5028 @final
5029 def _join_monotonic(
5030 self, other: Index, how: str_t = "left"
5031 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
5032 # We only get here with matching dtypes and both monotonic increasing
5033 assert other.dtype == self.dtype
5035 if self.equals(other):
5036 ret_index = other if how == "right" else self
5037 return ret_index, None, None
5039 ridx: np.ndarray | None
5040 lidx: np.ndarray | None
5042 if self.is_unique and other.is_unique:
5043 # We can perform much better than the general case
5044 if how == "left":
5045 join_index = self
5046 lidx = None
5047 ridx = self._left_indexer_unique(other)
5048 elif how == "right":
5049 join_index = other
5050 lidx = other._left_indexer_unique(self)
5051 ridx = None
5052 elif how == "inner":
5053 join_array, lidx, ridx = self._inner_indexer(other)
5054 join_index = self._wrap_joined_index(join_array, other)
5055 elif how == "outer":
5056 join_array, lidx, ridx = self._outer_indexer(other)
5057 join_index = self._wrap_joined_index(join_array, other)
5058 else:
5059 if how == "left":
5060 join_array, lidx, ridx = self._left_indexer(other)
5061 elif how == "right":
5062 join_array, ridx, lidx = other._left_indexer(self)
5063 elif how == "inner":
5064 join_array, lidx, ridx = self._inner_indexer(other)
5065 elif how == "outer":
5066 join_array, lidx, ridx = self._outer_indexer(other)
5068 join_index = self._wrap_joined_index(join_array, other)
5070 lidx = None if lidx is None else ensure_platform_int(lidx)
5071 ridx = None if ridx is None else ensure_platform_int(ridx)
5072 return join_index, lidx, ridx
5074 def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _IndexT:
5075 assert other.dtype == self.dtype
5077 if isinstance(self, ABCMultiIndex):
5078 name = self.names if self.names == other.names else None
5079 # error: Incompatible return value type (got "MultiIndex",
5080 # expected "_IndexT")
5081 return self._constructor(joined, name=name) # type: ignore[return-value]
5082 else:
5083 name = get_op_result_name(self, other)
5084 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
5086 @cache_readonly
5087 def _can_use_libjoin(self) -> bool:
5088 """
5089 Whether we can use the fastpaths implement in _libs.join
5090 """
5091 if type(self) is Index:
5092 # excludes EAs
5093 return isinstance(self.dtype, np.dtype)
5094 return not is_interval_dtype(self.dtype)
5096 # --------------------------------------------------------------------
5097 # Uncategorized Methods
    @property
    def values(self) -> ArrayLike:
        """
        Return an array representing the data in the Index.

        .. warning::

           We recommend using :attr:`Index.array` or
           :meth:`Index.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        # The raw backing store: an ndarray for plain indexes, an
        # ExtensionArray for extension-typed ones.
        return self._data
5121 # error: Decorated property not supported
5122 # https://github.com/python/mypy/issues/1362
5123 @cache_readonly # type: ignore[misc]
5124 @doc(IndexOpsMixin.array)
5125 def array(self) -> ExtensionArray:
5126 array = self._data
5127 if isinstance(array, np.ndarray):
5128 from pandas.core.arrays.numpy_ import PandasArray
5130 array = PandasArray(array)
5131 return array
    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        """
        The best array representation.

        This is an ndarray or ExtensionArray.

        ``_values`` are consistent between ``Series`` and ``Index``.

        It may differ from the public '.values' method.

        index             | values          | _values       |
        ----------------- | --------------- | ------------- |
        Index             | ndarray         | ndarray       |
        CategoricalIndex  | Categorical     | Categorical   |
        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
        PeriodIndex       | ndarray[object] | PeriodArray   |
        IntervalIndex     | IntervalArray   | IntervalArray |

        See Also
        --------
        values : Values
        """
        # On the base class this is identical to ``values``; subclasses in
        # the table above diverge.
        return self._data
5159 def _get_engine_target(self) -> ArrayLike:
5160 """
5161 Get the ndarray or ExtensionArray that we can pass to the IndexEngine
5162 constructor.
5163 """
5164 vals = self._values
5165 if isinstance(vals, StringArray):
5166 # GH#45652 much more performant than ExtensionEngine
5167 return vals._ndarray
5168 if type(self) is Index and isinstance(self._values, ExtensionArray):
5169 # TODO(ExtensionIndex): remove special-case, just use self._values
5170 return self._values.astype(object)
5171 return vals
    def _from_join_target(self, result: np.ndarray) -> ArrayLike:
        """
        Cast the ndarray returned from one of the libjoin.foo_indexer functions
        back to type(self)._data.
        """
        # Base Index is ndarray-backed, so this is the identity; EA-backed
        # subclasses override it to rebuild their array type.
        return result
5180 @doc(IndexOpsMixin._memory_usage)
5181 def memory_usage(self, deep: bool = False) -> int:
5182 result = self._memory_usage(deep=deep)
5184 # include our engine hashtable
5185 result += self._engine.sizeof(deep=deep)
5186 return result
5188 @final
5189 def where(self, cond, other=None) -> Index:
5190 """
5191 Replace values where the condition is False.
5193 The replacement is taken from other.
5195 Parameters
5196 ----------
5197 cond : bool array-like with the same length as self
5198 Condition to select the values on.
5199 other : scalar, or array-like, default None
5200 Replacement if the condition is False.
5202 Returns
5203 -------
5204 pandas.Index
5205 A copy of self with values replaced from other
5206 where the condition is False.
5208 See Also
5209 --------
5210 Series.where : Same method for Series.
5211 DataFrame.where : Same method for DataFrame.
5213 Examples
5214 --------
5215 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5216 >>> idx
5217 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5218 >>> idx.where(idx.isin(['car', 'train']), 'other')
5219 Index(['car', 'other', 'train', 'other'], dtype='object')
5220 """
5221 if isinstance(self, ABCMultiIndex):
5222 raise NotImplementedError(
5223 ".where is not supported for MultiIndex operations"
5224 )
5225 cond = np.asarray(cond, dtype=bool)
5226 return self.putmask(~cond, other)
5228 # construction helpers
5229 @final
5230 @classmethod
5231 def _scalar_data_error(cls, data):
5232 # We return the TypeError so that we can raise it from the constructor
5233 # in order to keep mypy happy
5234 return TypeError(
5235 f"{cls.__name__}(...) must be called with a collection of some "
5236 f"kind, {repr(data)} was passed"
5237 )
5239 @final
5240 @classmethod
5241 def _string_data_error(cls, data):
5242 raise TypeError(
5243 "String dtype not supported, you may need "
5244 "to explicitly cast to a numeric type"
5245 )
5247 def _validate_fill_value(self, value):
5248 """
5249 Check if the value can be inserted into our array without casting,
5250 and convert it to an appropriate native type if necessary.
5252 Raises
5253 ------
5254 TypeError
5255 If the value cannot be inserted into an array of this dtype.
5256 """
5257 dtype = self.dtype
5258 if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
5259 # return np_can_hold_element(dtype, value)
5260 try:
5261 return np_can_hold_element(dtype, value)
5262 except LossySetitemError as err:
5263 # re-raise as TypeError for consistency
5264 raise TypeError from err
5265 elif not can_hold_element(self._values, value):
5266 raise TypeError
5267 return value
5269 @final
5270 def _require_scalar(self, value):
5271 """
5272 Check that this is a scalar value that we can use for setitem-like
5273 operations without changing dtype.
5274 """
5275 if not is_scalar(value):
5276 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
5277 return value
    def _is_memory_usage_qualified(self) -> bool:
        """
        Return a boolean if we need a qualified .info display.
        """
        # Object dtype holds arbitrary Python objects, so reported memory is
        # only approximate — presumably shown with a "+" qualifier in
        # .info(); confirm against DataFrame.info's rendering.
        return self.is_object()
5285 def is_type_compatible(self, kind: str_t) -> bool:
5286 """
5287 Whether the index type is compatible with the provided type.
5288 """
5289 warnings.warn(
5290 "Index.is_type_compatible is deprecated and will be removed in a "
5291 "future version.",
5292 FutureWarning,
5293 stacklevel=find_stack_level(),
5294 )
5295 return kind == self.inferred_type
5297 def __contains__(self, key: Any) -> bool:
5298 """
5299 Return a boolean indicating whether the provided key is in the index.
5301 Parameters
5302 ----------
5303 key : label
5304 The key to check if it is present in the index.
5306 Returns
5307 -------
5308 bool
5309 Whether the key search is in the index.
5311 Raises
5312 ------
5313 TypeError
5314 If the key is not hashable.
5316 See Also
5317 --------
5318 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5319 list-like key is in the index.
5321 Examples
5322 --------
5323 >>> idx = pd.Index([1, 2, 3, 4])
5324 >>> idx
5325 Int64Index([1, 2, 3, 4], dtype='int64')
5327 >>> 2 in idx
5328 True
5329 >>> 6 in idx
5330 False
5331 """
5332 hash(key)
5333 try:
5334 return key in self._engine
5335 except (OverflowError, TypeError, ValueError):
5336 return False
    # Annotating __hash__ as None marks Index unhashable to type checkers
    # (the mutable-container convention); __contains__ above stays usable.
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    __hash__: ClassVar[None]  # type: ignore[assignment]
5343 @final
5344 def __setitem__(self, key, value):
5345 raise TypeError("Index does not support mutable operations")
    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        # Bind the backing array's getitem once; every path below calls it.
        getitem = self._data.__getitem__

        if is_integer(key) or is_float(key):
            # Scalar positional lookup: returns a scalar, not an Index.
            # GH#44051 exclude bool, which would return a 2d ndarray
            key = com.cast_scalar_indexer(key, warn_float=True)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization com.is_bool_indexer and ndim checks.
            result = getitem(key)
            # Going through simple_new for performance.
            return type(self)._simple_new(result, name=self._name)

        if com.is_bool_indexer(key):
            # if we have list[bools, length=1e5] then doing this check+convert
            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
            # time below from 3.8 ms to 496 µs
            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
            if is_extension_array_dtype(getattr(key, "dtype", None)):
                # Masked/EA boolean: NA positions are treated as False.
                key = key.to_numpy(dtype=bool, na_value=False)
            else:
                key = np.asarray(key, dtype=bool)

        result = getitem(key)
        # Because we ruled out integer above, we always get an arraylike here
        if result.ndim > 1:
            # Multi-dimensional results warn via deprecate_ndim_indexing and
            # are returned as plain arrays, not Index.
            deprecate_ndim_indexing(result)
            if hasattr(result, "_ndarray"):
                # i.e. NDArrayBackedExtensionArray
                # Unpack to ndarray for MPL compat
                # error: Item "ndarray[Any, Any]" of
                # "Union[ExtensionArray, ndarray[Any, Any]]"
                # has no attribute "_ndarray"
                return result._ndarray  # type: ignore[union-attr]
            return result

        # NB: Using _constructor._simple_new would break if MultiIndex
        # didn't override __getitem__
        return self._constructor._simple_new(result, name=self._name)
5399 def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
5400 """
5401 Fastpath for __getitem__ when we know we have a slice.
5402 """
5403 res = self._data[slobj]
5404 return type(self)._simple_new(res, name=self._name)
5406 @final
5407 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5408 """
5409 Faster check for ``name in self`` when we know `name` is a Python
5410 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5411 . key lookup). For indexes that can't hold identifiers (everything
5412 but object & categorical) we just return False.
5414 https://github.com/pandas-dev/pandas/issues/19764
5415 """
5416 if self.is_object() or is_string_dtype(self.dtype) or self.is_categorical():
5417 return name in self
5418 return False
5420 def append(self, other: Index | Sequence[Index]) -> Index:
5421 """
5422 Append a collection of Index options together.
5424 Parameters
5425 ----------
5426 other : Index or list/tuple of indices
5428 Returns
5429 -------
5430 Index
5431 """
5432 to_concat = [self]
5434 if isinstance(other, (list, tuple)):
5435 to_concat += list(other)
5436 else:
5437 # error: Argument 1 to "append" of "list" has incompatible type
5438 # "Union[Index, Sequence[Index]]"; expected "Index"
5439 to_concat.append(other) # type: ignore[arg-type]
5441 for obj in to_concat:
5442 if not isinstance(obj, Index):
5443 raise TypeError("all inputs must be Index")
5445 names = {obj.name for obj in to_concat}
5446 name = None if len(names) > 1 else self.name
5448 return self._concat(to_concat, name)
    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
        """
        Concatenate multiple Index objects.

        Parameters
        ----------
        to_concat : list of Index
            Indexes whose underlying values are concatenated in order.
        name : Hashable
            Name for the resulting Index.

        Returns
        -------
        Index
        """
        # Concatenate the raw backing arrays; dtype coercion is handled by
        # concat_compat.
        to_concat_vals = [x._values for x in to_concat]

        result = concat_compat(to_concat_vals)

        # For backward-compat public numeric Index subclasses, keep the
        # subclass when the concatenated result is still numeric.
        is_numeric = result.dtype.kind in ["i", "u", "f"]
        if self._is_backward_compat_public_numeric_index and is_numeric:
            return type(self)._simple_new(result, name=name)

        # Otherwise let Index infer the appropriate subclass/dtype.
        return Index._with_infer(result, name=name)
    @final
    def putmask(self, mask, value) -> Index:
        """
        Return a new Index of the values set with the mask.

        Parameters
        ----------
        mask : array-like
            Boolean mask (or something convertible to one) selecting the
            positions to replace.
        value : object
            Replacement value(s); broadcast against the selected positions
            as in ``numpy.putmask``.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.
        """
        # Normalize the mask; `noop` is True when nothing is selected.
        mask, noop = validate_putmask(self._values, mask)
        if noop:
            return self.copy()

        if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
            # e.g. None -> np.nan, see also Block._standardize_fill_value
            value = self._na_value
        try:
            converted = self._validate_fill_value(value)
        except (LossySetitemError, ValueError, TypeError) as err:
            if is_object_dtype(self):  # pragma: no cover
                raise err

            # The value does not fit this dtype: upcast to a common dtype
            # and retry there.
            dtype = self._find_common_type_compat(value)
            return self.astype(dtype).putmask(mask, value)

        values = self._values.copy()

        if isinstance(values, np.ndarray):
            converted = setitem_datetimelike_compat(values, mask.sum(), converted)
            np.putmask(values, mask, converted)

        else:
            # Note: we use the original value here, not converted, as
            # _validate_fill_value is not idempotent
            values._putmask(mask, value)

        return self._shallow_copy(values)
5507 def equals(self, other: Any) -> bool:
5508 """
5509 Determine if two Index object are equal.
5511 The things that are being compared are:
5513 * The elements inside the Index object.
5514 * The order of the elements inside the Index object.
5516 Parameters
5517 ----------
5518 other : Any
5519 The other object to compare against.
5521 Returns
5522 -------
5523 bool
5524 True if "other" is an Index and it has the same elements and order
5525 as the calling index; False otherwise.
5527 Examples
5528 --------
5529 >>> idx1 = pd.Index([1, 2, 3])
5530 >>> idx1
5531 Int64Index([1, 2, 3], dtype='int64')
5532 >>> idx1.equals(pd.Index([1, 2, 3]))
5533 True
5535 The elements inside are compared
5537 >>> idx2 = pd.Index(["1", "2", "3"])
5538 >>> idx2
5539 Index(['1', '2', '3'], dtype='object')
5541 >>> idx1.equals(idx2)
5542 False
5544 The order is compared
5546 >>> ascending_idx = pd.Index([1, 2, 3])
5547 >>> ascending_idx
5548 Int64Index([1, 2, 3], dtype='int64')
5549 >>> descending_idx = pd.Index([3, 2, 1])
5550 >>> descending_idx
5551 Int64Index([3, 2, 1], dtype='int64')
5552 >>> ascending_idx.equals(descending_idx)
5553 False
5555 The dtype is *not* compared
5557 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
5558 >>> int64_idx
5559 Int64Index([1, 2, 3], dtype='int64')
5560 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
5561 >>> uint64_idx
5562 UInt64Index([1, 2, 3], dtype='uint64')
5563 >>> int64_idx.equals(uint64_idx)
5564 True
5565 """
5566 if self.is_(other):
5567 return True
5569 if not isinstance(other, Index):
5570 return False
5572 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
5573 # if other is not object, use other's logic for coercion
5574 return other.equals(self)
5576 if isinstance(other, ABCMultiIndex):
5577 # d-level MultiIndex can equal d-tuple Index
5578 return other.equals(self)
5580 if isinstance(self._values, ExtensionArray):
5581 # Dispatch to the ExtensionArray's .equals method.
5582 if not isinstance(other, type(self)):
5583 return False
5585 earr = cast(ExtensionArray, self._data)
5586 return earr.equals(other._data)
5588 if is_extension_array_dtype(other.dtype):
5589 # All EA-backed Index subclasses override equals
5590 return other.equals(self)
5592 return array_equivalent(self._values, other._values)
5594 @final
5595 def identical(self, other) -> bool:
5596 """
5597 Similar to equals, but checks that object attributes and types are also equal.
5599 Returns
5600 -------
5601 bool
5602 If two Index objects have equal elements and same type True,
5603 otherwise False.
5604 """
5605 return (
5606 self.equals(other)
5607 and all(
5608 getattr(self, c, None) == getattr(other, c, None)
5609 for c in self._comparables
5610 )
5611 and type(self) == type(other)
5612 )
5614 @final
5615 def asof(self, label):
5616 """
5617 Return the label from the index, or, if not present, the previous one.
5619 Assuming that the index is sorted, return the passed index label if it
5620 is in the index, or return the previous index label if the passed one
5621 is not in the index.
5623 Parameters
5624 ----------
5625 label : object
5626 The label up to which the method returns the latest index label.
5628 Returns
5629 -------
5630 object
5631 The passed label if it is in the index. The previous label if the
5632 passed label is not in the sorted index or `NaN` if there is no
5633 such label.
5635 See Also
5636 --------
5637 Series.asof : Return the latest value in a Series up to the
5638 passed index.
5639 merge_asof : Perform an asof merge (similar to left join but it
5640 matches on nearest key rather than equal key).
5641 Index.get_loc : An `asof` is a thin wrapper around `get_loc`
5642 with method='pad'.
5644 Examples
5645 --------
5646 `Index.asof` returns the latest index label up to the passed label.
5648 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
5649 >>> idx.asof('2014-01-01')
5650 '2013-12-31'
5652 If the label is in the index, the method returns the passed label.
5654 >>> idx.asof('2014-01-02')
5655 '2014-01-02'
5657 If all of the labels in the index are later than the passed label,
5658 NaN is returned.
5660 >>> idx.asof('1999-01-02')
5661 nan
5663 If the index is not sorted, an error is raised.
5665 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
5666 ... '2014-01-03'])
5667 >>> idx_not_sorted.asof('2013-12-31')
5668 Traceback (most recent call last):
5669 ValueError: index must be monotonic increasing or decreasing
5670 """
5671 self._searchsorted_monotonic(label) # validate sortedness
5672 try:
5673 loc = self.get_loc(label)
5674 except (KeyError, TypeError):
5675 # KeyError -> No exact match, try for padded
5676 # TypeError -> passed e.g. non-hashable, fall through to get
5677 # the tested exception message
5678 indexer = self.get_indexer([label], method="pad")
5679 if indexer.ndim > 1 or indexer.size > 1:
5680 raise TypeError("asof requires scalar valued input")
5681 loc = indexer.item()
5682 if loc == -1:
5683 return self._na_value
5684 else:
5685 if isinstance(loc, slice):
5686 loc = loc.indices(len(self))[-1]
5688 return self[loc]
5690 def asof_locs(
5691 self, where: Index, mask: npt.NDArray[np.bool_]
5692 ) -> npt.NDArray[np.intp]:
5693 """
5694 Return the locations (indices) of labels in the index.
5696 As in the `asof` function, if the label (a particular entry in
5697 `where`) is not in the index, the latest index label up to the
5698 passed label is chosen and its index returned.
5700 If all of the labels in the index are later than a label in `where`,
5701 -1 is returned.
5703 `mask` is used to ignore NA values in the index during calculation.
5705 Parameters
5706 ----------
5707 where : Index
5708 An Index consisting of an array of timestamps.
5709 mask : np.ndarray[bool]
5710 Array of booleans denoting where values in the original
5711 data are not NA.
5713 Returns
5714 -------
5715 np.ndarray[np.intp]
5716 An array of locations (indices) of the labels from the Index
5717 which correspond to the return values of the `asof` function
5718 for every element in `where`.
5719 """
5720 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5721 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5722 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5723 locs = self._values[mask].searchsorted(
5724 where._values, side="right" # type: ignore[call-overload]
5725 )
5726 locs = np.where(locs > 0, locs - 1, 0)
5728 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5730 first_value = self._values[mask.argmax()]
5731 result[(locs == 0) & (where._values < first_value)] = -1
5733 return result
5735 def sort_values(
5736 self,
5737 return_indexer: bool = False,
5738 ascending: bool = True,
5739 na_position: str_t = "last",
5740 key: Callable | None = None,
5741 ):
5742 """
5743 Return a sorted copy of the index.
5745 Return a sorted copy of the index, and optionally return the indices
5746 that sorted the index itself.
5748 Parameters
5749 ----------
5750 return_indexer : bool, default False
5751 Should the indices that would sort the index be returned.
5752 ascending : bool, default True
5753 Should the index values be sorted in an ascending order.
5754 na_position : {'first' or 'last'}, default 'last'
5755 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
5756 the end.
5758 .. versionadded:: 1.2.0
5760 key : callable, optional
5761 If not None, apply the key function to the index values
5762 before sorting. This is similar to the `key` argument in the
5763 builtin :meth:`sorted` function, with the notable difference that
5764 this `key` function should be *vectorized*. It should expect an
5765 ``Index`` and return an ``Index`` of the same shape.
5767 .. versionadded:: 1.1.0
5769 Returns
5770 -------
5771 sorted_index : pandas.Index
5772 Sorted copy of the index.
5773 indexer : numpy.ndarray, optional
5774 The indices that the index itself was sorted by.
5776 See Also
5777 --------
5778 Series.sort_values : Sort values of a Series.
5779 DataFrame.sort_values : Sort values in a DataFrame.
5781 Examples
5782 --------
5783 >>> idx = pd.Index([10, 100, 1, 1000])
5784 >>> idx
5785 Int64Index([10, 100, 1, 1000], dtype='int64')
5787 Sort values in ascending order (default behavior).
5789 >>> idx.sort_values()
5790 Int64Index([1, 10, 100, 1000], dtype='int64')
5792 Sort values in descending order, and also get the indices `idx` was
5793 sorted by.
5795 >>> idx.sort_values(ascending=False, return_indexer=True)
5796 (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5797 """
5798 idx = ensure_key_mapped(self, key)
5800 # GH 35584. Sort missing values according to na_position kwarg
5801 # ignore na_position for MultiIndex
5802 if not isinstance(self, ABCMultiIndex):
5803 _as = nargsort(
5804 items=idx, ascending=ascending, na_position=na_position, key=key
5805 )
5806 else:
5807 _as = idx.argsort()
5808 if not ascending:
5809 _as = _as[::-1]
5811 sorted_index = self.take(_as)
5813 if return_indexer:
5814 return sorted_index, _as
5815 else:
5816 return sorted_index
5818 @final
5819 def sort(self, *args, **kwargs):
5820 """
5821 Use sort_values instead.
5822 """
5823 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5825 def shift(self, periods=1, freq=None):
5826 """
5827 Shift index by desired number of time frequency increments.
5829 This method is for shifting the values of datetime-like indexes
5830 by a specified time increment a given number of times.
5832 Parameters
5833 ----------
5834 periods : int, default 1
5835 Number of periods (or increments) to shift by,
5836 can be positive or negative.
5837 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5838 Frequency increment to shift by.
5839 If None, the index is shifted by its own `freq` attribute.
5840 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5842 Returns
5843 -------
5844 pandas.Index
5845 Shifted index.
5847 See Also
5848 --------
5849 Series.shift : Shift values of Series.
5851 Notes
5852 -----
5853 This method is only implemented for datetime-like index classes,
5854 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5856 Examples
5857 --------
5858 Put the first 5 month starts of 2011 into an index.
5860 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5861 >>> month_starts
5862 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5863 '2011-05-01'],
5864 dtype='datetime64[ns]', freq='MS')
5866 Shift the index by 10 days.
5868 >>> month_starts.shift(10, freq='D')
5869 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
5870 '2011-05-11'],
5871 dtype='datetime64[ns]', freq=None)
5873 The default value of `freq` is the `freq` attribute of the index,
5874 which is 'MS' (month start) in this example.
5876 >>> month_starts.shift(10)
5877 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
5878 '2012-03-01'],
5879 dtype='datetime64[ns]', freq='MS')
5880 """
5881 raise NotImplementedError(
5882 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
5883 f"TimedeltaIndex; Got type {type(self).__name__}"
5884 )
5886 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
5887 """
5888 Return the integer indices that would sort the index.
5890 Parameters
5891 ----------
5892 *args
5893 Passed to `numpy.ndarray.argsort`.
5894 **kwargs
5895 Passed to `numpy.ndarray.argsort`.
5897 Returns
5898 -------
5899 np.ndarray[np.intp]
5900 Integer indices that would sort the index if used as
5901 an indexer.
5903 See Also
5904 --------
5905 numpy.argsort : Similar method for NumPy arrays.
5906 Index.sort_values : Return sorted copy of Index.
5908 Examples
5909 --------
5910 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
5911 >>> idx
5912 Index(['b', 'a', 'd', 'c'], dtype='object')
5914 >>> order = idx.argsort()
5915 >>> order
5916 array([1, 0, 3, 2])
5918 >>> idx[order]
5919 Index(['a', 'b', 'c', 'd'], dtype='object')
5920 """
5921 # This works for either ndarray or EA, is overridden
5922 # by RangeIndex, MultIIndex
5923 return self._data.argsort(*args, **kwargs)
5925 @final
5926 def get_value(self, series: Series, key):
5927 """
5928 Fast lookup of value from 1-dimensional ndarray.
5930 Only use this if you know what you're doing.
5932 Returns
5933 -------
5934 scalar or Series
5935 """
5936 warnings.warn(
5937 "get_value is deprecated and will be removed in a future version. "
5938 "Use Series[key] instead.",
5939 FutureWarning,
5940 stacklevel=find_stack_level(),
5941 )
5943 self._check_indexing_error(key)
5945 try:
5946 # GH 20882, 21257
5947 # First try to convert the key to a location
5948 # If that fails, raise a KeyError if an integer
5949 # index, otherwise, see if key is an integer, and
5950 # try that
5951 loc = self.get_loc(key)
5952 except KeyError:
5953 if not self._should_fallback_to_positional:
5954 raise
5955 elif is_integer(key):
5956 # If the Index cannot hold integer, then this is unambiguously
5957 # a locational lookup.
5958 loc = key
5959 else:
5960 raise
5962 return self._get_values_for_loc(series, loc, key)
5964 def _check_indexing_error(self, key):
5965 if not is_scalar(key):
5966 # if key is not a scalar, directly raise an error (the code below
5967 # would convert to numpy arrays and raise later any way) - GH29926
5968 raise InvalidIndexError(key)
5970 @cache_readonly
5971 def _should_fallback_to_positional(self) -> bool:
5972 """
5973 Should an integer key be treated as positional?
5974 """
5975 return not self.holds_integer()
5977 def _get_values_for_loc(self, series: Series, loc, key):
5978 """
5979 Do a positional lookup on the given Series, returning either a scalar
5980 or a Series.
5982 Assumes that `series.index is self`
5984 key is included for MultiIndex compat.
5985 """
5986 if is_integer(loc):
5987 return series._values[loc]
5989 return series.iloc[loc]
5991 @final
5992 def set_value(self, arr, key, value) -> None:
5993 """
5994 Fast lookup of value from 1-dimensional ndarray.
5996 .. deprecated:: 1.0
5998 Notes
5999 -----
6000 Only use this if you know what you're doing.
6001 """
6002 warnings.warn(
6003 (
6004 "The 'set_value' method is deprecated, and "
6005 "will be removed in a future version."
6006 ),
6007 FutureWarning,
6008 stacklevel=find_stack_level(),
6009 )
6010 loc = self._engine.get_loc(key)
6011 if not can_hold_element(arr, value):
6012 raise ValueError
6013 arr[loc] = value
    # Shared docstring template for get_indexer_non_unique; attached to the
    # method below via @Appender with per-class %-substitutions.
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.
        """
    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Docstring is supplied by the @Appender template above.
        target = ensure_index(target)
        # Give subclasses a chance to cast list-likes (e.g. to Categorical).
        target = self._maybe_cast_listlike_indexer(target)

        if not self._should_compare(target) and not is_interval_dtype(self.dtype):
            # IntervalIndex get special treatment bc numeric scalars can be
            # matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        # Possibly upcast an object-dtype side to the other's dtype and retry.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer_non_unique(ptarget)

        if not is_dtype_equal(self.dtype, target.dtype):
            # Cast both sides to a common dtype and retry there.
            # TODO: if object, could use infer_dtype to preempt costly
            # conversion if still non-comparable?
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        # Note: _maybe_promote ensures we never get here with MultiIndex
        # self and non-Multi target
        tgt_values = target._get_engine_target()
        if self._is_multi and target._is_multi:
            engine = self._engine
            # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
            # no attribute "_extract_level_codes"
            tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), ensure_platform_int(missing)
6075 @final
6076 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
6077 """
6078 Guaranteed return of an indexer even when non-unique.
6080 This dispatches to get_indexer or get_indexer_non_unique
6081 as appropriate.
6083 Returns
6084 -------
6085 np.ndarray[np.intp]
6086 List of indices.
6088 Examples
6089 --------
6090 >>> idx = pd.Index([np.nan, 'var1', np.nan])
6091 >>> idx.get_indexer_for([np.nan])
6092 array([0, 2])
6093 """
6094 if self._index_as_unique:
6095 return self.get_indexer(target)
6096 indexer, _ = self.get_indexer_non_unique(target)
6097 return indexer
    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
        """
        Analogue to get_indexer that raises if any elements are missing.

        Parameters
        ----------
        key : list-like or Index
            Labels to look up.
        axis_name : str
            Axis name used in the KeyError message when labels are missing.

        Returns
        -------
        tuple[Index, np.ndarray]
            The matched labels (taken from self) and their integer positions.

        Raises
        ------
        KeyError
            If any requested label is absent (via _raise_if_missing).
        """
        keyarr = key
        if not isinstance(keyarr, Index):
            # Preserve tuples as scalars while converting to an array.
            keyarr = com.asarray_tuplesafe(keyarr)

        if self._index_as_unique:
            indexer = self.get_indexer_for(keyarr)
            keyarr = self.reindex(keyarr)[0]
        else:
            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

        # Raises KeyError if anything requested was not found.
        self._raise_if_missing(keyarr, indexer, axis_name)

        keyarr = self.take(indexer)
        if isinstance(key, Index):
            # GH 42790 - Preserve name from an Index
            keyarr.name = key.name
        if keyarr.dtype.kind in ["m", "M"]:
            # DTI/TDI.take can infer a freq in some cases when we dont want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

        return keyarr, indexer
6130 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
6131 """
6132 Check that indexer can be used to return a result.
6134 e.g. at least one element was found,
6135 unless the list of keys was actually empty.
6137 Parameters
6138 ----------
6139 key : list-like
6140 Targeted labels (only used to show correct error message).
6141 indexer: array-like of booleans
6142 Indices corresponding to the key,
6143 (with -1 indicating not found).
6144 axis_name : str
6146 Raises
6147 ------
6148 KeyError
6149 If at least one key was requested but none was found.
6150 """
6151 if len(key) == 0:
6152 return
6154 # Count missing values
6155 missing_mask = indexer < 0
6156 nmissing = missing_mask.sum()
6158 if nmissing:
6160 # TODO: remove special-case; this is just to keep exception
6161 # message tests from raising while debugging
6162 use_interval_msg = is_interval_dtype(self.dtype) or (
6163 is_categorical_dtype(self.dtype)
6164 # "Index" has no attribute "categories" [attr-defined]
6165 and is_interval_dtype(
6166 self.categories.dtype # type: ignore[attr-defined]
6167 )
6168 )
6170 if nmissing == len(indexer):
6171 if use_interval_msg:
6172 key = list(key)
6173 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6175 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6176 raise KeyError(f"{not_found} not in index")
6178 @overload
6179 def _get_indexer_non_comparable(
6180 self, target: Index, method, unique: Literal[True] = ...
6181 ) -> npt.NDArray[np.intp]:
6182 ...
6184 @overload
6185 def _get_indexer_non_comparable(
6186 self, target: Index, method, unique: Literal[False]
6187 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6188 ...
6190 @overload
6191 def _get_indexer_non_comparable(
6192 self, target: Index, method, unique: bool = True
6193 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6194 ...
6196 @final
6197 def _get_indexer_non_comparable(
6198 self, target: Index, method, unique: bool = True
6199 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6200 """
6201 Called from get_indexer or get_indexer_non_unique when the target
6202 is of a non-comparable dtype.
6204 For get_indexer lookups with method=None, get_indexer is an _equality_
6205 check, so non-comparable dtypes mean we will always have no matches.
6207 For get_indexer lookups with a method, get_indexer is an _inequality_
6208 check, so non-comparable dtypes mean we will always raise TypeError.
6210 Parameters
6211 ----------
6212 target : Index
6213 method : str or None
6214 unique : bool, default True
6215 * True if called from get_indexer.
6216 * False if called from get_indexer_non_unique.
6218 Raises
6219 ------
6220 TypeError
6221 If doing an inequality check, i.e. method is not None.
6222 """
6223 if method is not None:
6224 other = unpack_nested_dtype(target)
6225 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
6227 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
6228 if unique:
6229 # This is for get_indexer
6230 return no_matches
6231 else:
6232 # This is for get_indexer_non_unique
6233 missing = np.arange(len(target), dtype=np.intp)
6234 return no_matches, missing
6236 @property
6237 def _index_as_unique(self) -> bool:
6238 """
6239 Whether we should treat this as unique for the sake of
6240 get_indexer vs get_indexer_non_unique.
6242 For IntervalIndex compat.
6243 """
6244 return self.is_unique
    # Error message raised when reindexing needs a unique index; overridden
    # by subclasses (message text varies per Index type).
    _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
    @final
    def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.

        Returns
        -------
        tuple[Index, Index]
            Possibly-promoted versions of (self, other); the originals are
            returned unchanged when no promotion applies.
        """

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (
                self.tz is not None
                and other.tz is not None
                and not tz_compare(self.tz, other.tz)
            ):
                # standardize on UTC
                return self.tz_convert("UTC"), other.tz_convert("UTC")

        elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            try:
                # Object-dtype index of dates: try converting to DatetimeIndex.
                return type(other)(self), other
            except OutOfBoundsDatetime:
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we dont have tests that get here
            return type(other)(self), other

        elif self.dtype.kind == "u" and other.dtype.kind == "i":
            # GH#41873
            if other.min() >= 0:
                # All values non-negative: safe to view the signed ints as
                # unsigned so the two sides share a dtype.
                # lookup min as it may be cached
                # TODO: may need itemsize check if we have non-64-bit Indexes
                return self, other.astype(self.dtype)

        elif self._is_multi and not other._is_multi:
            try:
                # "Type[Index]" has no attribute "from_tuples"
                other = type(self).from_tuples(other)  # type: ignore[attr-defined]
            except (TypeError, ValueError):
                # let's instead try with a straight Index
                self = Index(self._values)

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we dont need to re-implement on the subclasses
            other, self = other._maybe_promote(self)

        return self, other
    @final
    def _find_common_type_compat(self, target) -> DtypeObj:
        """
        Implementation of find_common_type that adjusts for Index-specific
        special cases.

        Parameters
        ----------
        target : object
            Scalar, array-like, or Index to find a common dtype with.

        Returns
        -------
        DtypeObj
            A dtype that can hold both self's values and target.
        """
        if is_valid_na_for_dtype(target, self.dtype):
            # e.g. setting NA value into IntervalArray[int64]
            dtype = ensure_dtype_can_hold_na(self.dtype)
            if is_dtype_equal(self.dtype, dtype):
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas"
                )
            return dtype

        target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

        # special case: if one dtype is uint64 and the other a signed int, return object
        # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
        # Now it's:
        # * float | [u]int -> float
        # * uint64 | signed int -> object
        # We may change union(float | [u]int) to go to object.
        if self.dtype == "uint64" or target_dtype == "uint64":
            if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
                target_dtype
            ):
                return _dtype_obj

        dtype = find_common_type([self.dtype, target_dtype])
        dtype = common_dtype_categorical_compat([self, target], dtype)
        return dtype
6328 @final
6329 def _should_compare(self, other: Index) -> bool:
6330 """
6331 Check if `self == other` can ever have non-False entries.
6332 """
6334 if (other.is_boolean() and self.is_numeric()) or (
6335 self.is_boolean() and other.is_numeric()
6336 ):
6337 # GH#16877 Treat boolean labels passed to a numeric index as not
6338 # found. Without this fix False and True would be treated as 0 and 1
6339 # respectively.
6340 return False
6342 other = unpack_nested_dtype(other)
6343 dtype = other.dtype
6344 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6346 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6347 """
6348 Can we compare values of the given dtype to our own?
6349 """
6350 if self.dtype.kind == "b":
6351 return dtype.kind == "b"
6352 elif is_numeric_dtype(self.dtype):
6353 return is_numeric_dtype(dtype)
6354 return True
6356 @final
6357 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6358 """
6359 Group the index labels by a given array of values.
6361 Parameters
6362 ----------
6363 values : array
6364 Values used to determine the groups.
6366 Returns
6367 -------
6368 dict
6369 {group name -> group labels}
6370 """
6371 # TODO: if we are a MultiIndex, we can do better
6372 # that converting to tuples
6373 if isinstance(values, ABCMultiIndex):
6374 values = values._values
6375 values = Categorical(values)
6376 result = values._reverse_indexer()
6378 # map to the label
6379 result = {k: self.take(v) for k, v in result.items()}
6381 return PrettyDict(result)
    def map(self, mapper, na_action=None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # Tuples in the result -> return a MultiIndex.
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                # Replicate the single name across all resulting levels.
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty: preserve the original dtype
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        # don't want to cast back to floating. But if we are UInt64
        # and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
            new_values.dtype
        ):
            # Keep the numeric Index subclass when the result stays numeric.
            return self._constructor(
                new_values, dtype=dtype, copy=False, name=self.name
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6439 # TODO: De-duplicate with map, xref GH#32349
6440 @final
6441 def _transform_index(self, func, *, level=None) -> Index:
6442 """
6443 Apply function to all values found in index.
6445 This includes transforming multiindex entries separately.
6446 Only apply function to one level of the MultiIndex if level is specified.
6447 """
6448 if isinstance(self, ABCMultiIndex):
6449 if level is not None:
6450 # Caller is responsible for ensuring level is positional.
6451 items = [
6452 tuple(func(y) if i == level else y for i, y in enumerate(x))
6453 for x in self
6454 ]
6455 else:
6456 items = [tuple(func(y) for y in x) for x in self]
6457 return type(self).from_tuples(items, names=self.names)
6458 else:
6459 items = [func(x) for x in self]
6460 return Index(items, name=self.name, tupleize_cols=False)
    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        np.ndarray[bool]
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Int64Index([1, 2, 3], dtype='int64')

        Check whether each index value in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                  ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1, 'red'),
                    (2, 'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            # validates that `level` refers to this index; the membership
            # test itself is always computed on self._values below
            self._validate_index_level(level)
        return algos.isin(self._values, values)
    def _get_string_slice(self, key: str_t):
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex;
        # the base Index has no partial-string semantics, so always raise
        raise NotImplementedError
6552 def slice_indexer(
6553 self,
6554 start: Hashable | None = None,
6555 end: Hashable | None = None,
6556 step: int | None = None,
6557 kind=no_default,
6558 ) -> slice:
6559 """
6560 Compute the slice indexer for input labels and step.
6562 Index needs to be ordered and unique.
6564 Parameters
6565 ----------
6566 start : label, default None
6567 If None, defaults to the beginning.
6568 end : label, default None
6569 If None, defaults to the end.
6570 step : int, default None
6571 kind : str, default None
6573 .. deprecated:: 1.4.0
6575 Returns
6576 -------
6577 indexer : slice
6579 Raises
6580 ------
6581 KeyError : If key does not exist, or key is not unique and index is
6582 not ordered.
6584 Notes
6585 -----
6586 This function assumes that the data is sorted, so use at your own peril
6588 Examples
6589 --------
6590 This is a method on all index types. For example you can do:
6592 >>> idx = pd.Index(list('abcd'))
6593 >>> idx.slice_indexer(start='b', end='c')
6594 slice(1, 3, None)
6596 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6597 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6598 slice(1, 3, None)
6599 """
6600 self._deprecated_arg(kind, "kind", "slice_indexer")
6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
6604 # return a slice
6605 if not is_scalar(start_slice):
6606 raise AssertionError("Start slice bound is non-scalar")
6607 if not is_scalar(end_slice):
6608 raise AssertionError("End slice bound is non-scalar")
6610 return slice(start_slice, end_slice, step)
6612 def _maybe_cast_indexer(self, key):
6613 """
6614 If we have a float key and are not a floating index, then try to cast
6615 to an int if equivalent.
6616 """
6617 return key
6619 def _maybe_cast_listlike_indexer(self, target) -> Index:
6620 """
6621 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6622 """
6623 return ensure_index(target)
6625 @final
6626 def _validate_indexer(self, form: str_t, key, kind: str_t):
6627 """
6628 If we are positional indexer, validate that we have appropriate
6629 typed bounds must be an integer.
6630 """
6631 assert kind in ["getitem", "iloc"]
6633 if key is not None and not is_integer(key):
6634 raise self._invalid_indexer(form, key)
6636 def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
6637 """
6638 This function should be overloaded in subclasses that allow non-trivial
6639 casting on label-slice bounds, e.g. datetime-like indices allowing
6640 strings containing formatted datetimes.
6642 Parameters
6643 ----------
6644 label : object
6645 side : {'left', 'right'}
6646 kind : {'loc', 'getitem'} or None
6648 .. deprecated:: 1.3.0
6650 Returns
6651 -------
6652 label : object
6654 Notes
6655 -----
6656 Value of `side` parameter should be validated in caller.
6657 """
6658 assert kind in ["loc", "getitem", None, no_default]
6659 self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
6661 # We are a plain index here (sub-class override this method if they
6662 # wish to have special treatment for floats/ints, e.g. Float64Index and
6663 # datetimelike Indexes
6664 # reject them, if index does not contain label
6665 if (is_float(label) or is_integer(label)) and label not in self:
6666 raise self._invalid_indexer("slice", label)
6668 return label
6670 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6671 if self.is_monotonic_increasing:
6672 return self.searchsorted(label, side=side)
6673 elif self.is_monotonic_decreasing:
6674 # np.searchsorted expects ascending sort order, have to reverse
6675 # everything for it to work (element ordering, search side and
6676 # resulting value).
6677 pos = self[::-1].searchsorted(
6678 label, side="right" if side == "left" else "left"
6679 )
6680 return len(self) - pos
6682 raise ValueError("index must be monotonic increasing or decreasing")
6684 def get_slice_bound(
6685 self, label, side: Literal["left", "right"], kind=no_default
6686 ) -> int:
6687 """
6688 Calculate slice bound that corresponds to given label.
6690 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
6691 of given label.
6693 Parameters
6694 ----------
6695 label : object
6696 side : {'left', 'right'}
6697 kind : {'loc', 'getitem'} or None
6699 .. deprecated:: 1.4.0
6701 Returns
6702 -------
6703 int
6704 Index of label.
6705 """
6706 assert kind in ["loc", "getitem", None, no_default]
6707 self._deprecated_arg(kind, "kind", "get_slice_bound")
6709 if side not in ("left", "right"):
6710 raise ValueError(
6711 "Invalid value for side kwarg, must be either "
6712 f"'left' or 'right': {side}"
6713 )
6715 original_label = label
6717 # For datetime indices label may be a string that has to be converted
6718 # to datetime boundary according to its resolution.
6719 label = self._maybe_cast_slice_bound(label, side)
6721 # we need to look up the label
6722 try:
6723 slc = self.get_loc(label)
6724 except KeyError as err:
6725 try:
6726 return self._searchsorted_monotonic(label, side)
6727 except ValueError:
6728 # raise the original KeyError
6729 raise err
6731 if isinstance(slc, np.ndarray):
6732 # get_loc may return a boolean array, which
6733 # is OK as long as they are representable by a slice.
6734 assert is_bool_dtype(slc.dtype)
6735 slc = lib.maybe_booleans_to_slice(slc.view("u1"))
6736 if isinstance(slc, np.ndarray):
6737 raise KeyError(
6738 f"Cannot get {side} slice bound for non-unique "
6739 f"label: {repr(original_label)}"
6740 )
6742 if isinstance(slc, slice):
6743 if side == "left":
6744 return slc.start
6745 else:
6746 return slc.stop
6747 else:
6748 if side == "right":
6749 return slc + 1
6750 else:
6751 return slc
    def slice_locs(
        self, start=None, end=None, step=None, kind=no_default
    ) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.
        kind : {'loc', 'getitem'} or None

            .. deprecated:: 1.4.0

        Returns
        -------
        start, end : int

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        self._deprecated_arg(kind, "kind", "slice_locs")
        # a negative step means the labels were given in reverse order
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as dates: skip the offset consistency check
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        # None bounds default to the full extent of the index
        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
6847 def delete(self: _IndexT, loc) -> _IndexT:
6848 """
6849 Make new Index with passed location(-s) deleted.
6851 Parameters
6852 ----------
6853 loc : int or list of int
6854 Location of item(-s) which will be deleted.
6855 Use a list of locations to delete more than one value at the same time.
6857 Returns
6858 -------
6859 Index
6860 Will be same type as self, except for RangeIndex.
6862 See Also
6863 --------
6864 numpy.delete : Delete any rows and column from NumPy array (ndarray).
6866 Examples
6867 --------
6868 >>> idx = pd.Index(['a', 'b', 'c'])
6869 >>> idx.delete(1)
6870 Index(['a', 'c'], dtype='object')
6872 >>> idx = pd.Index(['a', 'b', 'c'])
6873 >>> idx.delete([0, 2])
6874 Index(['b'], dtype='object')
6875 """
6876 values = self._values
6877 res_values: ArrayLike
6878 if isinstance(values, np.ndarray):
6879 # TODO(__array_function__): special casing will be unnecessary
6880 res_values = np.delete(values, loc)
6881 else:
6882 res_values = values.delete(loc)
6884 # _constructor so RangeIndex->Int64Index
6885 return self._constructor._simple_new(res_values, name=self.name)
6887 def insert(self, loc: int, item) -> Index:
6888 """
6889 Make new Index inserting new item at location.
6891 Follows Python numpy.insert semantics for negative values.
6893 Parameters
6894 ----------
6895 loc : int
6896 item : object
6898 Returns
6899 -------
6900 new_index : Index
6901 """
6902 item = lib.item_from_zerodim(item)
6903 if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
6904 item = self._na_value
6906 arr = self._values
6908 try:
6909 if isinstance(arr, ExtensionArray):
6910 res_values = arr.insert(loc, item)
6911 return type(self)._simple_new(res_values, name=self.name)
6912 else:
6913 item = self._validate_fill_value(item)
6914 except (TypeError, ValueError, LossySetitemError):
6915 # e.g. trying to insert an integer into a DatetimeIndex
6916 # We cannot keep the same dtype, so cast to the (often object)
6917 # minimal shared dtype before doing the insert.
6918 dtype = self._find_common_type_compat(item)
6919 return self.astype(dtype).insert(loc, item)
6921 if arr.dtype != object or not isinstance(
6922 item, (tuple, np.datetime64, np.timedelta64)
6923 ):
6924 # with object-dtype we need to worry about numpy incorrectly casting
6925 # dt64/td64 to integer, also about treating tuples as sequences
6926 # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
6927 casted = arr.dtype.type(item)
6928 new_values = np.insert(arr, loc, casted)
6930 else:
6931 # error: No overload variant of "insert" matches argument types
6932 # "ndarray[Any, Any]", "int", "None"
6933 new_values = np.insert(arr, loc, None) # type: ignore[call-overload]
6934 loc = loc if loc >= 0 else loc - 1
6935 new_values[loc] = item
6937 if self._typ == "numericindex":
6938 # Use self._constructor instead of Index to retain NumericIndex GH#43921
6939 # TODO(2.0) can use Index instead of self._constructor
6940 return self._constructor._with_infer(new_values, name=self.name)
6941 else:
6942 return Index._with_infer(new_values, name=self.name)
6944 def drop(
6945 self,
6946 labels: Index | np.ndarray | Iterable[Hashable],
6947 errors: IgnoreRaise = "raise",
6948 ) -> Index:
6949 """
6950 Make new Index with passed list of labels deleted.
6952 Parameters
6953 ----------
6954 labels : array-like or scalar
6955 errors : {'ignore', 'raise'}, default 'raise'
6956 If 'ignore', suppress error and existing labels are dropped.
6958 Returns
6959 -------
6960 dropped : Index
6961 Will be same type as self, except for RangeIndex.
6963 Raises
6964 ------
6965 KeyError
6966 If not all of the labels are found in the selected axis
6967 """
6968 if not isinstance(labels, Index):
6969 # avoid materializing e.g. RangeIndex
6970 arr_dtype = "object" if self.dtype == "object" else None
6971 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
6973 indexer = self.get_indexer_for(labels)
6974 mask = indexer == -1
6975 if mask.any():
6976 if errors != "ignore":
6977 raise KeyError(f"{list(labels[mask])} not found in axis")
6978 indexer = indexer[~mask]
6979 return self.delete(indexer)
6981 # --------------------------------------------------------------------
6982 # Generated Arithmetic, Comparison, and Unary Methods
    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.

        Parameters
        ----------
        other : object
            Right-hand operand of the comparison.
        op : callable
            Comparison operator from the ``operator`` module.
        """
        if self.is_(other):
            # fastpath: comparing an index with itself
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    # NA != NA, so reflexive comparisons are False at NA slots
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        # elementwise comparison requires equal lengths
        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            other = extract_array(other, extract_numpy=True)
        else:
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
7031 def _construct_result(self, result, name):
7032 if isinstance(result, tuple):
7033 return (
7034 Index._with_infer(result[0], name=name),
7035 Index._with_infer(result[1], name=name),
7036 )
7037 return Index._with_infer(result, name=name)
7039 def _arith_method(self, other, op):
7040 if (
7041 isinstance(other, Index)
7042 and is_object_dtype(other.dtype)
7043 and type(other) is not Index
7044 ):
7045 # We return NotImplemented for object-dtype index *subclasses* so they have
7046 # a chance to implement ops before we unwrap them.
7047 # See https://github.com/pandas-dev/pandas/issues/31109
7048 return NotImplemented
7050 return super()._arith_method(other, op)
7052 @final
7053 def _unary_method(self, op):
7054 result = op(self._values)
7055 return Index(result, name=self.name)
    def __abs__(self) -> Index:
        # support for builtin abs(): elementwise absolute value
        return self._unary_method(operator.abs)
    def __neg__(self) -> Index:
        # support for unary minus: elementwise negation
        return self._unary_method(operator.neg)
    def __pos__(self) -> Index:
        # support for unary plus: elementwise identity via operator.pos
        return self._unary_method(operator.pos)
    def __invert__(self) -> Index:
        # GH#8875 support for the ~ operator: elementwise bitwise inversion
        return self._unary_method(operator.inv)
7070 # --------------------------------------------------------------------
7071 # Reductions
    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        any : bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        nv.validate_any(args, kwargs)
        # raises for index types that don't support logical reductions
        self._maybe_disable_logical_methods("any")
        # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.any(self.values)  # type: ignore[arg-type]
    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        all : bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False
        """
        nv.validate_all(args, kwargs)
        # raises for index types that don't support logical reductions
        self._maybe_disable_logical_methods("all")
        # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.all(self.values)  # type: ignore[arg-type]
7164 @final
7165 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
7166 """
7167 raise if this Index subclass does not support any or all.
7168 """
7169 if (
7170 isinstance(self, ABCMultiIndex)
7171 or needs_i8_conversion(self.dtype)
7172 or is_interval_dtype(self.dtype)
7173 or is_categorical_dtype(self.dtype)
7174 or is_float_dtype(self.dtype)
7175 ):
7176 # This call will raise
7177 make_invalid_op(opname)(self)
7179 @Appender(IndexOpsMixin.argmin.__doc__)
7180 def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int:
7181 nv.validate_argmin(args, kwargs)
7182 nv.validate_minmax_axis(axis)
7184 if not self._is_multi and self.hasnans:
7185 # Take advantage of cache
7186 mask = self._isnan
7187 if not skipna or mask.all():
7188 return -1
7189 return super().argmin(skipna=skipna)
7191 @Appender(IndexOpsMixin.argmax.__doc__)
7192 def argmax(self, axis=None, skipna=True, *args, **kwargs) -> int:
7193 nv.validate_argmax(args, kwargs)
7194 nv.validate_minmax_axis(axis)
7196 if not self._is_multi and self.hasnans:
7197 # Take advantage of cache
7198 mask = self._isnan
7199 if not skipna or mask.all():
7200 return -1
7201 return super().argmax(skipna=skipna)
7203 @doc(IndexOpsMixin.min)
7204 def min(self, axis=None, skipna=True, *args, **kwargs):
7205 nv.validate_min(args, kwargs)
7206 nv.validate_minmax_axis(axis)
7208 if not len(self):
7209 return self._na_value
7211 if len(self) and self.is_monotonic_increasing:
7212 # quick check
7213 first = self[0]
7214 if not isna(first):
7215 return first
7217 if not self._is_multi and self.hasnans:
7218 # Take advantage of cache
7219 mask = self._isnan
7220 if not skipna or mask.all():
7221 return self._na_value
7223 if not self._is_multi and not isinstance(self._values, np.ndarray):
7224 # "ExtensionArray" has no attribute "min"
7225 return self._values.min(skipna=skipna) # type: ignore[attr-defined]
7227 return super().min(skipna=skipna)
7229 @doc(IndexOpsMixin.max)
7230 def max(self, axis=None, skipna=True, *args, **kwargs):
7231 nv.validate_max(args, kwargs)
7232 nv.validate_minmax_axis(axis)
7234 if not len(self):
7235 return self._na_value
7237 if len(self) and self.is_monotonic_increasing:
7238 # quick check
7239 last = self[-1]
7240 if not isna(last):
7241 return last
7243 if not self._is_multi and self.hasnans:
7244 # Take advantage of cache
7245 mask = self._isnan
7246 if not skipna or mask.all():
7247 return self._na_value
7249 if not self._is_multi and not isinstance(self._values, np.ndarray):
7250 # "ExtensionArray" has no attribute "max"
7251 return self._values.max(skipna=skipna) # type: ignore[attr-defined]
7253 return super().max(skipna=skipna)
7255 # --------------------------------------------------------------------
    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        # An Index is always 1-dimensional; use len() to avoid materializing
        # the underlying values.
        return (len(self),)
7266 @final
7267 def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None:
7268 """
7269 Issue a FutureWarning if the arg/kwarg is not no_default.
7270 """
7271 if value is not no_default:
7272 warnings.warn(
7273 f"'{name}' argument in {methodname} is deprecated "
7274 "and will be removed in a future version. Do not pass it.",
7275 FutureWarning,
7276 stacklevel=find_stack_level(),
7277 )
def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)
    # single sequence: a flat Index with at most one name
    name = names[0] if names is not None else None
    return Index._with_infer(sequences[0], name=name)
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if isinstance(index_like, ABCSeries):
        return Index._with_infer(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index._with_infer(index_like, copy=copy)

    if type(index_like) is not list:
        # must be exactly ``list`` because of the strict type
        # check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        # a list of array-likes describes one level per entry
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)
    return Index._with_infer(index_like, copy=copy, tupleize_cols=False)
def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        # no __len__: materialize the iterator
        return list(seq)
    return seq
def trim_front(strings: list[str]) -> list[str]:
    """
    Strip the longest common run of leading spaces from each string.

    Stops as soon as any string becomes empty or any string no longer
    starts with a space, so relative alignment beyond the common prefix
    is preserved.  (The previous docstring incorrectly described this as
    trimming zeros and decimal points.)

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    # peel one leading space off every string while all are non-empty
    # and all still start with a space
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings
7410def _validate_join_method(method: str) -> None:
7411 if method not in ["left", "right", "inner", "outer"]:
7412 raise ValueError(f"do not recognize join method {method}")
def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # deliberately not getattr(obj, "name", None): that could pick up
        # e.g. dtype.name from non-Index objects
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name
# Module-level template for the deprecation warning issued when an
# object-dtype array-like is implicitly inferred to a numeric dtype.
_cast_depr_msg = (
    "In a future version, passing an object-dtype arraylike to pd.Index will "
    "not infer numeric values to numeric dtype (matching the Series behavior). "
    "To retain the old behavior, explicitly pass the desired dtype or use the "
    "desired Index subclass"
)
7439def _maybe_cast_data_without_dtype(
7440 subarr: np.ndarray, cast_numeric_deprecated: bool = True
7441) -> ArrayLike:
7442 """
7443 If we have an arraylike input but no passed dtype, try to infer
7444 a supported dtype.
7446 Parameters
7447 ----------
7448 subarr : np.ndarray[object]
7449 cast_numeric_deprecated : bool, default True
7450 Whether to issue a FutureWarning when inferring numeric dtypes.
7452 Returns
7453 -------
7454 np.ndarray or ExtensionArray
7455 """
7457 result = lib.maybe_convert_objects(
7458 subarr,
7459 convert_datetime=True,
7460 convert_timedelta=True,
7461 convert_period=True,
7462 convert_interval=True,
7463 dtype_if_all_nat=np.dtype("datetime64[ns]"),
7464 )
7465 if result.dtype.kind in ["i", "u", "f"]:
7466 if not cast_numeric_deprecated:
7467 # i.e. we started with a list, not an ndarray[object]
7468 return result
7470 warnings.warn(
7471 "In a future version, the Index constructor will not infer numeric "
7472 "dtypes when passed object-dtype sequences (matching Series behavior)",
7473 FutureWarning,
7474 stacklevel=find_stack_level(),
7475 )
7476 result = ensure_wrapped_if_datetimelike(result)
7477 return result
def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the unanimous 'names' found.
    """
    # transpose per-index name tuples into per-level groups
    per_level = zip_longest(*(tuple(idx.names) for idx in indexes))
    return tuple(
        next(iter(group)) if len(group) == 1 else None
        for group in (set(names) for names in per_level)
    )
def unpack_nested_dtype(other: _IndexT) -> _IndexT:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    dtype = other.dtype
    if not is_categorical_dtype(dtype):
        return other
    # A SparseIndex, if it ever exists, would also be dispatched here.
    # error: Item "dtype[Any]"/"ExtensionDtype" of "Union[dtype[Any],
    # ExtensionDtype]" has no attribute "categories"
    return dtype.categories  # type: ignore[union-attr]
7522def _maybe_try_sort(result, sort):
7523 if sort is None:
7524 try:
7525 result = algos.safe_sort(result)
7526 except TypeError as err:
7527 warnings.warn(
7528 f"{err}, sort order is undefined for incomparable objects.",
7529 RuntimeWarning,
7530 stacklevel=find_stack_level(),
7531 )
7532 return result