Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/multi.py: 11%
1404 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from functools import wraps
4from sys import getsizeof
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Collection,
10 Hashable,
11 Iterable,
12 List,
13 Literal,
14 Sequence,
15 Tuple,
16 cast,
17)
18import warnings
20import numpy as np
22from pandas._config import get_option
24from pandas._libs import (
25 algos as libalgos,
26 index as libindex,
27 lib,
28)
29from pandas._libs.hashtable import duplicated
30from pandas._typing import (
31 AnyArrayLike,
32 DtypeObj,
33 F,
34 Scalar,
35 Shape,
36 npt,
37)
38from pandas.compat.numpy import function as nv
39from pandas.errors import (
40 InvalidIndexError,
41 PerformanceWarning,
42 UnsortedIndexError,
43)
44from pandas.util._decorators import (
45 Appender,
46 cache_readonly,
47 deprecate_nonkeyword_arguments,
48 doc,
49)
50from pandas.util._exceptions import find_stack_level
52from pandas.core.dtypes.cast import coerce_indexer_dtype
53from pandas.core.dtypes.common import (
54 ensure_int64,
55 ensure_platform_int,
56 is_categorical_dtype,
57 is_extension_array_dtype,
58 is_hashable,
59 is_integer,
60 is_iterator,
61 is_list_like,
62 is_object_dtype,
63 is_scalar,
64 pandas_dtype,
65)
66from pandas.core.dtypes.dtypes import ExtensionDtype
67from pandas.core.dtypes.generic import (
68 ABCDataFrame,
69 ABCDatetimeIndex,
70 ABCTimedeltaIndex,
71)
72from pandas.core.dtypes.missing import (
73 array_equivalent,
74 isna,
75)
77import pandas.core.algorithms as algos
78from pandas.core.arrays import Categorical
79from pandas.core.arrays.categorical import factorize_from_iterables
80import pandas.core.common as com
81import pandas.core.indexes.base as ibase
82from pandas.core.indexes.base import (
83 Index,
84 _index_shared_docs,
85 ensure_index,
86 get_unanimous_names,
87)
88from pandas.core.indexes.frozen import FrozenList
89from pandas.core.ops.invalid import make_invalid_op
90from pandas.core.sorting import (
91 get_group_index,
92 indexer_from_factorized,
93 lexsort_indexer,
94)
96from pandas.io.formats.printing import pprint_thing
if TYPE_CHECKING:
99 from pandas import (
100 CategoricalIndex,
101 DataFrame,
102 Series,
103 )
105_index_doc_kwargs = dict(ibase._index_doc_kwargs)
106_index_doc_kwargs.update(
107 {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
108)
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine for a MultiIndex whose label combinations fit in a uint64.

    Label combinations are mapped to positive integers by packing each
    level's codes into its own bit field.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 apiece.

        The packing is strictly monotonic, i.e. it respects the
        lexicographic order of integer combinations: see
        BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift each level's codes into its pre-computed, disjoint bit field.
        codes <<= self.offsets

        # With disjoint bit fields, OR is interchangeable with summation.
        # Reduce over the last axis: the whole array for a single key
        # (ndim == 1), per-row for multiple keys (ndim == 2).
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Engine for the (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers; backed by an ObjectEngine
    holding Python ints.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python int apiece.

        The packing is strictly monotonic, i.e. it respects the
        lexicographic order of integer combinations: see
        BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shifting can overflow uint64, so switch to Python integers
        # (object dtype) before moving each level into its bit field.
        shifted = codes.astype("object") << self.offsets

        # Disjoint bit fields make OR equivalent to summation; reduce over
        # the last axis (whole array for one key, per-row otherwise).
        return np.bitwise_or.reduce(shifted, axis=shifted.ndim - 1)
def names_compat(meth: F) -> F:
    """
    A decorator allowing either the `name` or the `names` keyword, but
    not both.

    `name` is translated into `names` before the call, which makes it
    easier to share code with the base class.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        has_name = "name" in kwargs
        if has_name and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if has_name:
            # Normalize the singular spelling to the plural one.
            kwargs["names"] = kwargs.pop("name")

        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)
210class MultiIndex(Index):
211 """
212 A multi-level, or hierarchical, index object for pandas objects.
214 Parameters
215 ----------
216 levels : sequence of arrays
217 The unique labels for each level.
218 codes : sequence of arrays
219 Integers for each level designating which label at each location.
220 sortorder : optional int
221 Level of sortedness (must be lexicographically sorted by that
222 level).
223 names : optional sequence of objects
224 Names for each of the index levels. (name is accepted for compat).
225 copy : bool, default False
226 Copy the meta-data.
227 verify_integrity : bool, default True
228 Check that the levels/codes are consistent and valid.
230 Attributes
231 ----------
232 names
233 levels
234 codes
235 nlevels
236 levshape
238 Methods
239 -------
240 from_arrays
241 from_tuples
242 from_product
243 from_frame
244 set_levels
245 set_codes
246 to_frame
247 to_flat_index
248 sortlevel
249 droplevel
250 swaplevel
251 reorder_levels
252 remove_unused_levels
253 get_locs
255 See Also
256 --------
257 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
258 MultiIndex.from_product : Create a MultiIndex from the cartesian product
259 of iterables.
260 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
261 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
262 Index : The base pandas Index type.
264 Notes
265 -----
266 See the `user guide
267 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
268 for more.
270 Examples
271 --------
272 A new ``MultiIndex`` is typically constructed using one of the helper
273 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
274 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
276 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
277 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
278 MultiIndex([(1, 'red'),
279 (1, 'blue'),
280 (2, 'red'),
281 (2, 'blue')],
282 names=['number', 'color'])
284 See further examples for how to construct a MultiIndex in the doc strings
285 of the mentioned helper methods.
286 """
    # Attributes hidden from tab completion; nothing added beyond Index's.
    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    # One entry per level; None marks an unnamed level.
    _names: list[Hashable | None] = []
    # Immutable list of per-level Index objects.
    _levels = FrozenList()
    # Immutable list of per-level integer code arrays (-1 is the
    # missing-value sentinel; see _validate_codes).
    _codes = FrozenList()
    # Attributes considered when comparing indexes.
    _comparables = ["names"]

    # Number of levels by which the index is claimed to be lexsorted,
    # or None when unknown (validated in _verify_integrity).
    sortorder: int | None
299 # --------------------------------------------------------------------
300 # Constructors
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,  # accepted for Index-signature compat; not referenced here
        copy=False,
        name=None,
        verify_integrity: bool = True,
    ) -> MultiIndex:
        """
        Construct a MultiIndex from pre-factorized ``levels`` and ``codes``.

        Raises
        ------
        TypeError
            If either ``levels`` or ``codes`` is missing.
        ValueError
            If ``levels`` and ``codes`` differ in length, are empty, or
            (when ``verify_integrity`` is True) fail the integrity checks.
        """

        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # Bypass Index.__new__'s data handling entirely.
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # _verify_integrity may rewrite codes (-1 for codes pointing
            # at NaN level values).
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()

        return result
349 def _validate_codes(self, level: list, code: list):
350 """
351 Reassign code values as -1 if their corresponding levels are NaN.
353 Parameters
354 ----------
355 code : list
356 Code to reassign.
357 level : list
358 Level to check for missing values (NaN, NaT, None).
360 Returns
361 -------
362 new code where code value = -1 if it corresponds
363 to a level with missing values (NaN, NaT, None).
364 """
365 null_mask = isna(level)
366 if np.any(null_mask):
367 # error: Incompatible types in assignment
368 # (expression has type "ndarray[Any, dtype[Any]]",
369 # variable has type "List[Any]")
370 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
371 return code
    def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
        """
        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actually sortorder.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # All code arrays must share one length; use the first as reference.
        codes_length = len(codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # Codes index into the level, so the max code must be in bounds.
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            # -1 is the only legal negative code (missing-value sentinel).
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # A claimed sortorder may not exceed the actual lexsort depth.
            if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
                )

        # Rewrite codes that point at missing level values to -1.
        codes = [
            self._validate_codes(level, code) for level, code in zip(levels, codes)
        ]
        new_codes = FrozenList(codes)
        return new_codes
435 @classmethod
436 def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex:
437 """
438 Convert arrays to MultiIndex.
440 Parameters
441 ----------
442 arrays : list / sequence of array-likes
443 Each array-like gives one level's value for each data point.
444 len(arrays) is the number of levels.
445 sortorder : int or None
446 Level of sortedness (must be lexicographically sorted by that
447 level).
448 names : list / sequence of str, optional
449 Names for the levels in the index.
451 Returns
452 -------
453 MultiIndex
455 See Also
456 --------
457 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
458 MultiIndex.from_product : Make a MultiIndex from cartesian product
459 of iterables.
460 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
462 Examples
463 --------
464 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
465 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
466 MultiIndex([(1, 'red'),
467 (1, 'blue'),
468 (2, 'red'),
469 (2, 'blue')],
470 names=['number', 'color'])
471 """
472 error_msg = "Input must be a list / sequence of array-likes."
473 if not is_list_like(arrays):
474 raise TypeError(error_msg)
475 elif is_iterator(arrays):
476 arrays = list(arrays)
478 # Check if elements of array are list-like
479 for array in arrays:
480 if not is_list_like(array):
481 raise TypeError(error_msg)
483 # Check if lengths of all arrays are equal or not,
484 # raise ValueError, if not
485 for i in range(1, len(arrays)):
486 if len(arrays[i]) != len(arrays[i - 1]):
487 raise ValueError("all arrays must be same length")
489 codes, levels = factorize_from_iterables(arrays)
490 if names is lib.no_default:
491 names = [getattr(arr, "name", None) for arr in arrays]
493 return cls(
494 levels=levels,
495 codes=codes,
496 sortorder=sortorder,
497 names=names,
498 verify_integrity=False,
499 )
    @classmethod
    @names_compat
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1,  'red'),
                    (1, 'blue'),
                    (2,  'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        elif is_iterator(tuples):
            tuples = list(tuples)
        tuples = cast(Collection[Tuple[Hashable, ...]], tuples)

        # handling the empty tuple cases
        # (all entries are empty tuples: build a single level holding the
        # empty tuples, with all-zero codes)
        if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
            codes = [np.zeros(len(tuples))]
            levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
            return cls(
                levels=levels,
                codes=codes,
                sortorder=sortorder,
                names=names,
                verify_integrity=False,
            )

        arrays: list[Sequence[Hashable]]
        if len(tuples) == 0:
            # Empty input: the number of levels can only come from `names`.
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # error: Argument 1 to "len" has incompatible type "Hashable";
            # expected "Sized"
            arrays = [[]] * len(names)  # type: ignore[arg-type]
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = np.asarray(tuples._values)

            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # Generic iterable of tuples: transpose lazily via zip.
            arrs = zip(*tuples)
            arrays = cast(List[Sequence[Hashable]], arrs)

        return cls.from_arrays(arrays, sortorder=sortorder, names=names)
582 @classmethod
583 def from_product(
584 cls,
585 iterables: Sequence[Iterable[Hashable]],
586 sortorder: int | None = None,
587 names: Sequence[Hashable] | lib.NoDefault = lib.no_default,
588 ) -> MultiIndex:
589 """
590 Make a MultiIndex from the cartesian product of multiple iterables.
592 Parameters
593 ----------
594 iterables : list / sequence of iterables
595 Each iterable has unique labels for each level of the index.
596 sortorder : int or None
597 Level of sortedness (must be lexicographically sorted by that
598 level).
599 names : list / sequence of str, optional
600 Names for the levels in the index.
602 .. versionchanged:: 1.0.0
604 If not explicitly provided, names will be inferred from the
605 elements of iterables if an element has a name attribute
607 Returns
608 -------
609 MultiIndex
611 See Also
612 --------
613 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
614 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
615 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
617 Examples
618 --------
619 >>> numbers = [0, 1, 2]
620 >>> colors = ['green', 'purple']
621 >>> pd.MultiIndex.from_product([numbers, colors],
622 ... names=['number', 'color'])
623 MultiIndex([(0, 'green'),
624 (0, 'purple'),
625 (1, 'green'),
626 (1, 'purple'),
627 (2, 'green'),
628 (2, 'purple')],
629 names=['number', 'color'])
630 """
631 from pandas.core.reshape.util import cartesian_product
633 if not is_list_like(iterables):
634 raise TypeError("Input must be a list / sequence of iterables.")
635 elif is_iterator(iterables):
636 iterables = list(iterables)
638 codes, levels = factorize_from_iterables(iterables)
639 if names is lib.no_default:
640 names = [getattr(it, "name", None) for it in iterables]
642 # codes are all ndarrays, so cartesian_product is lossless
643 codes = cartesian_product(codes)
644 return cls(levels, codes, sortorder=sortorder, names=names)
646 @classmethod
647 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
648 """
649 Make a MultiIndex from a DataFrame.
651 Parameters
652 ----------
653 df : DataFrame
654 DataFrame to be converted to MultiIndex.
655 sortorder : int, optional
656 Level of sortedness (must be lexicographically sorted by that
657 level).
658 names : list-like, optional
659 If no names are provided, use the column names, or tuple of column
660 names if the columns is a MultiIndex. If a sequence, overwrite
661 names with the given sequence.
663 Returns
664 -------
665 MultiIndex
666 The MultiIndex representation of the given DataFrame.
668 See Also
669 --------
670 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
671 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
672 MultiIndex.from_product : Make a MultiIndex from cartesian product
673 of iterables.
675 Examples
676 --------
677 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
678 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
679 ... columns=['a', 'b'])
680 >>> df
681 a b
682 0 HI Temp
683 1 HI Precip
684 2 NJ Temp
685 3 NJ Precip
687 >>> pd.MultiIndex.from_frame(df)
688 MultiIndex([('HI', 'Temp'),
689 ('HI', 'Precip'),
690 ('NJ', 'Temp'),
691 ('NJ', 'Precip')],
692 names=['a', 'b'])
694 Using explicit names, instead of the column names
696 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
697 MultiIndex([('HI', 'Temp'),
698 ('HI', 'Precip'),
699 ('NJ', 'Temp'),
700 ('NJ', 'Precip')],
701 names=['state', 'observation'])
702 """
703 if not isinstance(df, ABCDataFrame):
704 raise TypeError("Input must be a DataFrame")
706 column_names, columns = zip(*df.items())
707 names = column_names if names is None else names
708 return cls.from_arrays(columns, sortorder=sortorder, names=names)
710 # --------------------------------------------------------------------
    @cache_readonly
    def _values(self) -> np.ndarray:
        """Return a 1-D object ndarray with one entry per index location."""
        # We override here, since our parent uses _data, which we don't use.
        values = []

        for i in range(self.nlevels):
            index = self.levels[i]
            codes = self.codes[i]

            vals = index
            if is_categorical_dtype(vals.dtype):
                # Unwrap categoricals to their raw values.
                vals = cast("CategoricalIndex", vals)
                vals = vals._data._internal_get_values()

            is_dti = isinstance(vals, ABCDatetimeIndex)

            if is_dti:
                # TODO: this can be removed after Timestamp.freq is removed
                # The astype(object) below does not remove the freq from
                # the underlying Timestamps so we remove it here to match
                # the behavior of self._get_level_values
                vals = algos.take_nd(vals, codes, fill_value=index._na_value)

            if isinstance(vals.dtype, ExtensionDtype) or isinstance(
                vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
            ):
                # Fall back to object dtype so values can be zipped below.
                vals = vals.astype(object)

            vals = np.array(vals, copy=False)
            if not is_dti:
                # Expand level values to full length via the codes; -1 codes
                # become the level's NA value (datetimes did this above).
                vals = algos.take_nd(vals, codes, fill_value=index._na_value)
            values.append(vals)

        # Combine the per-level columns into a single array of tuples.
        arr = lib.fast_zip(values)
        return arr
    @property
    def values(self) -> np.ndarray:
        """Return the index values, delegating to the cached ``_values``."""
        return self._values
    @property
    def array(self):
        """
        Raises a ValueError for `MultiIndex` because there's no single
        array backing a MultiIndex.

        Raises
        ------
        ValueError
            Always; a MultiIndex is backed by multiple level/code arrays.
        """
        raise ValueError(
            "MultiIndex has no single backing array. Use "
            "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
        )
767 @cache_readonly
768 def dtypes(self) -> Series:
769 """
770 Return the dtypes as a Series for the underlying MultiIndex.
771 """
772 from pandas import Series
774 names = com.fill_missing_names([level.name for level in self.levels])
775 return Series([level.dtype for level in self.levels], index=Index(names))
    def __len__(self) -> int:
        # All code arrays share the same length; read it off the first.
        return len(self.codes[0])
780 # --------------------------------------------------------------------
781 # Levels Methods
    @cache_readonly
    def levels(self) -> FrozenList:
        """Per-level Index objects, carrying the stored level names."""
        # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
        # create new IndexEngine
        # https://github.com/pandas-dev/pandas/issues/31648
        result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
        for level in result:
            # disallow midx.levels[0].name = "foo"
            level._no_setting_name = True
        return FrozenList(result)
    def _set_levels(
        self,
        levels,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """Replace all levels, or only those named in ``level``, in place."""
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # Replace every level.
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._view() for lev in levels
            )
        else:
            # Replace only the requested levels, keeping the others.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels_list = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
            new_levels = FrozenList(new_levels_list)

        if verify_integrity:
            # Codes may be rewritten (e.g. -1 for NaN level values).
            new_codes = self._verify_integrity(levels=new_levels)
            self._codes = new_codes

        names = self.names
        self._levels = new_levels
        if any(names):
            # NOTE(review): names are re-applied after the swap — presumably
            # to keep the new level Indexes' names in sync; confirm.
            self._set_names(names)

        self._reset_cache()
836 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "levels"])
837 def set_levels(
838 self, levels, level=None, inplace=None, verify_integrity: bool = True
839 ):
840 """
841 Set new levels on MultiIndex. Defaults to returning new index.
843 Parameters
844 ----------
845 levels : sequence or list of sequence
846 New level(s) to apply.
847 level : int, level name, or sequence of int/level names (default None)
848 Level(s) to set (None for all levels).
849 inplace : bool
850 If True, mutates in place.
852 .. deprecated:: 1.2.0
853 verify_integrity : bool, default True
854 If True, checks that levels and codes are compatible.
856 Returns
857 -------
858 new index (of same type and class...etc) or None
859 The same type as the caller or None if ``inplace=True``.
861 Examples
862 --------
863 >>> idx = pd.MultiIndex.from_tuples(
864 ... [
865 ... (1, "one"),
866 ... (1, "two"),
867 ... (2, "one"),
868 ... (2, "two"),
869 ... (3, "one"),
870 ... (3, "two")
871 ... ],
872 ... names=["foo", "bar"]
873 ... )
874 >>> idx
875 MultiIndex([(1, 'one'),
876 (1, 'two'),
877 (2, 'one'),
878 (2, 'two'),
879 (3, 'one'),
880 (3, 'two')],
881 names=['foo', 'bar'])
883 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
884 MultiIndex([('a', 1),
885 ('a', 2),
886 ('b', 1),
887 ('b', 2),
888 ('c', 1),
889 ('c', 2)],
890 names=['foo', 'bar'])
891 >>> idx.set_levels(['a', 'b', 'c'], level=0)
892 MultiIndex([('a', 'one'),
893 ('a', 'two'),
894 ('b', 'one'),
895 ('b', 'two'),
896 ('c', 'one'),
897 ('c', 'two')],
898 names=['foo', 'bar'])
899 >>> idx.set_levels(['a', 'b'], level='bar')
900 MultiIndex([(1, 'a'),
901 (1, 'b'),
902 (2, 'a'),
903 (2, 'b'),
904 (3, 'a'),
905 (3, 'b')],
906 names=['foo', 'bar'])
908 If any of the levels passed to ``set_levels()`` exceeds the
909 existing length, all of the values from that argument will
910 be stored in the MultiIndex levels, though the values will
911 be truncated in the MultiIndex output.
913 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
914 MultiIndex([('a', 1),
915 ('a', 2),
916 ('b', 1),
917 ('b', 2),
918 ('c', 1),
919 ('c', 2)],
920 names=['foo', 'bar'])
921 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
922 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
923 """
924 if inplace is not None:
925 warnings.warn(
926 "inplace is deprecated and will be removed in a future version.",
927 FutureWarning,
928 stacklevel=find_stack_level(),
929 )
930 else:
931 inplace = False
933 if is_list_like(levels) and not isinstance(levels, Index):
934 levels = list(levels)
936 level, levels = _require_listlike(level, levels, "Levels")
938 if inplace:
939 idx = self
940 else:
941 idx = self._view()
942 idx._reset_identity()
943 idx._set_levels(
944 levels, level=level, validate=True, verify_integrity=verify_integrity
945 )
946 if not inplace:
947 return idx
    @property
    def nlevels(self) -> int:
        """
        Integer number of levels in this MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
        >>> mi
        MultiIndex([('a', 'b', 'c')],
                   )
        >>> mi.nlevels
        3
        """
        return len(self._levels)
965 @property
966 def levshape(self) -> Shape:
967 """
968 A tuple with the length of each level.
970 Examples
971 --------
972 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
973 >>> mi
974 MultiIndex([('a', 'b', 'c')],
975 )
976 >>> mi.levshape
977 (1, 1, 1)
978 """
979 return tuple(len(x) for x in self.levels)
981 # --------------------------------------------------------------------
982 # Codes Methods
    @property
    def codes(self):
        """FrozenList of per-level integer code arrays (-1 marks missing)."""
        return self._codes
    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """Replace all code arrays, or only those named in ``level``, in place."""
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        if level is None:
            # Replace every code array, coercing each against its level
            # (_coerce_indexer_frozen is defined elsewhere in this module).
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
        else:
            # Replace only the requested levels' codes.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            # May rewrite codes (e.g. -1 for NaN level values).
            new_codes = self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        self._reset_cache()
1025 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "codes"])
1026 def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True):
1027 """
1028 Set new codes on MultiIndex. Defaults to returning new index.
1030 Parameters
1031 ----------
1032 codes : sequence or list of sequence
1033 New codes to apply.
1034 level : int, level name, or sequence of int/level names (default None)
1035 Level(s) to set (None for all levels).
1036 inplace : bool
1037 If True, mutates in place.
1039 .. deprecated:: 1.2.0
1040 verify_integrity : bool, default True
1041 If True, checks that levels and codes are compatible.
1043 Returns
1044 -------
1045 new index (of same type and class...etc) or None
1046 The same type as the caller or None if ``inplace=True``.
1048 Examples
1049 --------
1050 >>> idx = pd.MultiIndex.from_tuples(
1051 ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
1052 ... )
1053 >>> idx
1054 MultiIndex([(1, 'one'),
1055 (1, 'two'),
1056 (2, 'one'),
1057 (2, 'two')],
1058 names=['foo', 'bar'])
1060 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
1061 MultiIndex([(2, 'one'),
1062 (1, 'one'),
1063 (2, 'two'),
1064 (1, 'two')],
1065 names=['foo', 'bar'])
1066 >>> idx.set_codes([1, 0, 1, 0], level=0)
1067 MultiIndex([(2, 'one'),
1068 (1, 'two'),
1069 (2, 'one'),
1070 (1, 'two')],
1071 names=['foo', 'bar'])
1072 >>> idx.set_codes([0, 0, 1, 1], level='bar')
1073 MultiIndex([(1, 'one'),
1074 (1, 'one'),
1075 (2, 'two'),
1076 (2, 'two')],
1077 names=['foo', 'bar'])
1078 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
1079 MultiIndex([(2, 'one'),
1080 (1, 'one'),
1081 (2, 'two'),
1082 (1, 'two')],
1083 names=['foo', 'bar'])
1084 """
1085 if inplace is not None:
1086 warnings.warn(
1087 "inplace is deprecated and will be removed in a future version.",
1088 FutureWarning,
1089 stacklevel=find_stack_level(),
1090 )
1091 else:
1092 inplace = False
1094 level, codes = _require_listlike(level, codes, "Codes")
1096 if inplace:
1097 idx = self
1098 else:
1099 idx = self._view()
1100 idx._reset_identity()
1101 idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
1102 if not inplace:
1103 return idx
1105 # --------------------------------------------------------------------
1106 # Index Internals
    @cache_readonly
    def _engine(self):
        """Return the cached lookup engine for this MultiIndex."""
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes (including -1 for NaN):
        sizes = np.ceil(np.log2([len(level) + 1 for level in self.levels]))

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1130 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1131 # type "Type[MultiIndex]" in supertype "Index"
1132 @property
1133 def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override]
1134 return type(self).from_tuples
1136 @doc(Index._shallow_copy)
1137 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
1138 names = name if name is not lib.no_default else self.names
1140 return type(self).from_tuples(values, sortorder=None, names=names)
1142 def _view(self) -> MultiIndex:
1143 result = type(self)(
1144 levels=self.levels,
1145 codes=self.codes,
1146 sortorder=self.sortorder,
1147 names=self.names,
1148 verify_integrity=False,
1149 )
1150 result._cache = self._cache.copy()
1151 result._cache.pop("levels", None) # GH32669
1152 return result
1154 # --------------------------------------------------------------------
    def copy(
        self,
        names=None,
        dtype=None,
        levels=None,
        codes=None,
        deep=False,
        name=None,
    ):
        """
        Make a copy of this object. Names, dtype, levels and codes can be
        passed and will be set on new copy.

        Parameters
        ----------
        names : sequence, optional
        dtype : numpy dtype or pandas type, optional

            .. deprecated:: 1.2.0
        levels : sequence, optional

            .. deprecated:: 1.2.0
        codes : sequence, optional

            .. deprecated:: 1.2.0
        deep : bool, default False
        name : Label
            Kept for compatibility with 1-dimensional Index. Should not be used.

        Returns
        -------
        MultiIndex

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        This could be potentially expensive on large MultiIndex objects.
        """
        names = self._validate_names(name=name, names=names, deep=deep)
        # A plain shallow copy may share the cached identity (_id) with self;
        # deep copies and explicit levels/codes overrides must not.
        keep_id = not deep
        if levels is not None:
            warnings.warn(
                "parameter levels is deprecated and will be removed in a future "
                "version. Use the set_levels method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            keep_id = False
        if codes is not None:
            warnings.warn(
                "parameter codes is deprecated and will be removed in a future "
                "version. Use the set_codes method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            keep_id = False

        if deep:
            from copy import deepcopy

            if levels is None:
                levels = deepcopy(self.levels)
            if codes is None:
                codes = deepcopy(self.codes)

        levels = levels if levels is not None else self.levels
        codes = codes if codes is not None else self.codes

        new_index = type(self)(
            levels=levels,
            codes=codes,
            sortorder=self.sortorder,
            names=names,
            verify_integrity=False,
        )
        new_index._cache = self._cache.copy()
        new_index._cache.pop("levels", None)  # GH32669
        if keep_id:
            new_index._id = self._id

        if dtype:
            warnings.warn(
                "parameter dtype is deprecated and will be removed in a future "
                "version. Use the astype method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            new_index = new_index.astype(dtype)
        return new_index
1247 def __array__(self, dtype=None) -> np.ndarray:
1248 """the array interface, return my values"""
1249 return self.values
1251 def view(self, cls=None):
1252 """this is defined as a copy with the same identity"""
1253 result = self.copy()
1254 result._id = self._id
1255 return result
1257 @doc(Index.__contains__)
1258 def __contains__(self, key: Any) -> bool:
1259 hash(key)
1260 try:
1261 self.get_loc(key)
1262 return True
1263 except (LookupError, TypeError, ValueError):
1264 return False
    @cache_readonly
    def dtype(self) -> np.dtype:
        # A MultiIndex materializes as an array of tuples, so its dtype is
        # always object.
        return np.dtype("O")
1270 def _is_memory_usage_qualified(self) -> bool:
1271 """return a boolean if we need a qualified .info display"""
1273 def f(level):
1274 return "mixed" in level or "string" in level or "unicode" in level
1276 return any(f(level) for level in self._inferred_type_levels)
    @doc(Index.memory_usage)
    def memory_usage(self, deep: bool = False) -> int:
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)
    @cache_readonly
    def nbytes(self) -> int:
        """return the number of bytes in the underlying data"""
        # Shallow (deep=False) count; see _nbytes for the breakdown.
        return self._nbytes(False)
1290 def _nbytes(self, deep: bool = False) -> int:
1291 """
1292 return the number of bytes in the underlying data
1293 deeply introspect the level data if deep=True
1295 include the engine hashtable
1297 *this is in internal routine*
1299 """
1300 # for implementations with no useful getsizeof (PyPy)
1301 objsize = 24
1303 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
1304 label_nbytes = sum(i.nbytes for i in self.codes)
1305 names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
1306 result = level_nbytes + label_nbytes + names_nbytes
1308 # include our engine hashtable
1309 result += self._engine.sizeof(deep=deep)
1310 return result
1312 # --------------------------------------------------------------------
1313 # Rendering Methods
1315 def _formatter_func(self, tup):
1316 """
1317 Formats each item in tup according to its level's formatter function.
1318 """
1319 formatter_funcs = [level._formatter_func for level in self.levels]
1320 return tuple(func(val) for func, val in zip(formatter_funcs, tup))
    def _format_native_types(
        self, *, na_rep="nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        # Render each level's values as strings, appending na_rep as an extra
        # "level value" for positions whose code is the -1 missing sentinel.
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        # Render the index as a list of adjoined strings, or (adjoin=False)
        # as a list of per-level string lists, optionally sparsified.
        # `name` is a legacy alias that overrides `names` when given.
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:

                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            # Prepend the level name header when requested.
            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            from pandas.io.formats.format import get_adjustment

            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1433 # --------------------------------------------------------------------
1434 # Names Methods
    def _get_names(self) -> FrozenList:
        # Return the level names wrapped in a FrozenList (fget of `names`).
        return FrozenList(self._names)
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        # Normalize `level` to a sequence of zero-based level numbers.
        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
            self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
    # Property wiring: reads go through _get_names, writes through _set_names
    # (which validates lengths and mutates in place).
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1516 # --------------------------------------------------------------------
    @doc(Index._get_grouper_for_level)
    def _get_grouper_for_level(
        self,
        mapper,
        *,
        level=None,
        dropna: bool = True,
    ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
        if mapper is not None:
            indexer = self.codes[level]
            # Handle group mapping function and return
            level_values = self.levels[level].take(indexer)
            grouper = level_values.map(mapper)
            return grouper, None, None

        values = self.get_level_values(level)
        # Factorize the level's values; with dropna=True NA values get the
        # -1 sentinel code.
        codes, uniques = algos.factorize(values, sort=True, use_na_sentinel=dropna)
        assert isinstance(uniques, Index)

        if self.levels[level]._can_hold_na:
            grouper = uniques.take(codes, fill_value=True)
        else:
            grouper = uniques.take(codes)

        return grouper, codes, uniques
    @cache_readonly
    def inferred_type(self) -> str:
        # The flattened values of a MultiIndex are tuples, reported as "mixed".
        return "mixed"
1548 def _get_level_number(self, level) -> int:
1549 count = self.names.count(level)
1550 if (count > 1) and not is_integer(level):
1551 raise ValueError(
1552 f"The name {level} occurs multiple times, use a level number"
1553 )
1554 try:
1555 level = self.names.index(level)
1556 except ValueError as err:
1557 if not is_integer(level):
1558 raise KeyError(f"Level {level} not found") from err
1559 elif level < 0:
1560 level += self.nlevels
1561 if level < 0:
1562 orig_level = level - self.nlevels
1563 raise IndexError(
1564 f"Too many levels: Index has only {self.nlevels} levels, "
1565 f"{orig_level} is not a valid level number"
1566 ) from err
1567 # Note: levels are zero-based
1568 elif level >= self.nlevels:
1569 raise IndexError(
1570 f"Too many levels: Index has only {self.nlevels} levels, "
1571 f"not {level + 1}"
1572 ) from err
1573 return level
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # Any -1 code (missing value) makes the index non-monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:

            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1607 @cache_readonly
1608 def is_monotonic_decreasing(self) -> bool:
1609 """
1610 Return a boolean if the values are equal or decreasing.
1611 """
1612 # monotonic decreasing if and only if reverse is monotonic increasing
1613 return self[::-1].is_monotonic_increasing
1615 @cache_readonly
1616 def _inferred_type_levels(self) -> list[str]:
1617 """return a list of the inferred types, one for each level"""
1618 return [i.inferred_type for i in self.levels]
    @doc(Index.duplicated)
    def duplicated(self, keep="first") -> npt.NDArray[np.bool_]:
        # Encode each row as a single group id over the cartesian product of
        # the level sizes, then detect duplicates among those ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        # Calls the module-level `duplicated` imported from
        # pandas._libs.hashtable (not this method recursively).
        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1631 def fillna(self, value=None, downcast=None):
1632 """
1633 fillna is not implemented for MultiIndex
1634 """
1635 raise NotImplementedError("isna is not defined for MultiIndex")
1637 @doc(Index.dropna)
1638 def dropna(self, how: str = "any") -> MultiIndex:
1639 nans = [level_codes == -1 for level_codes in self.codes]
1640 if how == "any":
1641 indexer = np.any(nans, axis=0)
1642 elif how == "all":
1643 indexer = np.all(nans, axis=0)
1644 else:
1645 raise ValueError(f"invalid how option: {how}")
1647 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1648 return self.set_codes(codes=new_codes)
1650 def _get_level_values(self, level: int, unique: bool = False) -> Index:
1651 """
1652 Return vector of label values for requested level,
1653 equal to the length of the index
1655 **this is an internal method**
1657 Parameters
1658 ----------
1659 level : int
1660 unique : bool, default False
1661 if True, drop duplicated values
1663 Returns
1664 -------
1665 Index
1666 """
1667 lev = self.levels[level]
1668 level_codes = self.codes[level]
1669 name = self._names[level]
1670 if unique:
1671 level_codes = algos.unique(level_codes)
1672 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
1673 return lev._shallow_copy(filled, name=name)
1675 def get_level_values(self, level):
1676 """
1677 Return vector of label values for requested level.
1679 Length of returned vector is equal to the length of the index.
1681 Parameters
1682 ----------
1683 level : int or str
1684 ``level`` is either the integer position of the level in the
1685 MultiIndex, or the name of the level.
1687 Returns
1688 -------
1689 values : Index
1690 Values is a level of this MultiIndex converted to
1691 a single :class:`Index` (or subclass thereof).
1693 Notes
1694 -----
1695 If the level contains missing values, the result may be casted to
1696 ``float`` with missing values specified as ``NaN``. This is because
1697 the level is converted to a regular ``Index``.
1699 Examples
1700 --------
1701 Create a MultiIndex:
1703 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1704 >>> mi.names = ['level_1', 'level_2']
1706 Get level values by supplying level as either integer or name:
1708 >>> mi.get_level_values(0)
1709 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1710 >>> mi.get_level_values('level_2')
1711 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1713 If a level contains missing values, the return type of the level
1714 maybe casted to ``float``.
1716 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1717 level_0 int64
1718 level_1 int64
1719 dtype: object
1720 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1721 Float64Index([1.0, nan, 2.0], dtype='float64')
1722 """
1723 level = self._get_level_number(level)
1724 values = self._get_level_values(level)
1725 return values
1727 @doc(Index.unique)
1728 def unique(self, level=None):
1730 if level is None:
1731 return super().unique()
1732 else:
1733 level = self._get_level_number(level)
1734 return self._get_level_values(level=level, unique=True)
    def to_frame(
        self,
        index: bool = True,
        name=lib.no_default,
        allow_duplicates: bool = False,
    ) -> DataFrame:
        """
        Create a DataFrame with the levels of the MultiIndex as columns.

        Column ordering is determined by the DataFrame constructor with data as
        a dict.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original MultiIndex.

        name : list / sequence of str, optional
            The passed names should substitute index level names.

        allow_duplicates : bool, optional default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        DataFrame : a DataFrame containing the original MultiIndex data.

        See Also
        --------
        DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
            tabular data.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
        >>> mi
        MultiIndex([('a', 'c'),
                    ('b', 'd')],
                   )

        >>> df = mi.to_frame()
        >>> df
             0  1
        a c  a  c
        b d  b  d

        >>> df = mi.to_frame(index=False)
        >>> df
           0  1
        0  a  c
        1  b  d

        >>> df = mi.to_frame(name=['x', 'y'])
        >>> df
             x  y
        a c  a  c
        b d  b  d
        """
        from pandas import DataFrame

        # `name=None` is deprecated; normalize it to the no-default sentinel
        # after warning.
        if name is None:
            warnings.warn(
                "Explicitly passing `name=None` currently preserves the Index's name "
                "or uses a default name of 0. This behaviour is deprecated, and in "
                "the future `None` will be used as the name of the resulting "
                "DataFrame column.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            name = lib.no_default

        if name is not lib.no_default:
            if not is_list_like(name):
                raise TypeError("'name' must be a list / sequence of column names.")

            if len(name) != len(self.levels):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )
            idx_names = name
        else:
            idx_names = self._get_level_names()

        if not allow_duplicates and len(set(idx_names)) != len(idx_names):
            raise ValueError(
                "Cannot create duplicate column labels if allow_duplicates is False"
            )

        # Guarantee resulting column order - PY36+ dict maintains insertion order
        result = DataFrame(
            {level: self._get_level_values(level) for level in range(len(self.levels))},
            copy=False,
        )
        result.columns = idx_names

        if index:
            result.index = self
        return result
1837 # error: Return type "Index" of "to_flat_index" incompatible with return type
1838 # "MultiIndex" in supertype "Index"
1839 def to_flat_index(self) -> Index: # type: ignore[override]
1840 """
1841 Convert a MultiIndex to an Index of Tuples containing the level values.
1843 Returns
1844 -------
1845 pd.Index
1846 Index with the MultiIndex data represented in Tuples.
1848 See Also
1849 --------
1850 MultiIndex.from_tuples : Convert flat index back to MultiIndex.
1852 Notes
1853 -----
1854 This method will simply return the caller if called by anything other
1855 than a MultiIndex.
1857 Examples
1858 --------
1859 >>> index = pd.MultiIndex.from_product(
1860 ... [['foo', 'bar'], ['baz', 'qux']],
1861 ... names=['a', 'b'])
1862 >>> index.to_flat_index()
1863 Index([('foo', 'baz'), ('foo', 'qux'),
1864 ('bar', 'baz'), ('bar', 'qux')],
1865 dtype='object')
1866 """
1867 return Index(self._values, tupleize_cols=False)
1869 def is_lexsorted(self) -> bool:
1870 warnings.warn(
1871 "MultiIndex.is_lexsorted is deprecated as a public function, "
1872 "users should use MultiIndex.is_monotonic_increasing instead.",
1873 FutureWarning,
1874 stacklevel=find_stack_level(),
1875 )
1876 return self._is_lexsorted()
1878 def _is_lexsorted(self) -> bool:
1879 """
1880 Return True if the codes are lexicographically sorted.
1882 Returns
1883 -------
1884 bool
1886 Examples
1887 --------
1888 In the below examples, the first level of the MultiIndex is sorted because
1889 a<b<c, so there is no need to look at the next level.
1891 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted()
1892 True
1893 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted()
1894 True
1896 In case there is a tie, the lexicographical sorting looks
1897 at the next level of the MultiIndex.
1899 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted()
1900 True
1901 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted()
1902 False
1903 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1904 ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted()
1905 True
1906 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1907 ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted()
1908 False
1909 """
1910 return self._lexsort_depth == self.nlevels
1912 @property
1913 def lexsort_depth(self) -> int:
1914 warnings.warn(
1915 "MultiIndex.lexsort_depth is deprecated as a public function, "
1916 "users should use MultiIndex.is_monotonic_increasing instead.",
1917 FutureWarning,
1918 stacklevel=find_stack_level(),
1919 )
1920 return self._lexsort_depth
1922 @cache_readonly
1923 def _lexsort_depth(self) -> int:
1924 """
1925 Compute and return the lexsort_depth, the number of levels of the
1926 MultiIndex that are sorted lexically
1928 Returns
1929 -------
1930 int
1931 """
1932 if self.sortorder is not None:
1933 return self.sortorder
1934 return _lexsort_depth(self.codes, self.nlevels)
    def _sort_levels_monotonic(self) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        # Already sorted: nothing to do.
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):

            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    # Unsortable level values: leave this level untouched.
                    pass
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    level_codes = algos.take_nd(ri, level_codes)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):

            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:

                # NOTE(review): this `break` exits the whole loop (not just
                # this level) when the level itself contains NaN and all its
                # values are used — confirm that skipping the remaining
                # levels here is intended rather than `continue`.
                if lev.isna().any() and len(uniques) == len(lev):
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
2090 # --------------------------------------------------------------------
2091 # Pickling Methods
2093 def __reduce__(self):
2094 """Necessary for making this object picklable"""
2095 d = {
2096 "levels": list(self.levels),
2097 "codes": list(self.codes),
2098 "sortorder": self.sortorder,
2099 "names": list(self.names),
2100 }
2101 return ibase._new_Index, (type(self), d), None
2103 # --------------------------------------------------------------------
    def __getitem__(self, key):
        if is_scalar(key):
            key = com.cast_scalar_indexer(key, warn_float=True)

            # Scalar position: rebuild that row's tuple, substituting NaN for
            # any level whose code is the -1 missing sentinel.
            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                # A boolean mask preserves the relative order of kept rows.
                sortorder = self.sortorder
            elif isinstance(key, slice):
                # A non-negative-step slice also preserves order.
                if key.step is None or key.step > 0:
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2139 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2140 """
2141 Fastpath for __getitem__ when we know we have a slice.
2142 """
2143 sortorder = None
2144 if slobj.step is None or slobj.step > 0:
2145 sortorder = self.sortorder
2147 new_codes = [level_codes[slobj] for level_codes in self.codes]
2149 return type(self)(
2150 levels=self.levels,
2151 codes=new_codes,
2152 names=self._names,
2153 sortorder=sortorder,
2154 verify_integrity=False,
2155 )
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: int = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the missing-value sentinel in the level codes
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            # Positions requested as -1 become missing in every level.
            mask = indices == -1
            if mask.any():
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
    def append(self, other):
        """
        Append a collection of Index options together

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # Fast path: every other index is a MultiIndex with at least as
            # many levels, so append level-by-level.
            arrays = []
            for i in range(self.nlevels):
                label = self._get_level_values(i)
                appended = [o._get_level_values(i) for o in other]
                arrays.append(label.append(appended))
            return MultiIndex.from_arrays(arrays, names=self.names)

        # Fallback: concatenate the materialized tuple values.
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            return MultiIndex.from_tuples(new_tuples, names=self.names)
        except (TypeError, IndexError):
            # Not all entries are tuples; return a plain (inferred) Index.
            return Index._with_infer(new_tuples)
2223 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
2224 return self._values.argsort(*args, **kwargs)
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        # Repeating the codes is sufficient; the levels themselves are reused.
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
    def drop(self, codes, level=None, errors="raise"):
        """
        Make new MultiIndex with passed list of codes deleted

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', labels that are not found do not raise KeyError.

        Returns
        -------
        dropped : MultiIndex
        """
        if level is not None:
            # Label-based dropping restricted to a single level.
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                # Normalize e.g. a list of tuples to an object-dtype array.
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # Collect the integer positions to delete, one key at a time.
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    # Expand the slice into explicit positions.
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # Convert the boolean mask to integer positions.
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
    def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex:
        """
        Drop all rows whose label in ``level`` appears in ``codes``.

        Parameters
        ----------
        codes : array-like
            Labels to remove from the given level.
        level : int or level name
        errors : str, default 'raise'
            If 'ignore', labels missing from the level do not raise.

        Returns
        -------
        MultiIndex
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        # -2 marks "requested label genuinely absent from the level";
        # -1 is reserved because it also encodes NaN positions in self.codes[i].
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # NOTE(review): when the level has the same length as the index,
            # NaN requests are also treated as not-found — confirm intent.
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # Keep only the rows whose code does not match any targeted value.
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2316 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2317 """
2318 Swap level i with level j.
2320 Calling this method does not change the ordering of the values.
2322 Parameters
2323 ----------
2324 i : int, str, default -2
2325 First level of index to be swapped. Can pass level name as string.
2326 Type of parameters can be mixed.
2327 j : int, str, default -1
2328 Second level of index to be swapped. Can pass level name as string.
2329 Type of parameters can be mixed.
2331 Returns
2332 -------
2333 MultiIndex
2334 A new MultiIndex.
2336 See Also
2337 --------
2338 Series.swaplevel : Swap levels i and j in a MultiIndex.
2339 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2340 particular axis.
2342 Examples
2343 --------
2344 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2345 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2346 >>> mi
2347 MultiIndex([('a', 'bb'),
2348 ('a', 'aa'),
2349 ('b', 'bb'),
2350 ('b', 'aa')],
2351 )
2352 >>> mi.swaplevel(0, 1)
2353 MultiIndex([('bb', 'a'),
2354 ('aa', 'a'),
2355 ('bb', 'b'),
2356 ('aa', 'b')],
2357 )
2358 """
2359 new_levels = list(self.levels)
2360 new_codes = list(self.codes)
2361 new_names = list(self.names)
2363 i = self._get_level_number(i)
2364 j = self._get_level_number(j)
2366 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2367 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2368 new_names[i], new_names[j] = new_names[j], new_names[i]
2370 return MultiIndex(
2371 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2372 )
2374 def reorder_levels(self, order) -> MultiIndex:
2375 """
2376 Rearrange levels using input order. May not drop or duplicate levels.
2378 Parameters
2379 ----------
2380 order : list of int or list of str
2381 List representing new level order. Reference level by number
2382 (position) or by key (label).
2384 Returns
2385 -------
2386 MultiIndex
2388 Examples
2389 --------
2390 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2391 >>> mi
2392 MultiIndex([(1, 3),
2393 (2, 4)],
2394 names=['x', 'y'])
2396 >>> mi.reorder_levels(order=[1, 0])
2397 MultiIndex([(3, 1),
2398 (4, 2)],
2399 names=['y', 'x'])
2401 >>> mi.reorder_levels(order=['y', 'x'])
2402 MultiIndex([(3, 1),
2403 (4, 2)],
2404 names=['y', 'x'])
2405 """
2406 order = [self._get_level_number(i) for i in order]
2407 if len(order) != self.nlevels:
2408 raise AssertionError(
2409 f"Length of order must be same as number of levels ({self.nlevels}), "
2410 f"got {len(order)}"
2411 )
2412 new_levels = [self.levels[i] for i in order]
2413 new_codes = [self.codes[i] for i in order]
2414 new_names = [self.names[i] for i in order]
2416 return MultiIndex(
2417 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2418 )
2420 def _get_codes_for_sorting(self) -> list[Categorical]:
2421 """
2422 we are categorizing our codes by using the
2423 available categories (all, not just observed)
2424 excluding any missing ones (-1); this is in preparation
2425 for sorting, where we need to disambiguate that -1 is not
2426 a valid valid
2427 """
2429 def cats(level_codes):
2430 return np.arange(
2431 np.array(level_codes).max() + 1 if len(level_codes) else 0,
2432 dtype=level_codes.dtype,
2433 )
2435 return [
2436 Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
2437 for level_codes in self.codes
2438 ]
    def sortlevel(
        self, level=0, ascending: bool = True, sort_remaining: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        # Normalize `level` to a list of positional level numbers.
        if isinstance(level, (str, int)):
            level = [level]
        level = [self._get_level_number(lev) for lev in level]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            # Per-level ascending/descending: delegate to lexsort_indexer.
            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:

            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): this appends `primary` to itself before adding
                # the remaining codes; duplicated sort keys do not change the
                # resulting order, but confirm the duplication is intentional.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                # Only the requested level is guaranteed sorted afterwards.
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        # Reorder every level's codes; the levels themselves are unchanged.
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2548 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
2549 if not isinstance(target, MultiIndex):
2550 if indexer is None:
2551 target = self
2552 elif (indexer >= 0).all():
2553 target = self.take(indexer)
2554 else:
2555 try:
2556 target = MultiIndex.from_tuples(target)
2557 except TypeError:
2558 # not all tuples, see test_constructor_dict_multiindex_reindex_flat
2559 return target
2561 target = self._maybe_preserve_names(target, preserve_names)
2562 return target
2564 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2565 if (
2566 preserve_names
2567 and target.nlevels == self.nlevels
2568 and target.names != self.names
2569 ):
2570 target = target.copy(deep=False)
2571 target.names = self.names
2572 return target
2574 # --------------------------------------------------------------------
2575 # Indexing Methods
2577 def _check_indexing_error(self, key) -> None:
2578 if not is_hashable(key) or is_iterator(key):
2579 # We allow tuples if they are hashable, whereas other Index
2580 # subclasses require scalar.
2581 # We have to explicitly exclude generators, as these are hashable.
2582 raise InvalidIndexError(key)
2584 @cache_readonly
2585 def _should_fallback_to_positional(self) -> bool:
2586 """
2587 Should integer key(s) be treated as positional?
2588 """
2589 # GH#33355
2590 return self.levels[0]._should_fallback_to_positional
2592 def _get_values_for_loc(self, series: Series, loc, key):
2593 """
2594 Do a positional lookup on the given Series, returning either a scalar
2595 or a Series.
2597 Assumes that `series.index is self`
2598 """
2599 new_values = series._values[loc]
2600 if is_scalar(loc):
2601 return new_values
2603 if len(new_values) == 1 and not self.nlevels > 1:
2604 # If more than one level left, we can not return a scalar
2605 return new_values[0]
2607 new_index = self[loc]
2608 new_index = maybe_droplevels(new_index, key)
2609 new_ser = series._constructor(new_values, index=new_index, name=series.name)
2610 return new_ser.__finalize__(series)
2612 def _get_indexer_strict(
2613 self, key, axis_name: str
2614 ) -> tuple[Index, npt.NDArray[np.intp]]:
2616 keyarr = key
2617 if not isinstance(keyarr, Index):
2618 keyarr = com.asarray_tuplesafe(keyarr)
2620 if len(keyarr) and not isinstance(keyarr[0], tuple):
2621 indexer = self._get_indexer_level_0(keyarr)
2623 self._raise_if_missing(key, indexer, axis_name)
2624 return self[indexer], indexer
2626 return super()._get_indexer_strict(key, axis_name)
2628 def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
2629 keyarr = key
2630 if not isinstance(key, Index):
2631 keyarr = com.asarray_tuplesafe(key)
2633 if len(keyarr) and not isinstance(keyarr[0], tuple):
2634 # i.e. same condition for special case in MultiIndex._get_indexer_strict
2636 mask = indexer == -1
2637 if mask.any():
2638 check = self.levels[0].get_indexer(keyarr)
2639 cmask = check == -1
2640 if cmask.any():
2641 raise KeyError(f"{keyarr[cmask]} not in index")
2642 # We get here when levels still contain values which are not
2643 # actually in Index anymore
2644 raise KeyError(f"{keyarr} not in index")
2645 else:
2646 return super()._raise_if_missing(key, indexer, axis_name)
2648 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
2649 """
2650 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
2651 """
2652 lev = self.levels[0]
2653 codes = self._codes[0]
2654 cat = Categorical.from_codes(codes=codes, categories=lev)
2655 ci = Index(cat)
2656 return ci.get_indexer_for(target)
2658 def get_slice_bound(
2659 self,
2660 label: Hashable | Sequence[Hashable],
2661 side: Literal["left", "right"],
2662 kind=lib.no_default,
2663 ) -> int:
2664 """
2665 For an ordered MultiIndex, compute slice bound
2666 that corresponds to given label.
2668 Returns leftmost (one-past-the-rightmost if `side=='right') position
2669 of given label.
2671 Parameters
2672 ----------
2673 label : object or tuple of objects
2674 side : {'left', 'right'}
2675 kind : {'loc', 'getitem', None}
2677 .. deprecated:: 1.4.0
2679 Returns
2680 -------
2681 int
2682 Index of label.
2684 Notes
2685 -----
2686 This method only works if level 0 index of the MultiIndex is lexsorted.
2688 Examples
2689 --------
2690 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])
2692 Get the locations from the leftmost 'b' in the first level
2693 until the end of the multiindex:
2695 >>> mi.get_slice_bound('b', side="left")
2696 1
2698 Like above, but if you get the locations from the rightmost
2699 'b' in the first level and 'f' in the second level:
2701 >>> mi.get_slice_bound(('b','f'), side="right")
2702 3
2704 See Also
2705 --------
2706 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2707 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2708 sequence of such.
2709 """
2710 self._deprecated_arg(kind, "kind", "get_slice_bound")
2712 if not isinstance(label, tuple):
2713 label = (label,)
2714 return self._partial_tup_index(label, side=side)
    def slice_locs(
        self, start=None, end=None, step=None, kind=lib.no_default
    ) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step
        kind : string, optional, defaults None

            .. deprecated:: 1.4.0

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.
        """
        # Warn if the deprecated ``kind`` argument was explicitly passed.
        self._deprecated_arg(kind, "kind", "slice_locs")
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple.

        Walks the levels left-to-right, narrowing the candidate window
        ``[start, end)`` level by level via searchsorted on the codes.
        Requires the index to be lexsorted at least ``len(tup)`` deep.

        Parameters
        ----------
        tup : tuple
            One label per level, possibly fewer than ``self.nlevels``.
        side : {'left', 'right'}, default 'left'
            Which bound of the matching run to return.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # Only the codes inside the current window are relevant.
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                # Label absent from the level: find where it WOULD be inserted
                # in the level, then translate that to a position in `section`.
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    # 'right' bound: the code strictly before the insertion
                    # point is the last one that can match.
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            # NA labels map to code -1 (see _get_loc_single_level_index).
            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # More levels remain: shrink the window to this label's run.
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                # Last level and a slice result: its start is the bound code.
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                # Last level, scalar code: final searchsorted gives the bound.
                return start + algos.searchsorted(section, idx, side=side)
2827 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2828 """
2829 If key is NA value, location of index unify as -1.
2831 Parameters
2832 ----------
2833 level_index: Index
2834 key : label
2836 Returns
2837 -------
2838 loc : int
2839 If key is NA value, loc is -1
2840 Else, location of key in index.
2842 See Also
2843 --------
2844 Index.get_loc : The get_loc method for (single-level) index.
2845 """
2846 if is_scalar(key) and isna(key):
2847 return -1
2848 else:
2849 return level_index.get_loc(key)
    def get_loc(self, key, method=None):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)
        method : None

        Returns
        -------
        loc : int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        if method is not None:
            raise NotImplementedError(
                "only the default get_loc method is "
                "currently supported for MultiIndex"
            )

        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # Positions are not contiguous: fall back to a boolean mask.
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # Scalar key: resolve against level 0 only.
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # Fast path: complete key on a unique index -> engine lookup.
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        # Candidate positions from the sorted prefix; narrow them per level.
        loc = np.arange(start, stop, dtype=np.intp)

        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # If every candidate survived, the run is contiguous -> return a slice.
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
2976 def get_loc_level(self, key, level=0, drop_level: bool = True):
2977 """
2978 Get location and sliced index for requested label(s)/level(s).
2980 Parameters
2981 ----------
2982 key : label or sequence of labels
2983 level : int/level name or list thereof, optional
2984 drop_level : bool, default True
2985 If ``False``, the resulting index will not drop any level.
2987 Returns
2988 -------
2989 loc : A 2-tuple where the elements are:
2990 Element 0: int, slice object or boolean array
2991 Element 1: The resulting sliced multiindex/index. If the key
2992 contains all levels, this will be ``None``.
2994 See Also
2995 --------
2996 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2997 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2998 sequence of such.
3000 Examples
3001 --------
3002 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
3003 ... names=['A', 'B'])
3005 >>> mi.get_loc_level('b')
3006 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))
3008 >>> mi.get_loc_level('e', level='B')
3009 (array([False, True, False]), Index(['b'], dtype='object', name='A'))
3011 >>> mi.get_loc_level(['b', 'e'])
3012 (1, None)
3013 """
3014 if not isinstance(level, (list, tuple)):
3015 level = self._get_level_number(level)
3016 else:
3017 level = [self._get_level_number(lev) for lev in level]
3019 loc, mi = self._get_loc_level(key, level=level)
3020 if not drop_level:
3021 if lib.is_integer(loc):
3022 mi = self[loc : loc + 1]
3023 else:
3024 mi = self[loc]
3025 return loc, mi
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # Drop from highest to lowest so earlier drops don't shift
            # the remaining level numbers.
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # Multiple (level, key) pairs: AND the per-level indexers together.
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # Normalize to a boolean mask so it can be combined below.
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:

            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):

                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    # Partial-string-matched levels are kept, not dropped.
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # Tuple key containing slices: intersect per-level boolean masks.
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue
                            else:
                                # e.g. test_xs_IndexSlice_argument_not_implemented
                                k_index = np.zeros(len(self), dtype=bool)
                                k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    # Every component selected everything.
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # Single (non-tuple) key, or a key targeting a deeper level.
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                # Cannot drop the only remaining level.
                result_index = self[indexer]

            return indexer, result_index
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate ``key`` within a single level, returning either a slice
        (when the level is sorted) or a boolean array over the whole index.
        """
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                # Stepped slice: match codes against the explicit range.
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # Scatter the narrowed result back into the original positions.
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:

                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:

            # Scalar key; NA keys map to code -1.
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        # Normalize a per-level result to a boolean mask of length n so the
        # per-level results can be AND-ed together below.
        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take; None means
        # "no level has constrained the selection yet"
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):

            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        try:
                            item_indexer = self._get_level_indexer(
                                x, level=i, indexer=indexer
                            )
                        except KeyError:
                            # ignore not founds; see discussion in GH#39424
                            warnings.warn(
                                "The behavior of indexing on a MultiIndex with a "
                                "nested sequence of labels is deprecated and will "
                                "change in a future version. "
                                "`series.loc[label, sequence]` will raise if any "
                                "members of 'sequence' or not present in "
                                "the index's second level. To retain the old "
                                "behavior, use `series.index.isin(sequence, level=1)`",
                                # TODO: how to opt in to the future behavior?
                                # TODO: how to handle IntervalIndex level?
                                #  (no test cases)
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                            continue
                        else:
                            # OR this label's positions into the level's mask
                            if lvl_indexer is None:
                                lvl_indexer = _to_bool_indexer(item_indexer)
                            elif isinstance(item_indexer, slice):
                                lvl_indexer[item_indexer] = True  # type: ignore[index]
                            else:
                                lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer: each level's mask is AND-ed with the running mask
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3441 # --------------------------------------------------------------------
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # If the index is lexsorted and the list_like label in seq are sorted
        # then we do not need to sort
        if self._is_lexsorted():
            need_sort = False
            for i, k in enumerate(seq):
                if is_list_like(k):
                    if not need_sort:
                        k_codes = self.levels[i].get_indexer(k)
                        k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                        # True if the given codes are not ordered
                        need_sort = (k_codes[:-1] > k_codes[1:]).any()
                elif isinstance(k, slice) and k.step is not None and k.step < 0:
                    # a reversed slice always forces a re-sort
                    need_sort = True
            # Bail out if both index and seq are sorted
            if not need_sort:
                return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                # absent codes map to len(level), i.e. sort after everything
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,))[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend: np.lexsort sorts by its LAST key first, so earlier
            # levels of seq must end up later in `keys` to take precedence
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3516 def truncate(self, before=None, after=None) -> MultiIndex:
3517 """
3518 Slice index between two labels / tuples, return new MultiIndex
3520 Parameters
3521 ----------
3522 before : label or tuple, can be partial. Default None
3523 None defaults to start
3524 after : label or tuple, can be partial. Default None
3525 None defaults to end
3527 Returns
3528 -------
3529 truncated : MultiIndex
3530 """
3531 if after and before and after < before:
3532 raise ValueError("after < before")
3534 i, j = self.levels[0].slice_locs(before, after)
3535 left, right = self.slice_locs(before, after)
3537 new_levels = list(self.levels)
3538 new_levels[0] = new_levels[0][i:j]
3540 new_codes = [level_codes[left:right] for level_codes in self.codes]
3541 new_codes[0] = new_codes[0] - i
3543 return MultiIndex(
3544 levels=new_levels,
3545 codes=new_codes,
3546 names=self._names,
3547 verify_integrity=False,
3548 )
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            # same object: trivially equal
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level: realize each level's values from the codes
        # and compare, requiring missing values (-1 codes) in the same spots.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3612 def equal_levels(self, other: MultiIndex) -> bool:
3613 """
3614 Return True if the levels of both MultiIndex objects are the same
3616 """
3617 if self.nlevels != other.nlevels:
3618 return False
3620 for i in range(self.nlevels):
3621 if not self.levels[i].equals(other.levels[i]):
3622 return False
3623 return True
3625 # --------------------------------------------------------------------
3626 # Set Methods
3628 def _union(self, other, sort) -> MultiIndex:
3629 other, result_names = self._convert_can_do_setop(other)
3630 if (
3631 any(-1 in code for code in self.codes)
3632 and any(-1 in code for code in other.codes)
3633 or self.has_duplicates
3634 or other.has_duplicates
3635 ):
3636 # This is only necessary if both sides have nans or one has dups,
3637 # fast_unique_multiple is faster
3638 result = super()._union(other, sort)
3639 else:
3640 rvals = other._values.astype(object, copy=False)
3641 result = lib.fast_unique_multiple([self._values, rvals], sort=sort)
3643 return MultiIndex.from_arrays(zip(*result), sortorder=None, names=result_names)
3645 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
3646 return is_object_dtype(dtype)
3648 def _get_reconciled_name_object(self, other) -> MultiIndex:
3649 """
3650 If the result of a set operation will be self,
3651 return self, unless the names change, in which
3652 case make a shallow copy of self.
3653 """
3654 names = self._maybe_match_names(other)
3655 if self.names != names:
3656 # Incompatible return value type (got "Optional[MultiIndex]", expected
3657 # "MultiIndex")
3658 return self.rename(names) # type: ignore[return-value]
3659 return self
3661 def _maybe_match_names(self, other):
3662 """
3663 Try to find common names to attach to the result of an operation between
3664 a and b. Return a consensus list of names if they match at least partly
3665 or list of None if they have completely different names.
3666 """
3667 if len(self.names) != len(other.names):
3668 return [None] * len(self.names)
3669 names = []
3670 for a_name, b_name in zip(self.names, other.names):
3671 if a_name == b_name:
3672 names.append(a_name)
3673 else:
3674 # TODO: what if they both have np.nan for their names?
3675 names.append(None)
3676 return names
3678 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3679 _, result_names = self._convert_can_do_setop(other)
3681 if len(result) == 0:
3682 return MultiIndex(
3683 levels=self.levels,
3684 codes=[[]] * self.nlevels,
3685 names=result_names,
3686 verify_integrity=False,
3687 )
3688 else:
3689 return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names)
3691 def _wrap_difference_result(self, other, result) -> MultiIndex:
3692 _, result_names = self._convert_can_do_setop(other)
3694 if len(result) == 0:
3695 return MultiIndex(
3696 levels=[[]] * self.nlevels,
3697 codes=[[]] * self.nlevels,
3698 names=result_names,
3699 verify_integrity=False,
3700 )
3701 else:
3702 return MultiIndex.from_tuples(result, sortorder=0, names=result_names)
3704 def _convert_can_do_setop(self, other):
3705 result_names = self.names
3707 if not isinstance(other, Index):
3709 if len(other) == 0:
3710 return self[:0], self.names
3711 else:
3712 msg = "other must be a MultiIndex or a list of tuples"
3713 try:
3714 other = MultiIndex.from_tuples(other, names=self.names)
3715 except (ValueError, TypeError) as err:
3716 # ValueError raised by tuples_to_object_array if we
3717 # have non-object dtype
3718 raise TypeError(msg) from err
3719 else:
3720 result_names = get_unanimous_names(self, other)
3722 return other, result_names
3724 # --------------------------------------------------------------------
3726 @doc(Index.astype)
3727 def astype(self, dtype, copy: bool = True):
3728 dtype = pandas_dtype(dtype)
3729 if is_categorical_dtype(dtype):
3730 msg = "> 1 ndim Categorical are not supported at this time"
3731 raise NotImplementedError(msg)
3732 elif not is_object_dtype(dtype):
3733 raise TypeError(
3734 "Setting a MultiIndex dtype to anything other than object "
3735 "is not supported"
3736 )
3737 elif copy is True:
3738 return self._view()
3739 return self
3741 def _validate_fill_value(self, item):
3742 if isinstance(item, MultiIndex):
3743 # GH#43212
3744 if item.nlevels != self.nlevels:
3745 raise ValueError("Item must have length equal to number of levels.")
3746 return item._values
3747 elif not isinstance(item, tuple):
3748 # Pad the key with empty strings if lower levels of the key
3749 # aren't specified:
3750 item = (item,) + ("",) * (self.nlevels - 1)
3751 elif len(item) != self.nlevels:
3752 raise ValueError("Item must have length equal to number of levels.")
3753 return item
3755 def insert(self, loc: int, item) -> MultiIndex:
3756 """
3757 Make new MultiIndex inserting new item at location
3759 Parameters
3760 ----------
3761 loc : int
3762 item : tuple
3763 Must be same length as number of levels in the MultiIndex
3765 Returns
3766 -------
3767 new_index : Index
3768 """
3769 item = self._validate_fill_value(item)
3771 new_levels = []
3772 new_codes = []
3773 for k, level, level_codes in zip(item, self.levels, self.codes):
3774 if k not in level:
3775 # have to insert into level
3776 # must insert at end otherwise you have to recompute all the
3777 # other codes
3778 lev_loc = len(level)
3779 level = level.insert(lev_loc, k)
3780 else:
3781 lev_loc = level.get_loc(k)
3783 new_levels.append(level)
3784 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3786 return MultiIndex(
3787 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3788 )
3790 def delete(self, loc) -> MultiIndex:
3791 """
3792 Make new index with passed location deleted
3794 Returns
3795 -------
3796 new_index : MultiIndex
3797 """
3798 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3799 return MultiIndex(
3800 levels=self.levels,
3801 codes=new_codes,
3802 names=self.names,
3803 verify_integrity=False,
3804 )
3806 @doc(Index.isin)
3807 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3808 if level is None:
3809 values = MultiIndex.from_tuples(values, names=self.names)._values
3810 return algos.isin(self._values, values)
3811 else:
3812 num = self._get_level_number(level)
3813 levs = self.get_level_values(num)
3815 if levs.size == 0:
3816 return np.zeros(len(levs), dtype=np.bool_)
3817 return levs.isin(values)
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"])
    def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | None:
        # Thin wrapper: the decorator deprecates positional use of
        # `level`/`inplace`; the actual logic lives on the base Index class.
        return super().set_names(names=names, level=level, inplace=inplace)

    rename = set_names  # MultiIndex.rename is an alias of set_names
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
    def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex:
        # Wrapper exists only to attach the positional-argument deprecation;
        # deduplication itself is inherited from the base class.
        return super().drop_duplicates(keep=keep)
    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled

    # A MultiIndex holds tuples, so elementwise arithmetic is undefined;
    # every arithmetic dunder is replaced by an invalid-op stub.
    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
3857def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
3858 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
3859 int64_codes = [ensure_int64(level_codes) for level_codes in codes]
3860 for k in range(nlevels, 0, -1):
3861 if libalgos.is_lexsorted(int64_codes[:k]):
3862 return k
3863 return 0
def sparsify_labels(label_list, start: int = 0, sentinel=""):
    """
    Blank out repeated leading labels so a rendered MultiIndex shows each
    level's value only where it changes from the previous row.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev = rows[start]

    for row in rows[start + 1 :]:
        sparse_row = []

        for lvl, (prev_val, cur_val) in enumerate(zip(prev, row)):
            if lvl == nlevels - 1:
                # The last level is always rendered in full.
                sparse_row.append(cur_val)
                out.append(sparse_row)
                break

            if prev_val == cur_val:
                sparse_row.append(sentinel)
            else:
                # First difference: emit the rest of the row verbatim.
                sparse_row.extend(row[lvl:])
                out.append(sparse_row)
                break

        prev = row

    return list(zip(*out))
3894def _get_na_rep(dtype) -> str:
3895 if is_extension_array_dtype(dtype):
3896 return f"{dtype.na_value}"
3897 else:
3898 dtype = dtype.type
3900 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN")
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if not isinstance(key, tuple):
        # Scalar key: drop a single level if the index can spare one.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Tuple key: drop one level per element of the key.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return original_index
    return index
3934def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
3935 """
3936 Coerce the array-like indexer to the smallest integer dtype that can encode all
3937 of the given categories.
3939 Parameters
3940 ----------
3941 array_like : array-like
3942 categories : array-like
3943 copy : bool
3945 Returns
3946 -------
3947 np.ndarray
3948 Non-writeable.
3949 """
3950 array_like = coerce_indexer_dtype(array_like, categories)
3951 if copy:
3952 array_like = array_like.copy()
3953 array_like.flags.writeable = False
3954 return array_like
3957def _require_listlike(level, arr, arrname: str):
3958 """
3959 Ensure that level is either None or listlike, and arr is list-of-listlike.
3960 """
3961 if level is not None and not is_list_like(level):
3962 if not is_list_like(arr):
3963 raise TypeError(f"{arrname} must be list-like")
3964 if is_list_like(arr[0]):
3965 raise TypeError(f"{arrname} must be list-like")
3966 level = [level]
3967 arr = [arr]
3968 elif level is None or is_list_like(level):
3969 if not is_list_like(arr) or not is_list_like(arr[0]):
3970 raise TypeError(f"{arrname} must be list of lists-like")
3971 return level, arr