Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/multi.py: 11%
1404 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from functools import wraps
4from sys import getsizeof
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Collection,
10 Hashable,
11 Iterable,
12 List,
13 Literal,
14 Sequence,
15 Tuple,
16 cast,
17)
18import warnings
20import numpy as np
22from pandas._config import get_option
24from pandas._libs import (
25 algos as libalgos,
26 index as libindex,
27 lib,
28)
29from pandas._libs.hashtable import duplicated
30from pandas._typing import (
31 AnyArrayLike,
32 DtypeObj,
33 F,
34 Scalar,
35 Shape,
36 npt,
37)
38from pandas.compat.numpy import function as nv
39from pandas.errors import (
40 InvalidIndexError,
41 PerformanceWarning,
42 UnsortedIndexError,
43)
44from pandas.util._decorators import (
45 Appender,
46 cache_readonly,
47 deprecate_nonkeyword_arguments,
48 doc,
49)
50from pandas.util._exceptions import find_stack_level
52from pandas.core.dtypes.cast import coerce_indexer_dtype
53from pandas.core.dtypes.common import (
54 ensure_int64,
55 ensure_platform_int,
56 is_categorical_dtype,
57 is_extension_array_dtype,
58 is_hashable,
59 is_integer,
60 is_iterator,
61 is_list_like,
62 is_object_dtype,
63 is_scalar,
64 pandas_dtype,
65)
66from pandas.core.dtypes.dtypes import ExtensionDtype
67from pandas.core.dtypes.generic import (
68 ABCDataFrame,
69 ABCDatetimeIndex,
70 ABCTimedeltaIndex,
71)
72from pandas.core.dtypes.missing import (
73 array_equivalent,
74 isna,
75)
77import pandas.core.algorithms as algos
78from pandas.core.arrays import Categorical
79from pandas.core.arrays.categorical import factorize_from_iterables
80import pandas.core.common as com
81import pandas.core.indexes.base as ibase
82from pandas.core.indexes.base import (
83 Index,
84 _index_shared_docs,
85 ensure_index,
86 get_unanimous_names,
87)
88from pandas.core.indexes.frozen import FrozenList
89from pandas.core.ops.invalid import make_invalid_op
90from pandas.core.sorting import (
91 get_group_index,
92 indexer_from_factorized,
93 lexsort_indexer,
94)
96from pandas.io.formats.printing import pprint_thing
if TYPE_CHECKING:
99 from pandas import (
100 CategoricalIndex,
101 DataFrame,
102 Series,
103 )
105_index_doc_kwargs = dict(ibase._index_doc_kwargs)
106_index_doc_kwargs.update(
107 {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
108)
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine for a MultiIndex whose label combinations fit in a uint64.

    Label combinations are mapped to positive integers by packing each
    level's codes into its own bit field.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 apiece.

        The packing is strictly monotonic, i.e. it respects the
        lexicographic order of integer combinations: see
        BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift each level's codes into its pre-computed, disjoint bit field.
        codes <<= self.offsets

        # With disjoint bit fields, OR is interchangeable with summation.
        # Reduce over the last axis: the whole array for a single key
        # (ndim == 1), per-row for multiple keys (ndim == 2).
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Engine for the (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers; backed by an ObjectEngine
    holding Python ints.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python int apiece.

        The packing is strictly monotonic, i.e. it respects the
        lexicographic order of integer combinations: see
        BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shifting can overflow uint64, so switch to Python integers
        # (object dtype) before moving each level into its bit field.
        shifted = codes.astype("object") << self.offsets

        # Disjoint bit fields make OR equivalent to summation; reduce over
        # the last axis (whole array for one key, per-row otherwise).
        return np.bitwise_or.reduce(shifted, axis=shifted.ndim - 1)
def names_compat(meth: F) -> F:
    """
    A decorator allowing either the `name` or the `names` keyword, but
    not both.

    `name` is translated into `names` before the call, which makes it
    easier to share code with the base class.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        has_name = "name" in kwargs
        if has_name and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if has_name:
            # Normalize the singular spelling to the plural one.
            kwargs["names"] = kwargs.pop("name")

        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)
210class MultiIndex(Index):
211 """
212 A multi-level, or hierarchical, index object for pandas objects.
214 Parameters
215 ----------
216 levels : sequence of arrays
217 The unique labels for each level.
218 codes : sequence of arrays
219 Integers for each level designating which label at each location.
220 sortorder : optional int
221 Level of sortedness (must be lexicographically sorted by that
222 level).
223 names : optional sequence of objects
224 Names for each of the index levels. (name is accepted for compat).
225 copy : bool, default False
226 Copy the meta-data.
227 verify_integrity : bool, default True
228 Check that the levels/codes are consistent and valid.
230 Attributes
231 ----------
232 names
233 levels
234 codes
235 nlevels
236 levshape
238 Methods
239 -------
240 from_arrays
241 from_tuples
242 from_product
243 from_frame
244 set_levels
245 set_codes
246 to_frame
247 to_flat_index
248 sortlevel
249 droplevel
250 swaplevel
251 reorder_levels
252 remove_unused_levels
253 get_locs
255 See Also
256 --------
257 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
258 MultiIndex.from_product : Create a MultiIndex from the cartesian product
259 of iterables.
260 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
261 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
262 Index : The base pandas Index type.
264 Notes
265 -----
266 See the `user guide
267 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
268 for more.
270 Examples
271 --------
272 A new ``MultiIndex`` is typically constructed using one of the helper
273 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
274 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
276 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
277 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
278 MultiIndex([(1, 'red'),
279 (1, 'blue'),
280 (2, 'red'),
281 (2, 'blue')],
282 names=['number', 'color'])
284 See further examples for how to construct a MultiIndex in the doc strings
285 of the mentioned helper methods.
286 """
    # Attributes hidden from tab completion; nothing added beyond Index's.
    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    # One entry per level; None marks an unnamed level.
    _names: list[Hashable | None] = []
    # Immutable list of per-level Index objects.
    _levels = FrozenList()
    # Immutable list of per-level integer code arrays (-1 is the
    # missing-value sentinel; see _validate_codes).
    _codes = FrozenList()
    # Attributes considered when comparing indexes.
    _comparables = ["names"]

    # Number of levels by which the index is claimed to be lexsorted,
    # or None when unknown (validated in _verify_integrity).
    sortorder: int | None
299 # --------------------------------------------------------------------
300 # Constructors
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,  # accepted for Index-signature compat; not referenced here
        copy=False,
        name=None,
        verify_integrity: bool = True,
    ) -> MultiIndex:
        """
        Construct a MultiIndex from pre-factorized ``levels`` and ``codes``.

        Raises
        ------
        TypeError
            If either ``levels`` or ``codes`` is missing.
        ValueError
            If ``levels`` and ``codes`` differ in length, are empty, or
            (when ``verify_integrity`` is True) fail the integrity checks.
        """

        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # Bypass Index.__new__'s data handling entirely.
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # _verify_integrity may rewrite codes (-1 for codes pointing
            # at NaN level values).
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()

        return result
349 def _validate_codes(self, level: list, code: list):
350 """
351 Reassign code values as -1 if their corresponding levels are NaN.
353 Parameters
354 ----------
355 code : list
356 Code to reassign.
357 level : list
358 Level to check for missing values (NaN, NaT, None).
360 Returns
361 -------
362 new code where code value = -1 if it corresponds
363 to a level with missing values (NaN, NaT, None).
364 """
365 null_mask = isna(level)
366 if np.any(null_mask):
367 # error: Incompatible types in assignment
368 # (expression has type "ndarray[Any, dtype[Any]]",
369 # variable has type "List[Any]")
370 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
371 return code
    def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
        """
        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actually sortorder.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # All code arrays must share one length; use the first as reference.
        codes_length = len(codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # Codes index into the level, so the max code must be in bounds.
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            # -1 is the only legal negative code (missing-value sentinel).
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # A claimed sortorder may not exceed the actual lexsort depth.
            if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
                )

        # Rewrite codes that point at missing level values to -1.
        codes = [
            self._validate_codes(level, code) for level, code in zip(levels, codes)
        ]
        new_codes = FrozenList(codes)
        return new_codes
435 @classmethod
436 def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex:
437 """
438 Convert arrays to MultiIndex.
440 Parameters
441 ----------
442 arrays : list / sequence of array-likes
443 Each array-like gives one level's value for each data point.
444 len(arrays) is the number of levels.
445 sortorder : int or None
446 Level of sortedness (must be lexicographically sorted by that
447 level).
448 names : list / sequence of str, optional
449 Names for the levels in the index.
451 Returns
452 -------
453 MultiIndex
455 See Also
456 --------
457 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
458 MultiIndex.from_product : Make a MultiIndex from cartesian product
459 of iterables.
460 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
462 Examples
463 --------
464 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
465 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
466 MultiIndex([(1, 'red'),
467 (1, 'blue'),
468 (2, 'red'),
469 (2, 'blue')],
470 names=['number', 'color'])
471 """
472 error_msg = "Input must be a list / sequence of array-likes."
473 if not is_list_like(arrays):
474 raise TypeError(error_msg)
475 elif is_iterator(arrays):
476 arrays = list(arrays)
478 # Check if elements of array are list-like
479 for array in arrays:
480 if not is_list_like(array):
481 raise TypeError(error_msg)
483 # Check if lengths of all arrays are equal or not,
484 # raise ValueError, if not
485 for i in range(1, len(arrays)):
486 if len(arrays[i]) != len(arrays[i - 1]):
487 raise ValueError("all arrays must be same length")
489 codes, levels = factorize_from_iterables(arrays)
490 if names is lib.no_default:
491 names = [getattr(arr, "name", None) for arr in arrays]
493 return cls(
494 levels=levels,
495 codes=codes,
496 sortorder=sortorder,
497 names=names,
498 verify_integrity=False,
499 )
    @classmethod
    @names_compat
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1,  'red'),
                    (1, 'blue'),
                    (2,  'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        elif is_iterator(tuples):
            tuples = list(tuples)
        tuples = cast(Collection[Tuple[Hashable, ...]], tuples)

        # handling the empty tuple cases
        # (all entries are empty tuples: build a single level holding the
        # empty tuples, with all-zero codes)
        if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
            codes = [np.zeros(len(tuples))]
            levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
            return cls(
                levels=levels,
                codes=codes,
                sortorder=sortorder,
                names=names,
                verify_integrity=False,
            )

        arrays: list[Sequence[Hashable]]
        if len(tuples) == 0:
            # Empty input: the number of levels can only come from `names`.
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # error: Argument 1 to "len" has incompatible type "Hashable";
            # expected "Sized"
            arrays = [[]] * len(names)  # type: ignore[arg-type]
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = np.asarray(tuples._values)

            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # Generic iterable of tuples: transpose lazily via zip.
            arrs = zip(*tuples)
            arrays = cast(List[Sequence[Hashable]], arrs)

        return cls.from_arrays(arrays, sortorder=sortorder, names=names)
582 @classmethod
583 def from_product(
584 cls,
585 iterables: Sequence[Iterable[Hashable]],
586 sortorder: int | None = None,
587 names: Sequence[Hashable] | lib.NoDefault = lib.no_default,
588 ) -> MultiIndex:
589 """
590 Make a MultiIndex from the cartesian product of multiple iterables.
592 Parameters
593 ----------
594 iterables : list / sequence of iterables
595 Each iterable has unique labels for each level of the index.
596 sortorder : int or None
597 Level of sortedness (must be lexicographically sorted by that
598 level).
599 names : list / sequence of str, optional
600 Names for the levels in the index.
602 .. versionchanged:: 1.0.0
604 If not explicitly provided, names will be inferred from the
605 elements of iterables if an element has a name attribute
607 Returns
608 -------
609 MultiIndex
611 See Also
612 --------
613 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
614 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
615 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
617 Examples
618 --------
619 >>> numbers = [0, 1, 2]
620 >>> colors = ['green', 'purple']
621 >>> pd.MultiIndex.from_product([numbers, colors],
622 ... names=['number', 'color'])
623 MultiIndex([(0, 'green'),
624 (0, 'purple'),
625 (1, 'green'),
626 (1, 'purple'),
627 (2, 'green'),
628 (2, 'purple')],
629 names=['number', 'color'])
630 """
631 from pandas.core.reshape.util import cartesian_product
633 if not is_list_like(iterables):
634 raise TypeError("Input must be a list / sequence of iterables.")
635 elif is_iterator(iterables):
636 iterables = list(iterables)
638 codes, levels = factorize_from_iterables(iterables)
639 if names is lib.no_default:
640 names = [getattr(it, "name", None) for it in iterables]
642 # codes are all ndarrays, so cartesian_product is lossless
643 codes = cartesian_product(codes)
644 return cls(levels, codes, sortorder=sortorder, names=names)
646 @classmethod
647 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
648 """
649 Make a MultiIndex from a DataFrame.
651 Parameters
652 ----------
653 df : DataFrame
654 DataFrame to be converted to MultiIndex.
655 sortorder : int, optional
656 Level of sortedness (must be lexicographically sorted by that
657 level).
658 names : list-like, optional
659 If no names are provided, use the column names, or tuple of column
660 names if the columns is a MultiIndex. If a sequence, overwrite
661 names with the given sequence.
663 Returns
664 -------
665 MultiIndex
666 The MultiIndex representation of the given DataFrame.
668 See Also
669 --------
670 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
671 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
672 MultiIndex.from_product : Make a MultiIndex from cartesian product
673 of iterables.
675 Examples
676 --------
677 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
678 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
679 ... columns=['a', 'b'])
680 >>> df
681 a b
682 0 HI Temp
683 1 HI Precip
684 2 NJ Temp
685 3 NJ Precip
687 >>> pd.MultiIndex.from_frame(df)
688 MultiIndex([('HI', 'Temp'),
689 ('HI', 'Precip'),
690 ('NJ', 'Temp'),
691 ('NJ', 'Precip')],
692 names=['a', 'b'])
694 Using explicit names, instead of the column names
696 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
697 MultiIndex([('HI', 'Temp'),
698 ('HI', 'Precip'),
699 ('NJ', 'Temp'),
700 ('NJ', 'Precip')],
701 names=['state', 'observation'])
702 """
703 if not isinstance(df, ABCDataFrame):
704 raise TypeError("Input must be a DataFrame")
706 column_names, columns = zip(*df.items())
707 names = column_names if names is None else names
708 return cls.from_arrays(columns, sortorder=sortorder, names=names)
710 # --------------------------------------------------------------------
    @cache_readonly
    def _values(self) -> np.ndarray:
        """Return a 1-D object ndarray with one entry per index location."""
        # We override here, since our parent uses _data, which we don't use.
        values = []

        for i in range(self.nlevels):
            index = self.levels[i]
            codes = self.codes[i]

            vals = index
            if is_categorical_dtype(vals.dtype):
                # Unwrap categoricals to their raw values.
                vals = cast("CategoricalIndex", vals)
                vals = vals._data._internal_get_values()

            is_dti = isinstance(vals, ABCDatetimeIndex)

            if is_dti:
                # TODO: this can be removed after Timestamp.freq is removed
                # The astype(object) below does not remove the freq from
                # the underlying Timestamps so we remove it here to match
                # the behavior of self._get_level_values
                vals = algos.take_nd(vals, codes, fill_value=index._na_value)

            if isinstance(vals.dtype, ExtensionDtype) or isinstance(
                vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
            ):
                # Fall back to object dtype so values can be zipped below.
                vals = vals.astype(object)

            vals = np.array(vals, copy=False)
            if not is_dti:
                # Expand level values to full length via the codes; -1 codes
                # become the level's NA value (datetimes did this above).
                vals = algos.take_nd(vals, codes, fill_value=index._na_value)
            values.append(vals)

        # Combine the per-level columns into a single array of tuples.
        arr = lib.fast_zip(values)
        return arr
    @property
    def values(self) -> np.ndarray:
        """Return the index values, delegating to the cached ``_values``."""
        return self._values
    @property
    def array(self):
        """
        Raises a ValueError for `MultiIndex` because there's no single
        array backing a MultiIndex.

        Raises
        ------
        ValueError
            Always; a MultiIndex is backed by multiple level/code arrays.
        """
        raise ValueError(
            "MultiIndex has no single backing array. Use "
            "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
        )
767 @cache_readonly
768 def dtypes(self) -> Series:
769 """
770 Return the dtypes as a Series for the underlying MultiIndex.
771 """
772 from pandas import Series
774 names = com.fill_missing_names([level.name for level in self.levels])
775 return Series([level.dtype for level in self.levels], index=Index(names))
    def __len__(self) -> int:
        # All code arrays share the same length; read it off the first.
        return len(self.codes[0])
780 # --------------------------------------------------------------------
781 # Levels Methods
    @cache_readonly
    def levels(self) -> FrozenList:
        """Per-level Index objects, carrying the stored level names."""
        # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
        # create new IndexEngine
        # https://github.com/pandas-dev/pandas/issues/31648
        result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
        for level in result:
            # disallow midx.levels[0].name = "foo"
            level._no_setting_name = True
        return FrozenList(result)
    def _set_levels(
        self,
        levels,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """Replace all levels, or only those named in ``level``, in place."""
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # Replace every level.
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._view() for lev in levels
            )
        else:
            # Replace only the requested levels, keeping the others.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels_list = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
            new_levels = FrozenList(new_levels_list)

        if verify_integrity:
            # Codes may be rewritten (e.g. -1 for NaN level values).
            new_codes = self._verify_integrity(levels=new_levels)
            self._codes = new_codes

        names = self.names
        self._levels = new_levels
        if any(names):
            # NOTE(review): names are re-applied after the swap — presumably
            # to keep the new level Indexes' names in sync; confirm.
            self._set_names(names)

        self._reset_cache()
836 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "levels"])
837 def set_levels(
838 self, levels, level=None, inplace=None, verify_integrity: bool = True
839 ):
840 """
841 Set new levels on MultiIndex. Defaults to returning new index.
843 Parameters
844 ----------
845 levels : sequence or list of sequence
846 New level(s) to apply.
847 level : int, level name, or sequence of int/level names (default None)
848 Level(s) to set (None for all levels).
849 inplace : bool
850 If True, mutates in place.
852 .. deprecated:: 1.2.0
853 verify_integrity : bool, default True
854 If True, checks that levels and codes are compatible.
856 Returns
857 -------
858 new index (of same type and class...etc) or None
859 The same type as the caller or None if ``inplace=True``.
861 Examples
862 --------
863 >>> idx = pd.MultiIndex.from_tuples(
864 ... [
865 ... (1, "one"),
866 ... (1, "two"),
867 ... (2, "one"),
868 ... (2, "two"),
869 ... (3, "one"),
870 ... (3, "two")
871 ... ],
872 ... names=["foo", "bar"]
873 ... )
874 >>> idx
875 MultiIndex([(1, 'one'),
876 (1, 'two'),
877 (2, 'one'),
878 (2, 'two'),
879 (3, 'one'),
880 (3, 'two')],
881 names=['foo', 'bar'])
883 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
884 MultiIndex([('a', 1),
885 ('a', 2),
886 ('b', 1),
887 ('b', 2),
888 ('c', 1),
889 ('c', 2)],
890 names=['foo', 'bar'])
891 >>> idx.set_levels(['a', 'b', 'c'], level=0)
892 MultiIndex([('a', 'one'),
893 ('a', 'two'),
894 ('b', 'one'),
895 ('b', 'two'),
896 ('c', 'one'),
897 ('c', 'two')],
898 names=['foo', 'bar'])
899 >>> idx.set_levels(['a', 'b'], level='bar')
900 MultiIndex([(1, 'a'),
901 (1, 'b'),
902 (2, 'a'),
903 (2, 'b'),
904 (3, 'a'),
905 (3, 'b')],
906 names=['foo', 'bar'])
908 If any of the levels passed to ``set_levels()`` exceeds the
909 existing length, all of the values from that argument will
910 be stored in the MultiIndex levels, though the values will
911 be truncated in the MultiIndex output.
913 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
914 MultiIndex([('a', 1),
915 ('a', 2),
916 ('b', 1),
917 ('b', 2),
918 ('c', 1),
919 ('c', 2)],
920 names=['foo', 'bar'])
921 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
922 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
923 """
924 if inplace is not None:
925 warnings.warn(
926 "inplace is deprecated and will be removed in a future version.",
927 FutureWarning,
928 stacklevel=find_stack_level(),
929 )
930 else:
931 inplace = False
933 if is_list_like(levels) and not isinstance(levels, Index):
934 levels = list(levels)
936 level, levels = _require_listlike(level, levels, "Levels")
938 if inplace:
939 idx = self
940 else:
941 idx = self._view()
942 idx._reset_identity()
943 idx._set_levels(
944 levels, level=level, validate=True, verify_integrity=verify_integrity
945 )
946 if not inplace:
947 return idx
    @property
    def nlevels(self) -> int:
        """
        Integer number of levels in this MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
        >>> mi
        MultiIndex([('a', 'b', 'c')],
                   )
        >>> mi.nlevels
        3
        """
        return len(self._levels)
965 @property
966 def levshape(self) -> Shape:
967 """
968 A tuple with the length of each level.
970 Examples
971 --------
972 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
973 >>> mi
974 MultiIndex([('a', 'b', 'c')],
975 )
976 >>> mi.levshape
977 (1, 1, 1)
978 """
979 return tuple(len(x) for x in self.levels)
981 # --------------------------------------------------------------------
982 # Codes Methods
    @property
    def codes(self):
        """FrozenList of per-level integer code arrays (-1 marks missing)."""
        return self._codes
    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """Replace all code arrays, or only those named in ``level``, in place."""
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        if level is None:
            # Replace every code array, coercing each against its level
            # (_coerce_indexer_frozen is defined elsewhere in this module).
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
        else:
            # Replace only the requested levels' codes.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            # May rewrite codes (e.g. -1 for NaN level values).
            new_codes = self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        self._reset_cache()
1025 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "codes"])
1026 def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True):
1027 """
1028 Set new codes on MultiIndex. Defaults to returning new index.
1030 Parameters
1031 ----------
1032 codes : sequence or list of sequence
1033 New codes to apply.
1034 level : int, level name, or sequence of int/level names (default None)
1035 Level(s) to set (None for all levels).
1036 inplace : bool
1037 If True, mutates in place.
1039 .. deprecated:: 1.2.0
1040 verify_integrity : bool, default True
1041 If True, checks that levels and codes are compatible.
1043 Returns
1044 -------
1045 new index (of same type and class...etc) or None
1046 The same type as the caller or None if ``inplace=True``.
1048 Examples
1049 --------
1050 >>> idx = pd.MultiIndex.from_tuples(
1051 ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
1052 ... )
1053 >>> idx
1054 MultiIndex([(1, 'one'),
1055 (1, 'two'),
1056 (2, 'one'),
1057 (2, 'two')],
1058 names=['foo', 'bar'])
1060 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
1061 MultiIndex([(2, 'one'),
1062 (1, 'one'),
1063 (2, 'two'),
1064 (1, 'two')],
1065 names=['foo', 'bar'])
1066 >>> idx.set_codes([1, 0, 1, 0], level=0)
1067 MultiIndex([(2, 'one'),
1068 (1, 'two'),
1069 (2, 'one'),
1070 (1, 'two')],
1071 names=['foo', 'bar'])
1072 >>> idx.set_codes([0, 0, 1, 1], level='bar')
1073 MultiIndex([(1, 'one'),
1074 (1, 'one'),
1075 (2, 'two'),
1076 (2, 'two')],
1077 names=['foo', 'bar'])
1078 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
1079 MultiIndex([(2, 'one'),
1080 (1, 'one'),
1081 (2, 'two'),
1082 (1, 'two')],
1083 names=['foo', 'bar'])
1084 """
1085 if inplace is not None:
1086 warnings.warn(
1087 "inplace is deprecated and will be removed in a future version.",
1088 FutureWarning,
1089 stacklevel=find_stack_level(),
1090 )
1091 else:
1092 inplace = False
1094 level, codes = _require_listlike(level, codes, "Codes")
1096 if inplace:
1097 idx = self
1098 else:
1099 idx = self._view()
1100 idx._reset_identity()
1101 idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
1102 if not inplace:
1103 return idx
1105 # --------------------------------------------------------------------
1106 # Index Internals
    @cache_readonly
    def _engine(self):
        """Return the cached lookup engine for this MultiIndex."""
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes (including -1 for NaN):
        sizes = np.ceil(np.log2([len(level) + 1 for level in self.levels]))

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1130 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1131 # type "Type[MultiIndex]" in supertype "Index"
1132 @property
1133 def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override]
1134 return type(self).from_tuples
1136 @doc(Index._shallow_copy)
1137 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
1138 names = name if name is not lib.no_default else self.names
1140 return type(self).from_tuples(values, sortorder=None, names=names)
1142 def _view(self) -> MultiIndex:
1143 result = type(self)(
1144 levels=self.levels,
1145 codes=self.codes,
1146 sortorder=self.sortorder,
1147 names=self.names,
1148 verify_integrity=False,
1149 )
1150 result._cache = self._cache.copy()
1151 result._cache.pop("levels", None) # GH32669
1152 return result
1154 # --------------------------------------------------------------------
    def copy(
        self,
        names=None,
        dtype=None,
        levels=None,
        codes=None,
        deep=False,
        name=None,
    ):
        """
        Make a copy of this object. Names, dtype, levels and codes can be
        passed and will be set on new copy.

        Parameters
        ----------
        names : sequence, optional
        dtype : numpy dtype or pandas type, optional

            .. deprecated:: 1.2.0
        levels : sequence, optional

            .. deprecated:: 1.2.0
        codes : sequence, optional

            .. deprecated:: 1.2.0
        deep : bool, default False
        name : Label
            Kept for compatibility with 1-dimensional Index. Should not be used.

        Returns
        -------
        MultiIndex

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        This could be potentially expensive on large MultiIndex objects.
        """
        names = self._validate_names(name=name, names=names, deep=deep)
        # A plain shallow copy may share the cached identity (_id) with self;
        # deep copies and explicit levels/codes overrides must not.
        keep_id = not deep
        if levels is not None:
            warnings.warn(
                "parameter levels is deprecated and will be removed in a future "
                "version. Use the set_levels method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            keep_id = False
        if codes is not None:
            warnings.warn(
                "parameter codes is deprecated and will be removed in a future "
                "version. Use the set_codes method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            keep_id = False

        if deep:
            from copy import deepcopy

            if levels is None:
                levels = deepcopy(self.levels)
            if codes is None:
                codes = deepcopy(self.codes)

        levels = levels if levels is not None else self.levels
        codes = codes if codes is not None else self.codes

        new_index = type(self)(
            levels=levels,
            codes=codes,
            sortorder=self.sortorder,
            names=names,
            verify_integrity=False,
        )
        new_index._cache = self._cache.copy()
        new_index._cache.pop("levels", None)  # GH32669
        if keep_id:
            new_index._id = self._id

        if dtype:
            warnings.warn(
                "parameter dtype is deprecated and will be removed in a future "
                "version. Use the astype method instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            new_index = new_index.astype(dtype)
        return new_index
1247 def __array__(self, dtype=None) -> np.ndarray:
1248 """the array interface, return my values"""
1249 return self.values
1251 def view(self, cls=None):
1252 """this is defined as a copy with the same identity"""
1253 result = self.copy()
1254 result._id = self._id
1255 return result
1257 @doc(Index.__contains__)
1258 def __contains__(self, key: Any) -> bool:
1259 hash(key)
1260 try:
1261 self.get_loc(key)
1262 return True
1263 except (LookupError, TypeError, ValueError):
1264 return False
    @cache_readonly
    def dtype(self) -> np.dtype:
        # A MultiIndex materializes as an array of tuples, so its dtype is
        # always object.
        return np.dtype("O")
1270 def _is_memory_usage_qualified(self) -> bool:
1271 """return a boolean if we need a qualified .info display"""
1273 def f(level):
1274 return "mixed" in level or "string" in level or "unicode" in level
1276 return any(f(level) for level in self._inferred_type_levels)
    @doc(Index.memory_usage)
    def memory_usage(self, deep: bool = False) -> int:
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)
    @cache_readonly
    def nbytes(self) -> int:
        """return the number of bytes in the underlying data"""
        # Shallow (deep=False) count; see _nbytes for the breakdown.
        return self._nbytes(False)
1290 def _nbytes(self, deep: bool = False) -> int:
1291 """
1292 return the number of bytes in the underlying data
1293 deeply introspect the level data if deep=True
1295 include the engine hashtable
1297 *this is in internal routine*
1299 """
1300 # for implementations with no useful getsizeof (PyPy)
1301 objsize = 24
1303 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
1304 label_nbytes = sum(i.nbytes for i in self.codes)
1305 names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
1306 result = level_nbytes + label_nbytes + names_nbytes
1308 # include our engine hashtable
1309 result += self._engine.sizeof(deep=deep)
1310 return result
1312 # --------------------------------------------------------------------
1313 # Rendering Methods
1315 def _formatter_func(self, tup):
1316 """
1317 Formats each item in tup according to its level's formatter function.
1318 """
1319 formatter_funcs = [level._formatter_func for level in self.levels]
1320 return tuple(func(val) for func, val in zip(formatter_funcs, tup))
    def _format_native_types(
        self, *, na_rep="nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        # Render each level's values as strings, appending na_rep as an extra
        # "level value" for positions whose code is the -1 missing sentinel.
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        # Render the index as a list of adjoined strings, or (adjoin=False)
        # as a list of per-level string lists, optionally sparsified.
        # `name` is a legacy alias that overrides `names` when given.
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:

                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            # Prepend the level name header when requested.
            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            from pandas.io.formats.format import get_adjustment

            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1433 # --------------------------------------------------------------------
1434 # Names Methods
    def _get_names(self) -> FrozenList:
        # Return the level names wrapped in a FrozenList (fget of `names`).
        return FrozenList(self._names)
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        # Normalize `level` to a sequence of zero-based level numbers.
        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
            self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
    # Property wiring: reads go through _get_names, writes through _set_names
    # (which validates lengths and mutates in place).
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1516 # --------------------------------------------------------------------
    @doc(Index._get_grouper_for_level)
    def _get_grouper_for_level(
        self,
        mapper,
        *,
        level=None,
        dropna: bool = True,
    ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
        if mapper is not None:
            indexer = self.codes[level]
            # Handle group mapping function and return
            level_values = self.levels[level].take(indexer)
            grouper = level_values.map(mapper)
            return grouper, None, None

        values = self.get_level_values(level)
        # Factorize the level's values; with dropna=True NA values get the
        # -1 sentinel code.
        codes, uniques = algos.factorize(values, sort=True, use_na_sentinel=dropna)
        assert isinstance(uniques, Index)

        if self.levels[level]._can_hold_na:
            grouper = uniques.take(codes, fill_value=True)
        else:
            grouper = uniques.take(codes)

        return grouper, codes, uniques
    @cache_readonly
    def inferred_type(self) -> str:
        # The flattened values of a MultiIndex are tuples, reported as "mixed".
        return "mixed"
1548 def _get_level_number(self, level) -> int:
1549 count = self.names.count(level)
1550 if (count > 1) and not is_integer(level):
1551 raise ValueError(
1552 f"The name {level} occurs multiple times, use a level number"
1553 )
1554 try:
1555 level = self.names.index(level)
1556 except ValueError as err:
1557 if not is_integer(level):
1558 raise KeyError(f"Level {level} not found") from err
1559 elif level < 0:
1560 level += self.nlevels
1561 if level < 0:
1562 orig_level = level - self.nlevels
1563 raise IndexError(
1564 f"Too many levels: Index has only {self.nlevels} levels, "
1565 f"{orig_level} is not a valid level number"
1566 ) from err
1567 # Note: levels are zero-based
1568 elif level >= self.nlevels:
1569 raise IndexError(
1570 f"Too many levels: Index has only {self.nlevels} levels, "
1571 f"not {level + 1}"
1572 ) from err
1573 return level
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # Any -1 code (missing value) makes the index non-monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:

            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1607 @cache_readonly
1608 def is_monotonic_decreasing(self) -> bool:
1609 """
1610 Return a boolean if the values are equal or decreasing.
1611 """
1612 # monotonic decreasing if and only if reverse is monotonic increasing
1613 return self[::-1].is_monotonic_increasing
1615 @cache_readonly
1616 def _inferred_type_levels(self) -> list[str]:
1617 """return a list of the inferred types, one for each level"""
1618 return [i.inferred_type for i in self.levels]
    @doc(Index.duplicated)
    def duplicated(self, keep="first") -> npt.NDArray[np.bool_]:
        # Encode each row as a single group id over the cartesian product of
        # the level sizes, then detect duplicates among those ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        # Calls the module-level `duplicated` imported from
        # pandas._libs.hashtable (not this method recursively).
        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1631 def fillna(self, value=None, downcast=None):
1632 """
1633 fillna is not implemented for MultiIndex
1634 """
1635 raise NotImplementedError("isna is not defined for MultiIndex")
1637 @doc(Index.dropna)
1638 def dropna(self, how: str = "any") -> MultiIndex:
1639 nans = [level_codes == -1 for level_codes in self.codes]
1640 if how == "any":
1641 indexer = np.any(nans, axis=0)
1642 elif how == "all":
1643 indexer = np.all(nans, axis=0)
1644 else:
1645 raise ValueError(f"invalid how option: {how}")
1647 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1648 return self.set_codes(codes=new_codes)
1650 def _get_level_values(self, level: int, unique: bool = False) -> Index:
1651 """
1652 Return vector of label values for requested level,
1653 equal to the length of the index
1655 **this is an internal method**
1657 Parameters
1658 ----------
1659 level : int
1660 unique : bool, default False
1661 if True, drop duplicated values
1663 Returns
1664 -------
1665 Index
1666 """
1667 lev = self.levels[level]
1668 level_codes = self.codes[level]
1669 name = self._names[level]
1670 if unique:
1671 level_codes = algos.unique(level_codes)
1672 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
1673 return lev._shallow_copy(filled, name=name)
1675 def get_level_values(self, level):
1676 """
1677 Return vector of label values for requested level.
1679 Length of returned vector is equal to the length of the index.
1681 Parameters
1682 ----------
1683 level : int or str
1684 ``level`` is either the integer position of the level in the
1685 MultiIndex, or the name of the level.
1687 Returns
1688 -------
1689 values : Index
1690 Values is a level of this MultiIndex converted to
1691 a single :class:`Index` (or subclass thereof).
1693 Notes
1694 -----
1695 If the level contains missing values, the result may be casted to
1696 ``float`` with missing values specified as ``NaN``. This is because
1697 the level is converted to a regular ``Index``.
1699 Examples
1700 --------
1701 Create a MultiIndex:
1703 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1704 >>> mi.names = ['level_1', 'level_2']
1706 Get level values by supplying level as either integer or name:
1708 >>> mi.get_level_values(0)
1709 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1710 >>> mi.get_level_values('level_2')
1711 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1713 If a level contains missing values, the return type of the level
1714 maybe casted to ``float``.
1716 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1717 level_0 int64
1718 level_1 int64
1719 dtype: object
1720 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1721 Float64Index([1.0, nan, 2.0], dtype='float64')
1722 """
1723 level = self._get_level_number(level)
1724 values = self._get_level_values(level)
1725 return values
1727 @doc(Index.unique)
1728 def unique(self, level=None):
1730 if level is None:
1731 return super().unique()
1732 else:
1733 level = self._get_level_number(level)
1734 return self._get_level_values(level=level, unique=True)
    def to_frame(
        self,
        index: bool = True,
        name=lib.no_default,
        allow_duplicates: bool = False,
    ) -> DataFrame:
        """
        Create a DataFrame with the levels of the MultiIndex as columns.

        Column ordering is determined by the DataFrame constructor with data as
        a dict.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original MultiIndex.

        name : list / sequence of str, optional
            The passed names should substitute index level names.

        allow_duplicates : bool, optional default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        DataFrame : a DataFrame containing the original MultiIndex data.

        See Also
        --------
        DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
            tabular data.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
        >>> mi
        MultiIndex([('a', 'c'),
                    ('b', 'd')],
                   )

        >>> df = mi.to_frame()
        >>> df
             0  1
        a c  a  c
        b d  b  d

        >>> df = mi.to_frame(index=False)
        >>> df
           0  1
        0  a  c
        1  b  d

        >>> df = mi.to_frame(name=['x', 'y'])
        >>> df
             x  y
        a c  a  c
        b d  b  d
        """
        from pandas import DataFrame

        # `name=None` is deprecated; normalize it to the no-default sentinel
        # after warning.
        if name is None:
            warnings.warn(
                "Explicitly passing `name=None` currently preserves the Index's name "
                "or uses a default name of 0. This behaviour is deprecated, and in "
                "the future `None` will be used as the name of the resulting "
                "DataFrame column.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            name = lib.no_default

        if name is not lib.no_default:
            if not is_list_like(name):
                raise TypeError("'name' must be a list / sequence of column names.")

            if len(name) != len(self.levels):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )
            idx_names = name
        else:
            idx_names = self._get_level_names()

        if not allow_duplicates and len(set(idx_names)) != len(idx_names):
            raise ValueError(
                "Cannot create duplicate column labels if allow_duplicates is False"
            )

        # Guarantee resulting column order - PY36+ dict maintains insertion order
        result = DataFrame(
            {level: self._get_level_values(level) for level in range(len(self.levels))},
            copy=False,
        )
        result.columns = idx_names

        if index:
            result.index = self
        return result
1837 # error: Return type "Index" of "to_flat_index" incompatible with return type
1838 # "MultiIndex" in supertype "Index"
1839 def to_flat_index(self) -> Index: # type: ignore[override]
1840 """
1841 Convert a MultiIndex to an Index of Tuples containing the level values.
1843 Returns
1844 -------
1845 pd.Index
1846 Index with the MultiIndex data represented in Tuples.
1848 See Also
1849 --------
1850 MultiIndex.from_tuples : Convert flat index back to MultiIndex.
1852 Notes
1853 -----
1854 This method will simply return the caller if called by anything other
1855 than a MultiIndex.
1857 Examples
1858 --------
1859 >>> index = pd.MultiIndex.from_product(
1860 ... [['foo', 'bar'], ['baz', 'qux']],
1861 ... names=['a', 'b'])
1862 >>> index.to_flat_index()
1863 Index([('foo', 'baz'), ('foo', 'qux'),
1864 ('bar', 'baz'), ('bar', 'qux')],
1865 dtype='object')
1866 """
1867 return Index(self._values, tupleize_cols=False)
1869 def is_lexsorted(self) -> bool:
1870 warnings.warn(
1871 "MultiIndex.is_lexsorted is deprecated as a public function, "
1872 "users should use MultiIndex.is_monotonic_increasing instead.",
1873 FutureWarning,
1874 stacklevel=find_stack_level(),
1875 )
1876 return self._is_lexsorted()
1878 def _is_lexsorted(self) -> bool:
1879 """
1880 Return True if the codes are lexicographically sorted.
1882 Returns
1883 -------
1884 bool
1886 Examples
1887 --------
1888 In the below examples, the first level of the MultiIndex is sorted because
1889 a<b<c, so there is no need to look at the next level.
1891 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted()
1892 True
1893 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted()
1894 True
1896 In case there is a tie, the lexicographical sorting looks
1897 at the next level of the MultiIndex.
1899 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted()
1900 True
1901 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted()
1902 False
1903 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1904 ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted()
1905 True
1906 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1907 ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted()
1908 False
1909 """
1910 return self._lexsort_depth == self.nlevels
1912 @property
1913 def lexsort_depth(self) -> int:
1914 warnings.warn(
1915 "MultiIndex.lexsort_depth is deprecated as a public function, "
1916 "users should use MultiIndex.is_monotonic_increasing instead.",
1917 FutureWarning,
1918 stacklevel=find_stack_level(),
1919 )
1920 return self._lexsort_depth
1922 @cache_readonly
1923 def _lexsort_depth(self) -> int:
1924 """
1925 Compute and return the lexsort_depth, the number of levels of the
1926 MultiIndex that are sorted lexically
1928 Returns
1929 -------
1930 int
1931 """
1932 if self.sortorder is not None:
1933 return self.sortorder
1934 return _lexsort_depth(self.codes, self.nlevels)
    def _sort_levels_monotonic(self) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        # Already sorted: nothing to do.
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):

            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    # Unsortable level values: leave this level untouched.
                    pass
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    level_codes = algos.take_nd(ri, level_codes)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):

            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:

                # NOTE(review): this `break` exits the whole loop (not just
                # this level) when the level itself contains NaN and all its
                # values are used — confirm that skipping the remaining
                # levels here is intended rather than `continue`.
                if lev.isna().any() and len(uniques) == len(lev):
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
2090 # --------------------------------------------------------------------
2091 # Pickling Methods
2093 def __reduce__(self):
2094 """Necessary for making this object picklable"""
2095 d = {
2096 "levels": list(self.levels),
2097 "codes": list(self.codes),
2098 "sortorder": self.sortorder,
2099 "names": list(self.names),
2100 }
2101 return ibase._new_Index, (type(self), d), None
2103 # --------------------------------------------------------------------
    def __getitem__(self, key):
        if is_scalar(key):
            key = com.cast_scalar_indexer(key, warn_float=True)

            # Scalar position: rebuild that row's tuple, substituting NaN for
            # any level whose code is the -1 missing sentinel.
            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                # A boolean mask preserves the relative order of kept rows.
                sortorder = self.sortorder
            elif isinstance(key, slice):
                # A non-negative-step slice also preserves order.
                if key.step is None or key.step > 0:
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2139 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2140 """
2141 Fastpath for __getitem__ when we know we have a slice.
2142 """
2143 sortorder = None
2144 if slobj.step is None or slobj.step > 0:
2145 sortorder = self.sortorder
2147 new_codes = [level_codes[slobj] for level_codes in self.codes]
2149 return type(self)(
2150 levels=self.levels,
2151 codes=new_codes,
2152 names=self._names,
2153 sortorder=sortorder,
2154 verify_integrity=False,
2155 )
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: int = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the missing-value sentinel in the level codes
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            # Positions requested as -1 become missing in every level.
            mask = indices == -1
            if mask.any():
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
    def append(self, other):
        """
        Append a collection of Index options together

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # Fast path: every other index is a MultiIndex with at least as
            # many levels, so append level-by-level.
            arrays = []
            for i in range(self.nlevels):
                label = self._get_level_values(i)
                appended = [o._get_level_values(i) for o in other]
                arrays.append(label.append(appended))
            return MultiIndex.from_arrays(arrays, names=self.names)

        # Fallback: concatenate the materialized tuple values.
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            return MultiIndex.from_tuples(new_tuples, names=self.names)
        except (TypeError, IndexError):
            # Not all entries are tuples; return a plain (inferred) Index.
            return Index._with_infer(new_tuples)
2223 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
2224 return self._values.argsort(*args, **kwargs)
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        # Repeating the codes is sufficient; the levels themselves are reused.
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
    def drop(self, codes, level=None, errors="raise"):
        """
        Make new MultiIndex with passed list of codes deleted

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', labels that are not found do not raise KeyError.

        Returns
        -------
        dropped : MultiIndex
        """
        if level is not None:
            # Label-based dropping restricted to a single level.
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                # Normalize e.g. a list of tuples to an object-dtype array.
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # Collect the integer positions to delete, one key at a time.
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    # Expand the slice into explicit positions.
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # Convert the boolean mask to integer positions.
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
    def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex:
        """
        Drop all rows whose label in ``level`` appears in ``codes``.

        Parameters
        ----------
        codes : array-like
            Labels to remove from the given level.
        level : int or level name
        errors : str, default 'raise'
            If 'ignore', labels missing from the level do not raise.

        Returns
        -------
        MultiIndex
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        # -2 marks "requested label genuinely absent from the level";
        # -1 is reserved because it also encodes NaN positions in self.codes[i].
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # NOTE(review): when the level has the same length as the index,
            # NaN requests are also treated as not-found — confirm intent.
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # Keep only the rows whose code does not match any targeted value.
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2316 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2317 """
2318 Swap level i with level j.
2320 Calling this method does not change the ordering of the values.
2322 Parameters
2323 ----------
2324 i : int, str, default -2
2325 First level of index to be swapped. Can pass level name as string.
2326 Type of parameters can be mixed.
2327 j : int, str, default -1
2328 Second level of index to be swapped. Can pass level name as string.
2329 Type of parameters can be mixed.
2331 Returns
2332 -------
2333 MultiIndex
2334 A new MultiIndex.
2336 See Also
2337 --------
2338 Series.swaplevel : Swap levels i and j in a MultiIndex.
2339 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2340 particular axis.
2342 Examples
2343 --------
2344 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2345 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2346 >>> mi
2347 MultiIndex([('a', 'bb'),
2348 ('a', 'aa'),
2349 ('b', 'bb'),
2350 ('b', 'aa')],
2351 )
2352 >>> mi.swaplevel(0, 1)
2353 MultiIndex([('bb', 'a'),
2354 ('aa', 'a'),
2355 ('bb', 'b'),
2356 ('aa', 'b')],
2357 )
2358 """
2359 new_levels = list(self.levels)
2360 new_codes = list(self.codes)
2361 new_names = list(self.names)
2363 i = self._get_level_number(i)
2364 j = self._get_level_number(j)
2366 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2367 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2368 new_names[i], new_names[j] = new_names[j], new_names[i]
2370 return MultiIndex(
2371 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2372 )
2374 def reorder_levels(self, order) -> MultiIndex:
2375 """
2376 Rearrange levels using input order. May not drop or duplicate levels.
2378 Parameters
2379 ----------
2380 order : list of int or list of str
2381 List representing new level order. Reference level by number
2382 (position) or by key (label).
2384 Returns
2385 -------
2386 MultiIndex
2388 Examples
2389 --------
2390 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2391 >>> mi
2392 MultiIndex([(1, 3),
2393 (2, 4)],
2394 names=['x', 'y'])
2396 >>> mi.reorder_levels(order=[1, 0])
2397 MultiIndex([(3, 1),
2398 (4, 2)],
2399 names=['y', 'x'])
2401 >>> mi.reorder_levels(order=['y', 'x'])
2402 MultiIndex([(3, 1),
2403 (4, 2)],
2404 names=['y', 'x'])
2405 """
2406 order = [self._get_level_number(i) for i in order]
2407 if len(order) != self.nlevels:
2408 raise AssertionError(
2409 f"Length of order must be same as number of levels ({self.nlevels}), "
2410 f"got {len(order)}"
2411 )
2412 new_levels = [self.levels[i] for i in order]
2413 new_codes = [self.codes[i] for i in order]
2414 new_names = [self.names[i] for i in order]
2416 return MultiIndex(
2417 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2418 )
2420 def _get_codes_for_sorting(self) -> list[Categorical]:
2421 """
2422 we are categorizing our codes by using the
2423 available categories (all, not just observed)
2424 excluding any missing ones (-1); this is in preparation
2425 for sorting, where we need to disambiguate that -1 is not
2426 a valid valid
2427 """
2429 def cats(level_codes):
2430 return np.arange(
2431 np.array(level_codes).max() + 1 if len(level_codes) else 0,
2432 dtype=level_codes.dtype,
2433 )
2435 return [
2436 Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
2437 for level_codes in self.codes
2438 ]
    def sortlevel(
        self, level=0, ascending: bool = True, sort_remaining: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        # Normalize `level` to a list of positional level numbers.
        if isinstance(level, (str, int)):
            level = [level]
        level = [self._get_level_number(lev) for lev in level]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            # Per-level ascending/descending: delegate to lexsort_indexer.
            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:

            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): this appends `primary` to itself before adding
                # the remaining codes; duplicated sort keys do not change the
                # resulting order, but confirm the duplication is intentional.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                # Only the requested level is guaranteed sorted afterwards.
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        # Reorder every level's codes; the levels themselves are unchanged.
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2548 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
2549 if not isinstance(target, MultiIndex):
2550 if indexer is None:
2551 target = self
2552 elif (indexer >= 0).all():
2553 target = self.take(indexer)
2554 else:
2555 try:
2556 target = MultiIndex.from_tuples(target)
2557 except TypeError:
2558 # not all tuples, see test_constructor_dict_multiindex_reindex_flat
2559 return target
2561 target = self._maybe_preserve_names(target, preserve_names)
2562 return target
2564 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2565 if (
2566 preserve_names
2567 and target.nlevels == self.nlevels
2568 and target.names != self.names
2569 ):
2570 target = target.copy(deep=False)
2571 target.names = self.names
2572 return target
2574 # --------------------------------------------------------------------
2575 # Indexing Methods
2577 def _check_indexing_error(self, key) -> None:
2578 if not is_hashable(key) or is_iterator(key):
2579 # We allow tuples if they are hashable, whereas other Index
2580 # subclasses require scalar.
2581 # We have to explicitly exclude generators, as these are hashable.
2582 raise InvalidIndexError(key)
2584 @cache_readonly
2585 def _should_fallback_to_positional(self) -> bool:
2586 """
2587 Should integer key(s) be treated as positional?
2588 """
2589 # GH#33355
2590 return self.levels[0]._should_fallback_to_positional
2592 def _get_values_for_loc(self, series: Series, loc, key):
2593 """
2594 Do a positional lookup on the given Series, returning either a scalar
2595 or a Series.
2597 Assumes that `series.index is self`
2598 """
2599 new_values = series._values[loc]
2600 if is_scalar(loc):
2601 return new_values
2603 if len(new_values) == 1 and not self.nlevels > 1:
2604 # If more than one level left, we can not return a scalar
2605 return new_values[0]
2607 new_index = self[loc]
2608 new_index = maybe_droplevels(new_index, key)
2609 new_ser = series._constructor(new_values, index=new_index, name=series.name)
2610 return new_ser.__finalize__(series)
2612 def _get_indexer_strict(
2613 self, key, axis_name: str
2614 ) -> tuple[Index, npt.NDArray[np.intp]]:
2616 keyarr = key
2617 if not isinstance(keyarr, Index):
2618 keyarr = com.asarray_tuplesafe(keyarr)
2620 if len(keyarr) and not isinstance(keyarr[0], tuple):
2621 indexer = self._get_indexer_level_0(keyarr)
2623 self._raise_if_missing(key, indexer, axis_name)
2624 return self[indexer], indexer
2626 return super()._get_indexer_strict(key, axis_name)
2628 def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
2629 keyarr = key
2630 if not isinstance(key, Index):
2631 keyarr = com.asarray_tuplesafe(key)
2633 if len(keyarr) and not isinstance(keyarr[0], tuple):
2634 # i.e. same condition for special case in MultiIndex._get_indexer_strict
2636 mask = indexer == -1
2637 if mask.any():
2638 check = self.levels[0].get_indexer(keyarr)
2639 cmask = check == -1
2640 if cmask.any():
2641 raise KeyError(f"{keyarr[cmask]} not in index")
2642 # We get here when levels still contain values which are not
2643 # actually in Index anymore
2644 raise KeyError(f"{keyarr} not in index")
2645 else:
2646 return super()._raise_if_missing(key, indexer, axis_name)
2648 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
2649 """
2650 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
2651 """
2652 lev = self.levels[0]
2653 codes = self._codes[0]
2654 cat = Categorical.from_codes(codes=codes, categories=lev)
2655 ci = Index(cat)
2656 return ci.get_indexer_for(target)
2658 def get_slice_bound(
2659 self,
2660 label: Hashable | Sequence[Hashable],
2661 side: Literal["left", "right"],
2662 kind=lib.no_default,
2663 ) -> int:
2664 """
2665 For an ordered MultiIndex, compute slice bound
2666 that corresponds to given label.
2668 Returns leftmost (one-past-the-rightmost if `side=='right') position
2669 of given label.
2671 Parameters
2672 ----------
2673 label : object or tuple of objects
2674 side : {'left', 'right'}
2675 kind : {'loc', 'getitem', None}
2677 .. deprecated:: 1.4.0
2679 Returns
2680 -------
2681 int
2682 Index of label.
2684 Notes
2685 -----
2686 This method only works if level 0 index of the MultiIndex is lexsorted.
2688 Examples
2689 --------
2690 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])
2692 Get the locations from the leftmost 'b' in the first level
2693 until the end of the multiindex:
2695 >>> mi.get_slice_bound('b', side="left")
2696 1
2698 Like above, but if you get the locations from the rightmost
2699 'b' in the first level and 'f' in the second level:
2701 >>> mi.get_slice_bound(('b','f'), side="right")
2702 3
2704 See Also
2705 --------
2706 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2707 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2708 sequence of such.
2709 """
2710 self._deprecated_arg(kind, "kind", "get_slice_bound")
2712 if not isinstance(label, tuple):
2713 label = (label,)
2714 return self._partial_tup_index(label, side=side)
    def slice_locs(
        self, start=None, end=None, step=None, kind=lib.no_default
    ) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step
        kind : string, optional, defaults None

            .. deprecated:: 1.4.0

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.
        """
        # Warn if the deprecated ``kind`` argument was explicitly passed.
        self._deprecated_arg(kind, "kind", "slice_locs")
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple.

        Walks the levels left-to-right, narrowing the candidate window
        ``[start, end)`` level by level via searchsorted on the codes.
        Requires the index to be lexsorted at least ``len(tup)`` deep.

        Parameters
        ----------
        tup : tuple
            One label per level, possibly fewer than ``self.nlevels``.
        side : {'left', 'right'}, default 'left'
            Which bound of the matching run to return.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # Only the codes inside the current window are relevant.
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                # Label absent from the level: find where it WOULD be inserted
                # in the level, then translate that to a position in `section`.
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    # 'right' bound: the code strictly before the insertion
                    # point is the last one that can match.
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            # NA labels map to code -1 (see _get_loc_single_level_index).
            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # More levels remain: shrink the window to this label's run.
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                # Last level and a slice result: its start is the bound code.
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                # Last level, scalar code: final searchsorted gives the bound.
                return start + algos.searchsorted(section, idx, side=side)
2827 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2828 """
2829 If key is NA value, location of index unify as -1.
2831 Parameters
2832 ----------
2833 level_index: Index
2834 key : label
2836 Returns
2837 -------
2838 loc : int
2839 If key is NA value, loc is -1
2840 Else, location of key in index.
2842 See Also
2843 --------
2844 Index.get_loc : The get_loc method for (single-level) index.
2845 """
2846 if is_scalar(key) and isna(key):
2847 return -1
2848 else:
2849 return level_index.get_loc(key)
    def get_loc(self, key, method=None):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)
        method : None

        Returns
        -------
        loc : int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        if method is not None:
            raise NotImplementedError(
                "only the default get_loc method is "
                "currently supported for MultiIndex"
            )

        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # Positions are not contiguous: fall back to a boolean mask.
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # Scalar key: resolve against level 0 only.
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # Fast path: complete key on a unique index -> engine lookup.
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        # Candidate positions from the sorted prefix; narrow them per level.
        loc = np.arange(start, stop, dtype=np.intp)

        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # If every candidate survived, the run is contiguous -> return a slice.
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
2976 def get_loc_level(self, key, level=0, drop_level: bool = True):
2977 """
2978 Get location and sliced index for requested label(s)/level(s).
2980 Parameters
2981 ----------
2982 key : label or sequence of labels
2983 level : int/level name or list thereof, optional
2984 drop_level : bool, default True
2985 If ``False``, the resulting index will not drop any level.
2987 Returns
2988 -------
2989 loc : A 2-tuple where the elements are:
2990 Element 0: int, slice object or boolean array
2991 Element 1: The resulting sliced multiindex/index. If the key
2992 contains all levels, this will be ``None``.
2994 See Also
2995 --------
2996 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2997 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2998 sequence of such.
3000 Examples
3001 --------
3002 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
3003 ... names=['A', 'B'])
3005 >>> mi.get_loc_level('b')
3006 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))
3008 >>> mi.get_loc_level('e', level='B')
3009 (array([False, True, False]), Index(['b'], dtype='object', name='A'))
3011 >>> mi.get_loc_level(['b', 'e'])
3012 (1, None)
3013 """
3014 if not isinstance(level, (list, tuple)):
3015 level = self._get_level_number(level)
3016 else:
3017 level = [self._get_level_number(lev) for lev in level]
3019 loc, mi = self._get_loc_level(key, level=level)
3020 if not drop_level:
3021 if lib.is_integer(loc):
3022 mi = self[loc : loc + 1]
3023 else:
3024 mi = self[loc]
3025 return loc, mi
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # Drop from highest to lowest so earlier drops don't shift
            # the remaining level numbers.
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # Multiple (level, key) pairs: AND the per-level indexers together.
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # Normalize to a boolean mask so it can be combined below.
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:

            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):

                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    # Partial-string-matched levels are kept, not dropped.
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # Tuple key containing slices: intersect per-level boolean masks.
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue
                            else:
                                # e.g. test_xs_IndexSlice_argument_not_implemented
                                k_index = np.zeros(len(self), dtype=bool)
                                k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    # Every component selected everything.
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # Single (non-tuple) key, or a key targeting a deeper level.
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                # Cannot drop the only remaining level.
                result_index = self[indexer]

            return indexer, result_index
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate ``key`` within a single level, returning either a slice
        (when the level is sorted) or a boolean array over the whole index.
        """
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                # Stepped slice: match codes against the explicit range.
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # Scatter the narrowed result back into the original positions.
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:

                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:

            # Scalar key; NA keys map to code -1.
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        # Normalize a per-level result to a boolean mask of length n so the
        # per-level results can be AND-ed together below.
        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take; None means
        # "no level has constrained the selection yet"
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):

            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        try:
                            item_indexer = self._get_level_indexer(
                                x, level=i, indexer=indexer
                            )
                        except KeyError:
                            # ignore not founds; see discussion in GH#39424
                            warnings.warn(
                                "The behavior of indexing on a MultiIndex with a "
                                "nested sequence of labels is deprecated and will "
                                "change in a future version. "
                                "`series.loc[label, sequence]` will raise if any "
                                "members of 'sequence' or not present in "
                                "the index's second level. To retain the old "
                                "behavior, use `series.index.isin(sequence, level=1)`",
                                # TODO: how to opt in to the future behavior?
                                # TODO: how to handle IntervalIndex level?
                                #  (no test cases)
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                            continue
                        else:
                            # OR this label's positions into the level's mask
                            if lvl_indexer is None:
                                lvl_indexer = _to_bool_indexer(item_indexer)
                            elif isinstance(item_indexer, slice):
                                lvl_indexer[item_indexer] = True  # type: ignore[index]
                            else:
                                lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer: each level's mask is AND-ed with the running mask
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3441 # --------------------------------------------------------------------
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # If the index is lexsorted and the list_like label in seq are sorted
        # then we do not need to sort
        if self._is_lexsorted():
            need_sort = False
            for i, k in enumerate(seq):
                if is_list_like(k):
                    if not need_sort:
                        k_codes = self.levels[i].get_indexer(k)
                        k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                        # True if the given codes are not ordered
                        need_sort = (k_codes[:-1] > k_codes[1:]).any()
                elif isinstance(k, slice) and k.step is not None and k.step < 0:
                    # a reversed slice always forces a re-sort
                    need_sort = True
            # Bail out if both index and seq are sorted
            if not need_sort:
                return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                # absent codes map to len(level), i.e. sort after everything
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,))[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend: np.lexsort sorts by its LAST key first, so earlier
            # levels of seq must end up later in `keys` to take precedence
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3516 def truncate(self, before=None, after=None) -> MultiIndex:
3517 """
3518 Slice index between two labels / tuples, return new MultiIndex
3520 Parameters
3521 ----------
3522 before : label or tuple, can be partial. Default None
3523 None defaults to start
3524 after : label or tuple, can be partial. Default None
3525 None defaults to end
3527 Returns
3528 -------
3529 truncated : MultiIndex
3530 """
3531 if after and before and after < before:
3532 raise ValueError("after < before")
3534 i, j = self.levels[0].slice_locs(before, after)
3535 left, right = self.slice_locs(before, after)
3537 new_levels = list(self.levels)
3538 new_levels[0] = new_levels[0][i:j]
3540 new_codes = [level_codes[left:right] for level_codes in self.codes]
3541 new_codes[0] = new_codes[0] - i
3543 return MultiIndex(
3544 levels=new_levels,
3545 codes=new_codes,
3546 names=self._names,
3547 verify_integrity=False,
3548 )
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            # same object: trivially equal
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level: realize each level's values from the codes
        # and compare, requiring missing values (-1 codes) in the same spots.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3612 def equal_levels(self, other: MultiIndex) -> bool:
3613 """
3614 Return True if the levels of both MultiIndex objects are the same
3616 """
3617 if self.nlevels != other.nlevels:
3618 return False
3620 for i in range(self.nlevels):
3621 if not self.levels[i].equals(other.levels[i]):
3622 return False
3623 return True
3625 # --------------------------------------------------------------------
3626 # Set Methods
3628 def _union(self, other, sort) -> MultiIndex:
3629 other, result_names = self._convert_can_do_setop(other)
3630 if (
3631 any(-1 in code for code in self.codes)
3632 and any(-1 in code for code in other.codes)
3633 or self.has_duplicates
3634 or other.has_duplicates
3635 ):
3636 # This is only necessary if both sides have nans or one has dups,
3637 # fast_unique_multiple is faster
3638 result = super()._union(other, sort)
3639 else:
3640 rvals = other._values.astype(object, copy=False)
3641 result = lib.fast_unique_multiple([self._values, rvals], sort=sort)
3643 return MultiIndex.from_arrays(zip(*result), sortorder=None, names=result_names)
3645 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
3646 return is_object_dtype(dtype)
3648 def _get_reconciled_name_object(self, other) -> MultiIndex:
3649 """
3650 If the result of a set operation will be self,
3651 return self, unless the names change, in which
3652 case make a shallow copy of self.
3653 """
3654 names = self._maybe_match_names(other)
3655 if self.names != names:
3656 # Incompatible return value type (got "Optional[MultiIndex]", expected
3657 # "MultiIndex")
3658 return self.rename(names) # type: ignore[return-value]
3659 return self
3661 def _maybe_match_names(self, other):
3662 """
3663 Try to find common names to attach to the result of an operation between
3664 a and b. Return a consensus list of names if they match at least partly
3665 or list of None if they have completely different names.
3666 """
3667 if len(self.names) != len(other.names):
3668 return [None] * len(self.names)
3669 names = []
3670 for a_name, b_name in zip(self.names, other.names):
3671 if a_name == b_name:
3672 names.append(a_name)
3673 else:
3674 # TODO: what if they both have np.nan for their names?
3675 names.append(None)
3676 return names
3678 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3679 _, result_names = self._convert_can_do_setop(other)
3681 if len(result) == 0:
3682 return MultiIndex(
3683 levels=self.levels,
3684 codes=[[]] * self.nlevels,
3685 names=result_names,
3686 verify_integrity=False,
3687 )
3688 else:
3689 return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names)
3691 def _wrap_difference_result(self, other, result) -> MultiIndex:
3692 _, result_names = self._convert_can_do_setop(other)
3694 if len(result) == 0:
3695 return MultiIndex(
3696 levels=[[]] * self.nlevels,
3697 codes=[[]] * self.nlevels,
3698 names=result_names,
3699 verify_integrity=False,
3700 )
3701 else:
3702 return MultiIndex.from_tuples(result, sortorder=0, names=result_names)
3704 def _convert_can_do_setop(self, other):
3705 result_names = self.names
3707 if not isinstance(other, Index):
3709 if len(other) == 0:
3710 return self[:0], self.names
3711 else:
3712 msg = "other must be a MultiIndex or a list of tuples"
3713 try:
3714 other = MultiIndex.from_tuples(other, names=self.names)
3715 except (ValueError, TypeError) as err:
3716 # ValueError raised by tuples_to_object_array if we
3717 # have non-object dtype
3718 raise TypeError(msg) from err
3719 else:
3720 result_names = get_unanimous_names(self, other)
3722 return other, result_names
3724 # --------------------------------------------------------------------
3726 @doc(Index.astype)
3727 def astype(self, dtype, copy: bool = True):
3728 dtype = pandas_dtype(dtype)
3729 if is_categorical_dtype(dtype):
3730 msg = "> 1 ndim Categorical are not supported at this time"
3731 raise NotImplementedError(msg)
3732 elif not is_object_dtype(dtype):
3733 raise TypeError(
3734 "Setting a MultiIndex dtype to anything other than object "
3735 "is not supported"
3736 )
3737 elif copy is True:
3738 return self._view()
3739 return self
3741 def _validate_fill_value(self, item):
3742 if isinstance(item, MultiIndex):
3743 # GH#43212
3744 if item.nlevels != self.nlevels:
3745 raise ValueError("Item must have length equal to number of levels.")
3746 return item._values
3747 elif not isinstance(item, tuple):
3748 # Pad the key with empty strings if lower levels of the key
3749 # aren't specified:
3750 item = (item,) + ("",) * (self.nlevels - 1)
3751 elif len(item) != self.nlevels:
3752 raise ValueError("Item must have length equal to number of levels.")
3753 return item
3755 def insert(self, loc: int, item) -> MultiIndex:
3756 """
3757 Make new MultiIndex inserting new item at location
3759 Parameters
3760 ----------
3761 loc : int
3762 item : tuple
3763 Must be same length as number of levels in the MultiIndex
3765 Returns
3766 -------
3767 new_index : Index
3768 """
3769 item = self._validate_fill_value(item)
3771 new_levels = []
3772 new_codes = []
3773 for k, level, level_codes in zip(item, self.levels, self.codes):
3774 if k not in level:
3775 # have to insert into level
3776 # must insert at end otherwise you have to recompute all the
3777 # other codes
3778 lev_loc = len(level)
3779 level = level.insert(lev_loc, k)
3780 else:
3781 lev_loc = level.get_loc(k)
3783 new_levels.append(level)
3784 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3786 return MultiIndex(
3787 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3788 )
3790 def delete(self, loc) -> MultiIndex:
3791 """
3792 Make new index with passed location deleted
3794 Returns
3795 -------
3796 new_index : MultiIndex
3797 """
3798 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3799 return MultiIndex(
3800 levels=self.levels,
3801 codes=new_codes,
3802 names=self.names,
3803 verify_integrity=False,
3804 )
3806 @doc(Index.isin)
3807 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3808 if level is None:
3809 values = MultiIndex.from_tuples(values, names=self.names)._values
3810 return algos.isin(self._values, values)
3811 else:
3812 num = self._get_level_number(level)
3813 levs = self.get_level_values(num)
3815 if levs.size == 0:
3816 return np.zeros(len(levs), dtype=np.bool_)
3817 return levs.isin(values)
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"])
    def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | None:
        # Thin wrapper: the decorator deprecates positional use of
        # `level`/`inplace`; the actual logic lives on the base Index class.
        return super().set_names(names=names, level=level, inplace=inplace)

    rename = set_names  # MultiIndex.rename is an alias of set_names
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
    def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex:
        # Wrapper exists only to attach the positional-argument deprecation;
        # deduplication itself is inherited from the base class.
        return super().drop_duplicates(keep=keep)
    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled

    # A MultiIndex holds tuples, so elementwise arithmetic is undefined;
    # every arithmetic dunder is replaced by an invalid-op stub.
    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
3857def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
3858 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
3859 int64_codes = [ensure_int64(level_codes) for level_codes in codes]
3860 for k in range(nlevels, 0, -1):
3861 if libalgos.is_lexsorted(int64_codes[:k]):
3862 return k
3863 return 0
def sparsify_labels(label_list, start: int = 0, sentinel=""):
    """
    Blank out repeated leading labels so a rendered MultiIndex shows each
    level's value only where it changes from the previous row.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev = rows[start]

    for row in rows[start + 1 :]:
        sparse_row = []

        for lvl, (prev_val, cur_val) in enumerate(zip(prev, row)):
            if lvl == nlevels - 1:
                # The last level is always rendered in full.
                sparse_row.append(cur_val)
                out.append(sparse_row)
                break

            if prev_val == cur_val:
                sparse_row.append(sentinel)
            else:
                # First difference: emit the rest of the row verbatim.
                sparse_row.extend(row[lvl:])
                out.append(sparse_row)
                break

        prev = row

    return list(zip(*out))
3894def _get_na_rep(dtype) -> str:
3895 if is_extension_array_dtype(dtype):
3896 return f"{dtype.na_value}"
3897 else:
3898 dtype = dtype.type
3900 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN")
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if not isinstance(key, tuple):
        # Scalar key: drop a single level if the index can spare one.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Tuple key: drop one level per element of the key.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return original_index
    return index
3934def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
3935 """
3936 Coerce the array-like indexer to the smallest integer dtype that can encode all
3937 of the given categories.
3939 Parameters
3940 ----------
3941 array_like : array-like
3942 categories : array-like
3943 copy : bool
3945 Returns
3946 -------
3947 np.ndarray
3948 Non-writeable.
3949 """
3950 array_like = coerce_indexer_dtype(array_like, categories)
3951 if copy:
3952 array_like = array_like.copy()
3953 array_like.flags.writeable = False
3954 return array_like
3957def _require_listlike(level, arr, arrname: str):
3958 """
3959 Ensure that level is either None or listlike, and arr is list-of-listlike.
3960 """
3961 if level is not None and not is_list_like(level):
3962 if not is_list_like(arr):
3963 raise TypeError(f"{arrname} must be list-like")
3964 if is_list_like(arr[0]):
3965 raise TypeError(f"{arrname} must be list-like")
3966 level = [level]
3967 arr = [arr]
3968 elif level is None or is_list_like(level):
3969 if not is_list_like(arr) or not is_list_like(arr[0]):
3970 raise TypeError(f"{arrname} must be list of lists-like")
3971 return level, arr