Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/multi.py: 11%

1404 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from functools import wraps 

4from sys import getsizeof 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Collection, 

10 Hashable, 

11 Iterable, 

12 List, 

13 Literal, 

14 Sequence, 

15 Tuple, 

16 cast, 

17) 

18import warnings 

19 

20import numpy as np 

21 

22from pandas._config import get_option 

23 

24from pandas._libs import ( 

25 algos as libalgos, 

26 index as libindex, 

27 lib, 

28) 

29from pandas._libs.hashtable import duplicated 

30from pandas._typing import ( 

31 AnyArrayLike, 

32 DtypeObj, 

33 F, 

34 Scalar, 

35 Shape, 

36 npt, 

37) 

38from pandas.compat.numpy import function as nv 

39from pandas.errors import ( 

40 InvalidIndexError, 

41 PerformanceWarning, 

42 UnsortedIndexError, 

43) 

44from pandas.util._decorators import ( 

45 Appender, 

46 cache_readonly, 

47 deprecate_nonkeyword_arguments, 

48 doc, 

49) 

50from pandas.util._exceptions import find_stack_level 

51 

52from pandas.core.dtypes.cast import coerce_indexer_dtype 

53from pandas.core.dtypes.common import ( 

54 ensure_int64, 

55 ensure_platform_int, 

56 is_categorical_dtype, 

57 is_extension_array_dtype, 

58 is_hashable, 

59 is_integer, 

60 is_iterator, 

61 is_list_like, 

62 is_object_dtype, 

63 is_scalar, 

64 pandas_dtype, 

65) 

66from pandas.core.dtypes.dtypes import ExtensionDtype 

67from pandas.core.dtypes.generic import ( 

68 ABCDataFrame, 

69 ABCDatetimeIndex, 

70 ABCTimedeltaIndex, 

71) 

72from pandas.core.dtypes.missing import ( 

73 array_equivalent, 

74 isna, 

75) 

76 

77import pandas.core.algorithms as algos 

78from pandas.core.arrays import Categorical 

79from pandas.core.arrays.categorical import factorize_from_iterables 

80import pandas.core.common as com 

81import pandas.core.indexes.base as ibase 

82from pandas.core.indexes.base import ( 

83 Index, 

84 _index_shared_docs, 

85 ensure_index, 

86 get_unanimous_names, 

87) 

88from pandas.core.indexes.frozen import FrozenList 

89from pandas.core.ops.invalid import make_invalid_op 

90from pandas.core.sorting import ( 

91 get_group_index, 

92 indexer_from_factorized, 

93 lexsort_indexer, 

94) 

95 

96from pandas.io.formats.printing import pprint_thing 

97 

98if TYPE_CHECKING: 98 ↛ 99line 98 didn't jump to line 99, because the condition on line 98 was never true

99 from pandas import ( 

100 CategoricalIndex, 

101 DataFrame, 

102 Series, 

103 ) 

104 

105_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

106_index_doc_kwargs.update( 

107 {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"} 

108) 

109 

110 

class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine mapping MultiIndex label combinations onto unsigned 64-bit ints.

    Each level's codes occupy a disjoint slice of the 64 available bits, so a
    whole row of codes packs losslessly into one uint64.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 each, in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Move each level's codes into its pre-calculated bit slice.
        codes <<= self.offsets

        # The shifted values have disjoint significant bits, so OR-ing the
        # columns is the same as summing them and yields the packed integer.
        if codes.ndim == 1:
            # a single key
            return np.bitwise_or.reduce(codes)

        # one packed integer per row
        return np.bitwise_or.reduce(codes, axis=1)

148 

149 

class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Engine for the (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers; falls back to an ObjectEngine
    holding arbitrary-precision Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python integer each, in
        a strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shifting can overflow uint64 here, so switch to Python integers
        # (object dtype) before moving each level into its bit slice.
        codes = codes.astype("object") << self.offsets

        # The shifted values have disjoint significant bits, so OR-ing the
        # columns is the same as summing them and yields one packed
        # (arbitrary-precision) integer per combination.
        if codes.ndim == 1:
            # a single key
            return np.bitwise_or.reduce(codes)

        # one packed integer per row
        return np.bitwise_or.reduce(codes, axis=1)

189 

190 

def names_compat(meth: F) -> F:
    """
    Decorator accepting the ``name`` keyword as an alias for ``names``.

    Either keyword may be supplied, but not both; ``name`` is rewritten to
    ``names`` before delegating. This makes it easier to share code with the
    base class, which uses ``name``.
    """

    @wraps(meth)
    def wrapper(self_or_cls, *args, **kwargs):
        if "name" in kwargs:
            if "names" in kwargs:
                raise TypeError("Can only provide one of `names` and `name`")
            # normalize the alias onto the canonical keyword
            kwargs["names"] = kwargs.pop("name")
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, wrapper)

208 

209 

class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels
    get_locs

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
       of iterables.
    MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Notes
    -----
    See the `user guide
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
    for more.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`, e.g.:

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex([(1,  'red'),
                (1, 'blue'),
                (2,  'red'),
                (2, 'blue')],
               names=['number', 'color'])
    """

    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    _names: list[Hashable | None] = []
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ["names"]

    # lexsort depth claimed at construction time (None when unknown)
    sortorder: int | None

298 

299 # -------------------------------------------------------------------- 

300 # Constructors 

301 

302 def __new__( 

303 cls, 

304 levels=None, 

305 codes=None, 

306 sortorder=None, 

307 names=None, 

308 dtype=None, 

309 copy=False, 

310 name=None, 

311 verify_integrity: bool = True, 

312 ) -> MultiIndex: 

313 

314 # compat with Index 

315 if name is not None: 

316 names = name 

317 if levels is None or codes is None: 

318 raise TypeError("Must pass both levels and codes") 

319 if len(levels) != len(codes): 

320 raise ValueError("Length of levels and codes must be the same.") 

321 if len(levels) == 0: 

322 raise ValueError("Must pass non-zero number of levels/codes") 

323 

324 result = object.__new__(cls) 

325 result._cache = {} 

326 

327 # we've already validated levels and codes, so shortcut here 

328 result._set_levels(levels, copy=copy, validate=False) 

329 result._set_codes(codes, copy=copy, validate=False) 

330 

331 result._names = [None] * len(levels) 

332 if names is not None: 

333 # handles name validation 

334 result._set_names(names) 

335 

336 if sortorder is not None: 

337 result.sortorder = int(sortorder) 

338 else: 

339 result.sortorder = sortorder 

340 

341 if verify_integrity: 

342 new_codes = result._verify_integrity() 

343 result._codes = new_codes 

344 

345 result._reset_identity() 

346 

347 return result 

348 

349 def _validate_codes(self, level: list, code: list): 

350 """ 

351 Reassign code values as -1 if their corresponding levels are NaN. 

352 

353 Parameters 

354 ---------- 

355 code : list 

356 Code to reassign. 

357 level : list 

358 Level to check for missing values (NaN, NaT, None). 

359 

360 Returns 

361 ------- 

362 new code where code value = -1 if it corresponds 

363 to a level with missing values (NaN, NaT, None). 

364 """ 

365 null_mask = isna(level) 

366 if np.any(null_mask): 

367 # error: Incompatible types in assignment 

368 # (expression has type "ndarray[Any, dtype[Any]]", 

369 # variable has type "List[Any]") 

370 code = np.where(null_mask[code], -1, code) # type: ignore[assignment] 

371 return code 

372 

373 def _verify_integrity(self, codes: list | None = None, levels: list | None = None): 

374 """ 

375 Parameters 

376 ---------- 

377 codes : optional list 

378 Codes to check for validity. Defaults to current codes. 

379 levels : optional list 

380 Levels to check for validity. Defaults to current levels. 

381 

382 Raises 

383 ------ 

384 ValueError 

385 If length of levels and codes don't match, if the codes for any 

386 level would exceed level bounds, or there are any duplicate levels. 

387 

388 Returns 

389 ------- 

390 new codes where code value = -1 if it corresponds to a 

391 NaN level. 

392 """ 

393 # NOTE: Currently does not check, among other things, that cached 

394 # nlevels matches nor that sortorder matches actually sortorder. 

395 codes = codes or self.codes 

396 levels = levels or self.levels 

397 

398 if len(levels) != len(codes): 

399 raise ValueError( 

400 "Length of levels and codes must match. NOTE: " 

401 "this index is in an inconsistent state." 

402 ) 

403 codes_length = len(codes[0]) 

404 for i, (level, level_codes) in enumerate(zip(levels, codes)): 

405 if len(level_codes) != codes_length: 

406 raise ValueError( 

407 f"Unequal code lengths: {[len(code_) for code_ in codes]}" 

408 ) 

409 if len(level_codes) and level_codes.max() >= len(level): 

410 raise ValueError( 

411 f"On level {i}, code max ({level_codes.max()}) >= length of " 

412 f"level ({len(level)}). NOTE: this index is in an " 

413 "inconsistent state" 

414 ) 

415 if len(level_codes) and level_codes.min() < -1: 

416 raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1") 

417 if not level.is_unique: 

418 raise ValueError( 

419 f"Level values must be unique: {list(level)} on level {i}" 

420 ) 

421 if self.sortorder is not None: 

422 if self.sortorder > _lexsort_depth(self.codes, self.nlevels): 

423 raise ValueError( 

424 "Value for sortorder must be inferior or equal to actual " 

425 f"lexsort_depth: sortorder {self.sortorder} " 

426 f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}" 

427 ) 

428 

429 codes = [ 

430 self._validate_codes(level, code) for level, code in zip(levels, codes) 

431 ] 

432 new_codes = FrozenList(codes) 

433 return new_codes 

434 

435 @classmethod 

436 def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex: 

437 """ 

438 Convert arrays to MultiIndex. 

439 

440 Parameters 

441 ---------- 

442 arrays : list / sequence of array-likes 

443 Each array-like gives one level's value for each data point. 

444 len(arrays) is the number of levels. 

445 sortorder : int or None 

446 Level of sortedness (must be lexicographically sorted by that 

447 level). 

448 names : list / sequence of str, optional 

449 Names for the levels in the index. 

450 

451 Returns 

452 ------- 

453 MultiIndex 

454 

455 See Also 

456 -------- 

457 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

458 MultiIndex.from_product : Make a MultiIndex from cartesian product 

459 of iterables. 

460 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

461 

462 Examples 

463 -------- 

464 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] 

465 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) 

466 MultiIndex([(1, 'red'), 

467 (1, 'blue'), 

468 (2, 'red'), 

469 (2, 'blue')], 

470 names=['number', 'color']) 

471 """ 

472 error_msg = "Input must be a list / sequence of array-likes." 

473 if not is_list_like(arrays): 

474 raise TypeError(error_msg) 

475 elif is_iterator(arrays): 

476 arrays = list(arrays) 

477 

478 # Check if elements of array are list-like 

479 for array in arrays: 

480 if not is_list_like(array): 

481 raise TypeError(error_msg) 

482 

483 # Check if lengths of all arrays are equal or not, 

484 # raise ValueError, if not 

485 for i in range(1, len(arrays)): 

486 if len(arrays[i]) != len(arrays[i - 1]): 

487 raise ValueError("all arrays must be same length") 

488 

489 codes, levels = factorize_from_iterables(arrays) 

490 if names is lib.no_default: 

491 names = [getattr(arr, "name", None) for arr in arrays] 

492 

493 return cls( 

494 levels=levels, 

495 codes=codes, 

496 sortorder=sortorder, 

497 names=names, 

498 verify_integrity=False, 

499 ) 

500 

501 @classmethod 

502 @names_compat 

503 def from_tuples( 

504 cls, 

505 tuples: Iterable[tuple[Hashable, ...]], 

506 sortorder: int | None = None, 

507 names: Sequence[Hashable] | Hashable | None = None, 

508 ) -> MultiIndex: 

509 """ 

510 Convert list of tuples to MultiIndex. 

511 

512 Parameters 

513 ---------- 

514 tuples : list / sequence of tuple-likes 

515 Each tuple is the index of one row/column. 

516 sortorder : int or None 

517 Level of sortedness (must be lexicographically sorted by that 

518 level). 

519 names : list / sequence of str, optional 

520 Names for the levels in the index. 

521 

522 Returns 

523 ------- 

524 MultiIndex 

525 

526 See Also 

527 -------- 

528 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

529 MultiIndex.from_product : Make a MultiIndex from cartesian product 

530 of iterables. 

531 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

532 

533 Examples 

534 -------- 

535 >>> tuples = [(1, 'red'), (1, 'blue'), 

536 ... (2, 'red'), (2, 'blue')] 

537 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) 

538 MultiIndex([(1, 'red'), 

539 (1, 'blue'), 

540 (2, 'red'), 

541 (2, 'blue')], 

542 names=['number', 'color']) 

543 """ 

544 if not is_list_like(tuples): 

545 raise TypeError("Input must be a list / sequence of tuple-likes.") 

546 elif is_iterator(tuples): 

547 tuples = list(tuples) 

548 tuples = cast(Collection[Tuple[Hashable, ...]], tuples) 

549 

550 # handling the empty tuple cases 

551 if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): 

552 codes = [np.zeros(len(tuples))] 

553 levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] 

554 return cls( 

555 levels=levels, 

556 codes=codes, 

557 sortorder=sortorder, 

558 names=names, 

559 verify_integrity=False, 

560 ) 

561 

562 arrays: list[Sequence[Hashable]] 

563 if len(tuples) == 0: 

564 if names is None: 

565 raise TypeError("Cannot infer number of levels from empty list") 

566 # error: Argument 1 to "len" has incompatible type "Hashable"; 

567 # expected "Sized" 

568 arrays = [[]] * len(names) # type: ignore[arg-type] 

569 elif isinstance(tuples, (np.ndarray, Index)): 

570 if isinstance(tuples, Index): 

571 tuples = np.asarray(tuples._values) 

572 

573 arrays = list(lib.tuples_to_object_array(tuples).T) 

574 elif isinstance(tuples, list): 

575 arrays = list(lib.to_object_array_tuples(tuples).T) 

576 else: 

577 arrs = zip(*tuples) 

578 arrays = cast(List[Sequence[Hashable]], arrs) 

579 

580 return cls.from_arrays(arrays, sortorder=sortorder, names=names) 

581 

582 @classmethod 

583 def from_product( 

584 cls, 

585 iterables: Sequence[Iterable[Hashable]], 

586 sortorder: int | None = None, 

587 names: Sequence[Hashable] | lib.NoDefault = lib.no_default, 

588 ) -> MultiIndex: 

589 """ 

590 Make a MultiIndex from the cartesian product of multiple iterables. 

591 

592 Parameters 

593 ---------- 

594 iterables : list / sequence of iterables 

595 Each iterable has unique labels for each level of the index. 

596 sortorder : int or None 

597 Level of sortedness (must be lexicographically sorted by that 

598 level). 

599 names : list / sequence of str, optional 

600 Names for the levels in the index. 

601 

602 .. versionchanged:: 1.0.0 

603 

604 If not explicitly provided, names will be inferred from the 

605 elements of iterables if an element has a name attribute 

606 

607 Returns 

608 ------- 

609 MultiIndex 

610 

611 See Also 

612 -------- 

613 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

614 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

615 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

616 

617 Examples 

618 -------- 

619 >>> numbers = [0, 1, 2] 

620 >>> colors = ['green', 'purple'] 

621 >>> pd.MultiIndex.from_product([numbers, colors], 

622 ... names=['number', 'color']) 

623 MultiIndex([(0, 'green'), 

624 (0, 'purple'), 

625 (1, 'green'), 

626 (1, 'purple'), 

627 (2, 'green'), 

628 (2, 'purple')], 

629 names=['number', 'color']) 

630 """ 

631 from pandas.core.reshape.util import cartesian_product 

632 

633 if not is_list_like(iterables): 

634 raise TypeError("Input must be a list / sequence of iterables.") 

635 elif is_iterator(iterables): 

636 iterables = list(iterables) 

637 

638 codes, levels = factorize_from_iterables(iterables) 

639 if names is lib.no_default: 

640 names = [getattr(it, "name", None) for it in iterables] 

641 

642 # codes are all ndarrays, so cartesian_product is lossless 

643 codes = cartesian_product(codes) 

644 return cls(levels, codes, sortorder=sortorder, names=names) 

645 

646 @classmethod 

647 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex: 

648 """ 

649 Make a MultiIndex from a DataFrame. 

650 

651 Parameters 

652 ---------- 

653 df : DataFrame 

654 DataFrame to be converted to MultiIndex. 

655 sortorder : int, optional 

656 Level of sortedness (must be lexicographically sorted by that 

657 level). 

658 names : list-like, optional 

659 If no names are provided, use the column names, or tuple of column 

660 names if the columns is a MultiIndex. If a sequence, overwrite 

661 names with the given sequence. 

662 

663 Returns 

664 ------- 

665 MultiIndex 

666 The MultiIndex representation of the given DataFrame. 

667 

668 See Also 

669 -------- 

670 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

671 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

672 MultiIndex.from_product : Make a MultiIndex from cartesian product 

673 of iterables. 

674 

675 Examples 

676 -------- 

677 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], 

678 ... ['NJ', 'Temp'], ['NJ', 'Precip']], 

679 ... columns=['a', 'b']) 

680 >>> df 

681 a b 

682 0 HI Temp 

683 1 HI Precip 

684 2 NJ Temp 

685 3 NJ Precip 

686 

687 >>> pd.MultiIndex.from_frame(df) 

688 MultiIndex([('HI', 'Temp'), 

689 ('HI', 'Precip'), 

690 ('NJ', 'Temp'), 

691 ('NJ', 'Precip')], 

692 names=['a', 'b']) 

693 

694 Using explicit names, instead of the column names 

695 

696 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) 

697 MultiIndex([('HI', 'Temp'), 

698 ('HI', 'Precip'), 

699 ('NJ', 'Temp'), 

700 ('NJ', 'Precip')], 

701 names=['state', 'observation']) 

702 """ 

703 if not isinstance(df, ABCDataFrame): 

704 raise TypeError("Input must be a DataFrame") 

705 

706 column_names, columns = zip(*df.items()) 

707 names = column_names if names is None else names 

708 return cls.from_arrays(columns, sortorder=sortorder, names=names) 

709 

710 # -------------------------------------------------------------------- 

711 

712 @cache_readonly 

713 def _values(self) -> np.ndarray: 

714 # We override here, since our parent uses _data, which we don't use. 

715 values = [] 

716 

717 for i in range(self.nlevels): 

718 index = self.levels[i] 

719 codes = self.codes[i] 

720 

721 vals = index 

722 if is_categorical_dtype(vals.dtype): 

723 vals = cast("CategoricalIndex", vals) 

724 vals = vals._data._internal_get_values() 

725 

726 is_dti = isinstance(vals, ABCDatetimeIndex) 

727 

728 if is_dti: 

729 # TODO: this can be removed after Timestamp.freq is removed 

730 # The astype(object) below does not remove the freq from 

731 # the underlying Timestamps so we remove it here to match 

732 # the behavior of self._get_level_values 

733 vals = algos.take_nd(vals, codes, fill_value=index._na_value) 

734 

735 if isinstance(vals.dtype, ExtensionDtype) or isinstance( 

736 vals, (ABCDatetimeIndex, ABCTimedeltaIndex) 

737 ): 

738 vals = vals.astype(object) 

739 

740 vals = np.array(vals, copy=False) 

741 if not is_dti: 

742 vals = algos.take_nd(vals, codes, fill_value=index._na_value) 

743 values.append(vals) 

744 

745 arr = lib.fast_zip(values) 

746 return arr 

747 

748 @property 

749 def values(self) -> np.ndarray: 

750 return self._values 

751 

752 @property 

753 def array(self): 

754 """ 

755 Raises a ValueError for `MultiIndex` because there's no single 

756 array backing a MultiIndex. 

757 

758 Raises 

759 ------ 

760 ValueError 

761 """ 

762 raise ValueError( 

763 "MultiIndex has no single backing array. Use " 

764 "'MultiIndex.to_numpy()' to get a NumPy array of tuples." 

765 ) 

766 

767 @cache_readonly 

768 def dtypes(self) -> Series: 

769 """ 

770 Return the dtypes as a Series for the underlying MultiIndex. 

771 """ 

772 from pandas import Series 

773 

774 names = com.fill_missing_names([level.name for level in self.levels]) 

775 return Series([level.dtype for level in self.levels], index=Index(names)) 

776 

777 def __len__(self) -> int: 

778 return len(self.codes[0]) 

779 

780 # -------------------------------------------------------------------- 

781 # Levels Methods 

782 

783 @cache_readonly 

784 def levels(self) -> FrozenList: 

785 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly 

786 # create new IndexEngine 

787 # https://github.com/pandas-dev/pandas/issues/31648 

788 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] 

789 for level in result: 

790 # disallow midx.levels[0].name = "foo" 

791 level._no_setting_name = True 

792 return FrozenList(result) 

793 

794 def _set_levels( 

795 self, 

796 levels, 

797 *, 

798 level=None, 

799 copy: bool = False, 

800 validate: bool = True, 

801 verify_integrity: bool = False, 

802 ) -> None: 

803 # This is NOT part of the levels property because it should be 

804 # externally not allowed to set levels. User beware if you change 

805 # _levels directly 

806 if validate: 

807 if len(levels) == 0: 

808 raise ValueError("Must set non-zero number of levels.") 

809 if level is None and len(levels) != self.nlevels: 

810 raise ValueError("Length of levels must match number of levels.") 

811 if level is not None and len(levels) != len(level): 

812 raise ValueError("Length of levels must match length of level.") 

813 

814 if level is None: 

815 new_levels = FrozenList( 

816 ensure_index(lev, copy=copy)._view() for lev in levels 

817 ) 

818 else: 

819 level_numbers = [self._get_level_number(lev) for lev in level] 

820 new_levels_list = list(self._levels) 

821 for lev_num, lev in zip(level_numbers, levels): 

822 new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() 

823 new_levels = FrozenList(new_levels_list) 

824 

825 if verify_integrity: 

826 new_codes = self._verify_integrity(levels=new_levels) 

827 self._codes = new_codes 

828 

829 names = self.names 

830 self._levels = new_levels 

831 if any(names): 

832 self._set_names(names) 

833 

834 self._reset_cache() 

835 

836 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "levels"]) 

837 def set_levels( 

838 self, levels, level=None, inplace=None, verify_integrity: bool = True 

839 ): 

840 """ 

841 Set new levels on MultiIndex. Defaults to returning new index. 

842 

843 Parameters 

844 ---------- 

845 levels : sequence or list of sequence 

846 New level(s) to apply. 

847 level : int, level name, or sequence of int/level names (default None) 

848 Level(s) to set (None for all levels). 

849 inplace : bool 

850 If True, mutates in place. 

851 

852 .. deprecated:: 1.2.0 

853 verify_integrity : bool, default True 

854 If True, checks that levels and codes are compatible. 

855 

856 Returns 

857 ------- 

858 new index (of same type and class...etc) or None 

859 The same type as the caller or None if ``inplace=True``. 

860 

861 Examples 

862 -------- 

863 >>> idx = pd.MultiIndex.from_tuples( 

864 ... [ 

865 ... (1, "one"), 

866 ... (1, "two"), 

867 ... (2, "one"), 

868 ... (2, "two"), 

869 ... (3, "one"), 

870 ... (3, "two") 

871 ... ], 

872 ... names=["foo", "bar"] 

873 ... ) 

874 >>> idx 

875 MultiIndex([(1, 'one'), 

876 (1, 'two'), 

877 (2, 'one'), 

878 (2, 'two'), 

879 (3, 'one'), 

880 (3, 'two')], 

881 names=['foo', 'bar']) 

882 

883 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) 

884 MultiIndex([('a', 1), 

885 ('a', 2), 

886 ('b', 1), 

887 ('b', 2), 

888 ('c', 1), 

889 ('c', 2)], 

890 names=['foo', 'bar']) 

891 >>> idx.set_levels(['a', 'b', 'c'], level=0) 

892 MultiIndex([('a', 'one'), 

893 ('a', 'two'), 

894 ('b', 'one'), 

895 ('b', 'two'), 

896 ('c', 'one'), 

897 ('c', 'two')], 

898 names=['foo', 'bar']) 

899 >>> idx.set_levels(['a', 'b'], level='bar') 

900 MultiIndex([(1, 'a'), 

901 (1, 'b'), 

902 (2, 'a'), 

903 (2, 'b'), 

904 (3, 'a'), 

905 (3, 'b')], 

906 names=['foo', 'bar']) 

907 

908 If any of the levels passed to ``set_levels()`` exceeds the 

909 existing length, all of the values from that argument will 

910 be stored in the MultiIndex levels, though the values will 

911 be truncated in the MultiIndex output. 

912 

913 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) 

914 MultiIndex([('a', 1), 

915 ('a', 2), 

916 ('b', 1), 

917 ('b', 2), 

918 ('c', 1), 

919 ('c', 2)], 

920 names=['foo', 'bar']) 

921 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels 

922 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) 

923 """ 

924 if inplace is not None: 

925 warnings.warn( 

926 "inplace is deprecated and will be removed in a future version.", 

927 FutureWarning, 

928 stacklevel=find_stack_level(), 

929 ) 

930 else: 

931 inplace = False 

932 

933 if is_list_like(levels) and not isinstance(levels, Index): 

934 levels = list(levels) 

935 

936 level, levels = _require_listlike(level, levels, "Levels") 

937 

938 if inplace: 

939 idx = self 

940 else: 

941 idx = self._view() 

942 idx._reset_identity() 

943 idx._set_levels( 

944 levels, level=level, validate=True, verify_integrity=verify_integrity 

945 ) 

946 if not inplace: 

947 return idx 

948 

949 @property 

950 def nlevels(self) -> int: 

951 """ 

952 Integer number of levels in this MultiIndex. 

953 

954 Examples 

955 -------- 

956 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

957 >>> mi 

958 MultiIndex([('a', 'b', 'c')], 

959 ) 

960 >>> mi.nlevels 

961 3 

962 """ 

963 return len(self._levels) 

964 

965 @property 

966 def levshape(self) -> Shape: 

967 """ 

968 A tuple with the length of each level. 

969 

970 Examples 

971 -------- 

972 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

973 >>> mi 

974 MultiIndex([('a', 'b', 'c')], 

975 ) 

976 >>> mi.levshape 

977 (1, 1, 1) 

978 """ 

979 return tuple(len(x) for x in self.levels) 

980 

981 # -------------------------------------------------------------------- 

982 # Codes Methods 

983 

984 @property 

985 def codes(self): 

986 return self._codes 

987 

988 def _set_codes( 

989 self, 

990 codes, 

991 *, 

992 level=None, 

993 copy: bool = False, 

994 validate: bool = True, 

995 verify_integrity: bool = False, 

996 ) -> None: 

997 if validate: 

998 if level is None and len(codes) != self.nlevels: 

999 raise ValueError("Length of codes must match number of levels") 

1000 if level is not None and len(codes) != len(level): 

1001 raise ValueError("Length of codes must match length of levels.") 

1002 

1003 if level is None: 

1004 new_codes = FrozenList( 

1005 _coerce_indexer_frozen(level_codes, lev, copy=copy).view() 

1006 for lev, level_codes in zip(self._levels, codes) 

1007 ) 

1008 else: 

1009 level_numbers = [self._get_level_number(lev) for lev in level] 

1010 new_codes_list = list(self._codes) 

1011 for lev_num, level_codes in zip(level_numbers, codes): 

1012 lev = self.levels[lev_num] 

1013 new_codes_list[lev_num] = _coerce_indexer_frozen( 

1014 level_codes, lev, copy=copy 

1015 ) 

1016 new_codes = FrozenList(new_codes_list) 

1017 

1018 if verify_integrity: 

1019 new_codes = self._verify_integrity(codes=new_codes) 

1020 

1021 self._codes = new_codes 

1022 

1023 self._reset_cache() 

1024 

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "codes"])
def set_codes(self, codes, level=None, inplace=None, verify_integrity: bool = True):
    """
    Set new codes on MultiIndex. Defaults to returning new index.

    Parameters
    ----------
    codes : sequence or list of sequence
        New codes to apply.
    level : int, level name, or sequence of int/level names (default None)
        Level(s) to set (None for all levels).
    inplace : bool
        If True, mutates in place.

        .. deprecated:: 1.2.0
    verify_integrity : bool, default True
        If True, checks that levels and codes are compatible.

    Returns
    -------
    new index (of same type and class...etc) or None
        The same type as the caller or None if ``inplace=True``.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples(
    ...     [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
    ... )
    >>> idx
    MultiIndex([(1, 'one'),
        (1, 'two'),
        (2, 'one'),
        (2, 'two')],
       names=['foo', 'bar'])

    >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
    MultiIndex([(2, 'one'),
        (1, 'one'),
        (2, 'two'),
        (1, 'two')],
       names=['foo', 'bar'])
    >>> idx.set_codes([1, 0, 1, 0], level=0)
    MultiIndex([(2, 'one'),
        (1, 'two'),
        (2, 'one'),
        (1, 'two')],
       names=['foo', 'bar'])
    >>> idx.set_codes([0, 0, 1, 1], level='bar')
    MultiIndex([(1, 'one'),
        (1, 'one'),
        (2, 'two'),
        (2, 'two')],
       names=['foo', 'bar'])
    >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
    MultiIndex([(2, 'one'),
        (1, 'one'),
        (2, 'two'),
        (1, 'two')],
       names=['foo', 'bar'])
    """
    # ``inplace`` is deprecated; only warn when the caller passed it
    # explicitly, then fall back to the historical default of False.
    if inplace is not None:
        warnings.warn(
            "inplace is deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        inplace = False

    level, codes = _require_listlike(level, codes, "Codes")

    if inplace:
        idx = self
    else:
        # Work on a shallow copy with a fresh identity so the result does
        # not compare identical to self.
        idx = self._view()
        idx._reset_identity()
    idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
    if not inplace:
        return idx

1104 

1105 # -------------------------------------------------------------------- 

1106 # Index Internals 

1107 

@cache_readonly
def _engine(self):
    """Build the cached lookup engine used by get_loc and friends."""
    # Calculate the number of bits needed to represent labels in each
    # level, as log2 of their sizes (including -1 for NaN):
    sizes = np.ceil(np.log2([len(level) + 1 for level in self.levels]))

    # Sum bit counts, starting from the _right_....
    lev_bits = np.cumsum(sizes[::-1])[::-1]

    # ... in order to obtain offsets such that sorting the combination of
    # shifted codes (one for each level, resulting in a unique integer) is
    # equivalent to sorting lexicographically the codes themselves. Notice
    # that each level needs to be shifted by the number of bits needed to
    # represent the _previous_ ones:
    offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

    # Check the total number of bits needed for our representation:
    if lev_bits[0] > 64:
        # The levels would overflow a 64 bit uint - use Python integers:
        return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
    return MultiIndexUIntEngine(self.levels, self.codes, offsets)

1129 

# Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
# type "Type[MultiIndex]" in supertype "Index"
@property
def _constructor(self) -> Callable[..., MultiIndex]:  # type: ignore[override]
    # Rebuilding from a flat array of values means from_tuples, not __new__.
    return type(self).from_tuples

1135 

@doc(Index._shallow_copy)
def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
    # Keep the existing level names unless the caller supplied new ones.
    if name is lib.no_default:
        names = self.names
    else:
        names = name

    return type(self).from_tuples(values, sortorder=None, names=names)

1141 

def _view(self) -> MultiIndex:
    """Return a shallow copy sharing levels/codes, with a copied cache."""
    result = type(self)(
        levels=self.levels,
        codes=self.codes,
        sortorder=self.sortorder,
        names=self.names,
        verify_integrity=False,
    )
    # Share cached computations, but drop the cached "levels" entry so the
    # copy rebuilds level views bound to itself.
    result._cache = self._cache.copy()
    result._cache.pop("levels", None)  # GH32669
    return result

1153 

1154 # -------------------------------------------------------------------- 

1155 

def copy(
    self,
    names=None,
    dtype=None,
    levels=None,
    codes=None,
    deep=False,
    name=None,
):
    """
    Make a copy of this object. Names, dtype, levels and codes can be
    passed and will be set on new copy.

    Parameters
    ----------
    names : sequence, optional
    dtype : numpy dtype or pandas type, optional

        .. deprecated:: 1.2.0
    levels : sequence, optional

        .. deprecated:: 1.2.0
    codes : sequence, optional

        .. deprecated:: 1.2.0
    deep : bool, default False
    name : Label
        Kept for compatibility with 1-dimensional Index. Should not be used.

    Returns
    -------
    MultiIndex

    Notes
    -----
    In most cases, there should be no functional difference from using
    ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
    This could be potentially expensive on large MultiIndex objects.
    """
    names = self._validate_names(name=name, names=names, deep=deep)
    # keep_id: a shallow, unmodified copy may keep the identity of self so
    # that ``copy is`` comparisons via _id still succeed.
    keep_id = not deep
    if levels is not None:
        warnings.warn(
            "parameter levels is deprecated and will be removed in a future "
            "version. Use the set_levels method instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        keep_id = False
    if codes is not None:
        warnings.warn(
            "parameter codes is deprecated and will be removed in a future "
            "version. Use the set_codes method instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        keep_id = False

    if deep:
        from copy import deepcopy

        if levels is None:
            levels = deepcopy(self.levels)
        if codes is None:
            codes = deepcopy(self.codes)

    levels = levels if levels is not None else self.levels
    codes = codes if codes is not None else self.codes

    new_index = type(self)(
        levels=levels,
        codes=codes,
        sortorder=self.sortorder,
        names=names,
        verify_integrity=False,
    )
    # Share cached computations, minus the cached "levels" (GH32669).
    new_index._cache = self._cache.copy()
    new_index._cache.pop("levels", None)  # GH32669
    if keep_id:
        new_index._id = self._id

    if dtype:
        warnings.warn(
            "parameter dtype is deprecated and will be removed in a future "
            "version. Use the astype method instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        new_index = new_index.astype(dtype)
    return new_index

1246 

def __array__(self, dtype=None) -> np.ndarray:
    """the array interface, return my values"""
    # ``dtype`` is accepted for numpy protocol compatibility but ignored.
    return self.values

1250 

def view(self, cls=None):
    """this is defined as a copy with the same identity"""
    # ``cls`` is accepted for Index API compatibility; the result is a
    # copy of self that keeps the same identity marker.
    copied = self.copy()
    copied._id = self._id
    return copied

1256 

@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
    # Unhashable keys raise TypeError here, matching dict-like semantics.
    hash(key)
    try:
        self.get_loc(key)
    except (LookupError, TypeError, ValueError):
        return False
    return True

1265 

@cache_readonly
def dtype(self) -> np.dtype:
    """A MultiIndex always reports object dtype for its flat values."""
    return np.dtype(object)

1269 

def _is_memory_usage_qualified(self) -> bool:
    """return a boolean if we need a qualified .info display"""
    # A "+" qualifier is needed whenever any level's inferred type mentions
    # one of these object-backed kinds.
    qualifying = ("mixed", "string", "unicode")
    return any(
        any(kind in inferred for kind in qualifying)
        for inferred in self._inferred_type_levels
    )

1277 

@doc(Index.memory_usage)
def memory_usage(self, deep: bool = False) -> int:
    # we are overwriting our base class to avoid
    # computing .values here which could materialize
    # a tuple representation unnecessarily
    return self._nbytes(deep)

1284 

@cache_readonly
def nbytes(self) -> int:
    """return the number of bytes in the underlying data"""
    # Shallow measurement; see _nbytes for the deep variant.
    return self._nbytes(False)

1289 

def _nbytes(self, deep: bool = False) -> int:
    """
    return the number of bytes in the underlying data
    deeply introspect the level data if deep=True

    include the engine hashtable

    *this is in internal routine*

    """
    # Fallback object size for implementations with no useful getsizeof
    # (PyPy).
    objsize = 24

    total = sum(level.memory_usage(deep=deep) for level in self.levels)
    total += sum(level_codes.nbytes for level_codes in self.codes)
    total += sum(getsizeof(level_name, objsize) for level_name in self.names)

    # include our engine hashtable
    total += self._engine.sizeof(deep=deep)
    return total

1311 

1312 # -------------------------------------------------------------------- 

1313 # Rendering Methods 

1314 

def _formatter_func(self, tup):
    """
    Formats each item in tup according to its level's formatter function.
    """
    per_level_formatters = [level._formatter_func for level in self.levels]
    return tuple(
        formatter(value)
        for formatter, value in zip(per_level_formatters, tup)
    )

1321 

def _format_native_types(
    self, *, na_rep="nan", **kwargs
) -> npt.NDArray[np.object_]:
    """Render the index values as an object array of formatted strings."""
    new_levels = []
    new_codes = []

    # go through the levels and format them
    for level, level_codes in zip(self.levels, self.codes):
        level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
        # add nan values, if there are any
        mask = level_codes == -1
        if mask.any():
            nan_index = len(level_strs)
            # numpy 1.21 deprecated implicit string casting
            level_strs = level_strs.astype(str)
            level_strs = np.append(level_strs, na_rep)
            # The stored codes are read-only, so mutating requires a copy.
            assert not level_codes.flags.writeable  # i.e. copy is needed
            level_codes = level_codes.copy()  # make writeable
            level_codes[mask] = nan_index
        new_levels.append(level_strs)
        new_codes.append(level_codes)

    if len(new_levels) == 1:
        # a single-level multi-index
        return Index(new_levels[0].take(new_codes[0]))._format_native_types()
    else:
        # reconstruct the multi-index
        mi = MultiIndex(
            levels=new_levels,
            codes=new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
        return mi._values

1357 

def format(
    self,
    name: bool | None = None,
    formatter: Callable | None = None,
    na_rep: str | None = None,
    names: bool = False,
    space: int = 2,
    sparsify=None,
    adjoin: bool = True,
) -> list:
    """Render the index as a list of formatted strings (one per column
    when ``adjoin=False``, one per display row when ``adjoin=True``)."""
    # ``name`` is a legacy alias for ``names``.
    if name is not None:
        names = name

    if len(self) == 0:
        return []

    # 1) Stringify each level's values, filling NA positions.
    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

        if len(lev) > 0:

            formatted = lev.take(level_codes).format(formatter=formatter)

            # we have some NA
            mask = level_codes == -1
            if mask.any():
                formatted = np.array(formatted, dtype=object)
                formatted[mask] = na
                formatted = formatted.tolist()

        else:
            # weird all NA case
            formatted = [
                pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                for x in algos.take_nd(lev._values, level_codes)
            ]
        stringified_levels.append(formatted)

    # 2) Optionally prepend each level's name as a header cell.
    result_levels = []
    for lev, lev_name in zip(stringified_levels, self.names):
        level = []

        if names:
            level.append(
                pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                if lev_name is not None
                else ""
            )

        level.extend(np.array(lev, dtype=object))
        result_levels.append(level)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    # 3) Blank out repeated labels within each level for display.
    if sparsify:
        sentinel: Literal[""] | bool | lib.NoDefault = ""
        # GH3547 use value of sparsify as sentinel if it's "Falsey"
        assert isinstance(sparsify, bool) or sparsify is lib.no_default
        if sparsify in [False, lib.no_default]:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        result_levels = sparsify_labels(
            result_levels, start=int(names), sentinel=sentinel
        )

    # 4) Either join the columns into display rows or return them raw.
    if adjoin:
        from pandas.io.formats.format import get_adjustment

        adj = get_adjustment()
        return adj.adjoin(space, *result_levels).split("\n")
    else:
        return result_levels

1432 

1433 # -------------------------------------------------------------------- 

1434 # Names Methods 

1435 

def _get_names(self) -> FrozenList:
    # Wrap in a fresh FrozenList so the internal names list is not handed
    # out directly.
    return FrozenList(self._names)

1438 

def _set_names(self, names, *, level=None, validate: bool = True):
    """
    Set new names on index. Each name has to be a hashable type.

    Parameters
    ----------
    values : str or sequence
        name(s) to set
    level : int, level name, or sequence of int/level names (default None)
        If the index is a MultiIndex (hierarchical), level(s) to set (None
        for all levels). Otherwise level must be None
    validate : bool, default True
        validate that the names match level lengths

    Raises
    ------
    TypeError if each name is not hashable.

    Notes
    -----
    sets names on levels. WARNING: mutates!

    Note that you generally want to set this *after* changing levels, so
    that it only acts on copies
    """
    # GH 15110
    # Don't allow a single string for names in a MultiIndex
    if names is not None and not is_list_like(names):
        raise ValueError("Names should be list-like for a MultiIndex")
    names = list(names)

    if validate:
        if level is not None and len(names) != len(level):
            raise ValueError("Length of names must match length of level.")
        if level is None and len(names) != self.nlevels:
            raise ValueError(
                "Length of names must match number of levels in MultiIndex."
            )

    # Normalize ``level`` to a sequence of level numbers.
    if level is None:
        level = range(self.nlevels)
    else:
        level = [self._get_level_number(lev) for lev in level]

    # set the name
    for lev, name in zip(level, names):
        if name is not None:
            # GH 20527
            # All items in 'names' need to be hashable:
            if not is_hashable(name):
                raise TypeError(
                    f"{type(self).__name__}.name must be a hashable type"
                )
        self._names[lev] = name

    # If .levels has been accessed, the names in our cache will be stale.
    self._reset_cache()

1496 

# ``names`` delegates to _get_names/_set_names so that assignment goes
# through length/hashability validation.
names = property(
    fset=_set_names,
    fget=_get_names,
    doc="""
    Names of levels in MultiIndex.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays(
    ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
    >>> mi
    MultiIndex([(1, 3, 5),
                (2, 4, 6)],
               names=['x', 'y', 'z'])
    >>> mi.names
    FrozenList(['x', 'y', 'z'])
    """,
)

1515 

1516 # -------------------------------------------------------------------- 

1517 

@doc(Index._get_grouper_for_level)
def _get_grouper_for_level(
    self,
    mapper,
    *,
    level=None,
    dropna: bool = True,
) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
    if mapper is not None:
        indexer = self.codes[level]
        # Handle group mapping function and return
        level_values = self.levels[level].take(indexer)
        grouper = level_values.map(mapper)
        return grouper, None, None

    # No mapper: factorize the level's values directly.
    values = self.get_level_values(level)
    codes, uniques = algos.factorize(values, sort=True, use_na_sentinel=dropna)
    assert isinstance(uniques, Index)

    if self.levels[level]._can_hold_na:
        grouper = uniques.take(codes, fill_value=True)
    else:
        grouper = uniques.take(codes)

    return grouper, codes, uniques

1543 

@cache_readonly
def inferred_type(self) -> str:
    # A MultiIndex always reports "mixed" regardless of level dtypes.
    return "mixed"

1547 

def _get_level_number(self, level) -> int:
    """
    Resolve a level name or (possibly negative) integer position to a
    non-negative level number.

    Raises
    ------
    ValueError
        If ``level`` is an ambiguous duplicated name.
    KeyError
        If ``level`` is a name that does not occur.
    IndexError
        If ``level`` is an integer out of range.
    """
    count = self.names.count(level)
    if (count > 1) and not is_integer(level):
        raise ValueError(
            f"The name {level} occurs multiple times, use a level number"
        )
    try:
        # Name lookup first; integers fall through to the except branch.
        level = self.names.index(level)
    except ValueError as err:
        if not is_integer(level):
            raise KeyError(f"Level {level} not found") from err
        elif level < 0:
            level += self.nlevels
            if level < 0:
                orig_level = level - self.nlevels
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"{orig_level} is not a valid level number"
                ) from err
        # Note: levels are zero-based
        elif level >= self.nlevels:
            raise IndexError(
                f"Too many levels: Index has only {self.nlevels} levels, "
                f"not {level + 1}"
            ) from err
    return level

1574 

@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """
    Return a boolean if the values are equal or increasing.
    """
    # Any missing code (-1) breaks monotonicity by definition here.
    if any(-1 in code for code in self.codes):
        return False

    if all(level.is_monotonic_increasing for level in self.levels):
        # If each level is sorted, we can operate on the codes directly. GH27495
        return libalgos.is_lexsorted(
            [x.astype("int64", copy=False) for x in self.codes]
        )

    # reversed() because lexsort() wants the most significant key last.
    values = [
        self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
    ]
    try:
        # error: Argument 1 to "lexsort" has incompatible type
        # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
        # int, float, complex, str, bytes, _NestedSequence[Union
        # [bool, int, float, complex, str, bytes]]]"
        sort_order = np.lexsort(values)  # type: ignore[arg-type]
        return Index(sort_order).is_monotonic_increasing
    except TypeError:

        # we have mixed types and np.lexsort is not happy
        return Index(self._values).is_monotonic_increasing

1606 

@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    Return a boolean if the values are equal or decreasing.
    """
    # monotonic decreasing if and only if reverse is monotonic increasing
    return self[::-1].is_monotonic_increasing

1614 

@cache_readonly
def _inferred_type_levels(self) -> list[str]:
    """return a list of the inferred types, one for each level"""
    return [i.inferred_type for i in self.levels]

1619 

@doc(Index.duplicated)
def duplicated(self, keep="first") -> npt.NDArray[np.bool_]:
    # Collapse the per-level codes into one integer group id per row, then
    # run the ordinary duplicated kernel on those ids.
    shape = tuple(len(lev) for lev in self.levels)
    ids = get_group_index(self.codes, shape, sort=False, xnull=False)

    return duplicated(ids, keep)

# error: Cannot override final attribute "_duplicated"
# (previously declared in base class "IndexOpsMixin")
_duplicated = duplicated  # type: ignore[misc]

1630 

def fillna(self, value=None, downcast=None):
    """
    fillna is not implemented for MultiIndex

    Parameters
    ----------
    value, downcast
        Accepted for Index interface compatibility only; ignored.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # Bug fix: the message previously said "isna is not defined for
    # MultiIndex", which named the wrong method.
    raise NotImplementedError("fillna is not defined for MultiIndex")

1636 

@doc(Index.dropna)
def dropna(self, how: str = "any") -> MultiIndex:
    # Validate the reduction mode up front.
    if how not in ("any", "all"):
        raise ValueError(f"invalid how option: {how}")

    # A code of -1 marks a missing label in that level.
    missing_per_level = [level_codes == -1 for level_codes in self.codes]
    combine = np.any if how == "any" else np.all
    drop_mask = combine(missing_per_level, axis=0)

    kept_codes = [level_codes[~drop_mask] for level_codes in self.codes]
    return self.set_codes(codes=kept_codes)

1649 

def _get_level_values(self, level: int, unique: bool = False) -> Index:
    """
    Return vector of label values for requested level,
    equal to the length of the index

    **this is an internal method**

    Parameters
    ----------
    level : int
    unique : bool, default False
        if True, drop duplicated values

    Returns
    -------
    Index
    """
    lev = self.levels[level]
    level_codes = self.codes[level]
    name = self._names[level]
    if unique:
        level_codes = algos.unique(level_codes)
    # take_nd maps -1 codes to the level's NA value.
    filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
    return lev._shallow_copy(filled, name=name)

1674 

def get_level_values(self, level):
    """
    Return vector of label values for requested level.

    Length of returned vector is equal to the length of the index.

    Parameters
    ----------
    level : int or str
        ``level`` is either the integer position of the level in the
        MultiIndex, or the name of the level.

    Returns
    -------
    values : Index
        Values is a level of this MultiIndex converted to
        a single :class:`Index` (or subclass thereof).

    Notes
    -----
    If the level contains missing values, the result may be casted to
    ``float`` with missing values specified as ``NaN``. This is because
    the level is converted to a regular ``Index``.

    Examples
    --------
    Create a MultiIndex:

    >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
    >>> mi.names = ['level_1', 'level_2']

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2')
    Index(['d', 'e', 'f'], dtype='object', name='level_2')

    If a level contains missing values, the return type of the level
    maybe casted to ``float``.

    >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
    level_0    int64
    level_1    int64
    dtype: object
    >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
    Float64Index([1.0, nan, 2.0], dtype='float64')
    """
    # Resolve a name or position to a level number, then materialize.
    return self._get_level_values(self._get_level_number(level))

1726 

@doc(Index.unique)
def unique(self, level=None):

    if level is None:
        # Deduplicate full tuples via the base Index implementation.
        return super().unique()
    else:
        # Deduplicate the values of a single level only.
        level = self._get_level_number(level)
        return self._get_level_values(level=level, unique=True)

1735 

def to_frame(
    self,
    index: bool = True,
    name=lib.no_default,
    allow_duplicates: bool = False,
) -> DataFrame:
    """
    Create a DataFrame with the levels of the MultiIndex as columns.

    Column ordering is determined by the DataFrame constructor with data as
    a dict.

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original MultiIndex.

    name : list / sequence of str, optional
        The passed names should substitute index level names.

    allow_duplicates : bool, optional default False
        Allow duplicate column labels to be created.

        .. versionadded:: 1.5.0

    Returns
    -------
    DataFrame : a DataFrame containing the original MultiIndex data.

    See Also
    --------
    DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
        tabular data.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
    >>> mi
    MultiIndex([('a', 'c'),
                ('b', 'd')],
               )

    >>> df = mi.to_frame()
    >>> df
         0  1
    a c  a  c
    b d  b  d

    >>> df = mi.to_frame(index=False)
    >>> df
       0  1
    0  a  c
    1  b  d

    >>> df = mi.to_frame(name=['x', 'y'])
    >>> df
         x  y
    a c  a  c
    b d  b  d
    """
    from pandas import DataFrame

    # Explicit ``name=None`` is deprecated; treat it as "no name passed"
    # for now while warning.
    if name is None:
        warnings.warn(
            "Explicitly passing `name=None` currently preserves the Index's name "
            "or uses a default name of 0. This behaviour is deprecated, and in "
            "the future `None` will be used as the name of the resulting "
            "DataFrame column.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        name = lib.no_default

    if name is not lib.no_default:
        if not is_list_like(name):
            raise TypeError("'name' must be a list / sequence of column names.")

        if len(name) != len(self.levels):
            raise ValueError(
                "'name' should have same length as number of levels on index."
            )
        idx_names = name
    else:
        idx_names = self._get_level_names()

    if not allow_duplicates and len(set(idx_names)) != len(idx_names):
        raise ValueError(
            "Cannot create duplicate column labels if allow_duplicates is False"
        )

    # Guarantee resulting column order - PY36+ dict maintains insertion order
    result = DataFrame(
        {level: self._get_level_values(level) for level in range(len(self.levels))},
        copy=False,
    )
    result.columns = idx_names

    if index:
        result.index = self
    return result

1836 

# error: Return type "Index" of "to_flat_index" incompatible with return type
# "MultiIndex" in supertype "Index"
def to_flat_index(self) -> Index:  # type: ignore[override]
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    Returns
    -------
    pd.Index
        Index with the MultiIndex data represented in Tuples.

    See Also
    --------
    MultiIndex.from_tuples : Convert flat index back to MultiIndex.

    Notes
    -----
    This method will simply return the caller if called by anything other
    than a MultiIndex.

    Examples
    --------
    >>> index = pd.MultiIndex.from_product(
    ...     [['foo', 'bar'], ['baz', 'qux']],
    ...     names=['a', 'b'])
    >>> index.to_flat_index()
    Index([('foo', 'baz'), ('foo', 'qux'),
           ('bar', 'baz'), ('bar', 'qux')],
          dtype='object')
    """
    # tupleize_cols=False prevents the tuples from being re-expanded into
    # a MultiIndex.
    return Index(self._values, tupleize_cols=False)

1868 

def is_lexsorted(self) -> bool:
    """Deprecated public wrapper around :meth:`_is_lexsorted`."""
    warnings.warn(
        "MultiIndex.is_lexsorted is deprecated as a public function, "
        "users should use MultiIndex.is_monotonic_increasing instead.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self._is_lexsorted()

1877 

def _is_lexsorted(self) -> bool:
    """
    Return True if the codes are lexicographically sorted.

    Returns
    -------
    bool

    Examples
    --------
    In the below examples, the first level of the MultiIndex is sorted because
    a<b<c, so there is no need to look at the next level.

    >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted()
    True
    >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted()
    True

    In case there is a tie, the lexicographical sorting looks
    at the next level of the MultiIndex.

    >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted()
    True
    >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted()
    False
    >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
    ...                            ['aa', 'bb', 'aa', 'bb']]).is_lexsorted()
    True
    >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
    ...                            ['bb', 'aa', 'aa', 'bb']]).is_lexsorted()
    False
    """
    # Fully lexsorted means every level participates in the sort depth.
    return self._lexsort_depth == self.nlevels

1911 

@property
def lexsort_depth(self) -> int:
    """Deprecated public wrapper around :attr:`_lexsort_depth`."""
    warnings.warn(
        "MultiIndex.lexsort_depth is deprecated as a public function, "
        "users should use MultiIndex.is_monotonic_increasing instead.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self._lexsort_depth

1921 

@cache_readonly
def _lexsort_depth(self) -> int:
    """
    Compute and return the lexsort_depth, the number of levels of the
    MultiIndex that are sorted lexically

    Returns
    -------
    int
    """
    # A known sortorder short-circuits the computation.
    if self.sortorder is not None:
        return self.sortorder
    return _lexsort_depth(self.codes, self.nlevels)

1935 

def _sort_levels_monotonic(self) -> MultiIndex:
    """
    This is an *internal* function.

    Create a new MultiIndex from the current to monotonically sorted
    items IN the levels. This does not actually make the entire MultiIndex
    monotonic, JUST the levels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi
    MultiIndex([('a', 'bb'),
                ('a', 'aa'),
                ('b', 'bb'),
                ('b', 'aa')],
               )

    >>> mi.sort_values()
    MultiIndex([('a', 'aa'),
                ('a', 'bb'),
                ('b', 'aa'),
                ('b', 'bb')],
               )
    """
    if self._is_lexsorted() and self.is_monotonic_increasing:
        return self

    new_levels = []
    new_codes = []

    for lev, level_codes in zip(self.levels, self.codes):

        if not lev.is_monotonic_increasing:
            try:
                # indexer to reorder the levels
                indexer = lev.argsort()
            except TypeError:
                # Unorderable level values: leave this level unchanged.
                pass
            else:
                lev = lev.take(indexer)

                # indexer to reorder the level codes
                indexer = ensure_platform_int(indexer)
                ri = lib.get_reverse_indexer(indexer, len(indexer))
                level_codes = algos.take_nd(ri, level_codes)

        new_levels.append(lev)
        new_codes.append(level_codes)

    return MultiIndex(
        new_levels,
        new_codes,
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )

2002 

def remove_unused_levels(self) -> MultiIndex:
    """
    Create new MultiIndex from current that removes unused levels.

    Unused level(s) means levels that are not expressed in the
    labels. The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will
    also be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
    >>> mi
    MultiIndex([(0, 'a'),
                (0, 'b'),
                (1, 'a'),
                (1, 'b')],
               )

    >>> mi[2:]
    MultiIndex([(1, 'a'),
                (1, 'b')],
               )

    The 0 from the first level is not represented
    and can be removed

    >>> mi2 = mi[2:].remove_unused_levels()
    >>> mi2.levels
    FrozenList([[1], ['a', 'b']])
    """
    new_levels = []
    new_codes = []

    changed = False
    for lev, level_codes in zip(self.levels, self.codes):

        # Since few levels are typically unused, bincount() is more
        # efficient than unique() - however it only accepts positive values
        # (and drops order):
        uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
        has_na = int(len(uniques) and (uniques[0] == -1))

        if len(uniques) != len(lev) + has_na:

            if lev.isna().any() and len(uniques) == len(lev):
                break
            # We have unused levels
            changed = True

            # Recalculate uniques, now preserving order.
            # Can easily be cythonized by exploiting the already existing
            # "uniques" and stop parsing "level_codes" when all items
            # are found:
            uniques = algos.unique(level_codes)
            if has_na:
                na_idx = np.where(uniques == -1)[0]
                # Just ensure that -1 is in first position:
                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

            # codes get mapped from uniques to 0:len(uniques)
            # -1 (if present) is mapped to last position
            code_mapping = np.zeros(len(lev) + has_na)
            # ... and reassigned value -1:
            code_mapping[uniques] = np.arange(len(uniques)) - has_na

            level_codes = code_mapping[level_codes]

            # new levels are simple
            lev = lev.take(uniques[has_na:])

        new_levels.append(lev)
        new_codes.append(level_codes)

    result = self.view()

    if changed:
        # Only rebind levels/codes when something was actually dropped.
        result._reset_identity()
        result._set_levels(new_levels, validate=False)
        result._set_codes(new_codes, validate=False)

    return result

2089 

2090 # -------------------------------------------------------------------- 

2091 # Pickling Methods 

2092 

2093 def __reduce__(self): 

2094 """Necessary for making this object picklable""" 

2095 d = { 

2096 "levels": list(self.levels), 

2097 "codes": list(self.codes), 

2098 "sortorder": self.sortorder, 

2099 "names": list(self.names), 

2100 } 

2101 return ibase._new_Index, (type(self), d), None 

2102 

2103 # -------------------------------------------------------------------- 

2104 

    def __getitem__(self, key):
        # Scalar key -> return one tuple of labels; slice/mask/array key ->
        # return a new MultiIndex sharing the levels and slicing the codes.
        if is_scalar(key):
            key = com.cast_scalar_indexer(key, warn_float=True)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # code -1 encodes a missing value at this level
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                # A boolean mask preserves relative order, so sortorder holds.
                sortorder = self.sortorder
            elif isinstance(key, slice):
                if key.step is None or key.step > 0:
                    # Forward slices also preserve relative order.
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )

2138 

2139 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: 

2140 """ 

2141 Fastpath for __getitem__ when we know we have a slice. 

2142 """ 

2143 sortorder = None 

2144 if slobj.step is None or slobj.step > 0: 

2145 sortorder = self.sortorder 

2146 

2147 new_codes = [level_codes[slobj] for level_codes in self.codes] 

2148 

2149 return type(self)( 

2150 levels=self.levels, 

2151 codes=new_codes, 

2152 names=self._names, 

2153 sortorder=sortorder, 

2154 verify_integrity=False, 

2155 ) 

2156 

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: int = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # The only representable fill in a MultiIndex is missing, i.e. code -1.
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            mask = indices == -1
            if mask.any():
                # Positions requested with -1 become NA at every level.
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )

2188 

    def append(self, other):
        """
        Append a collection of Index options together

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # Fast path: every other index is a compatible MultiIndex, so we
            # can append level-by-level and keep the MultiIndex structure.
            arrays = []
            for i in range(self.nlevels):
                label = self._get_level_values(i)
                appended = [o._get_level_values(i) for o in other]
                arrays.append(label.append(appended))
            return MultiIndex.from_arrays(arrays, names=self.names)

        # Fallback: concatenate the flat object-dtype tuple representations.
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            return MultiIndex.from_tuples(new_tuples, names=self.names)
        except (TypeError, IndexError):
            # Not every entry is a tuple; fall back to a flat Index.
            return Index._with_infer(new_tuples)

2222 

2223 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: 

2224 return self._values.argsort(*args, **kwargs) 

2225 

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        # Repeating only the codes is sufficient; the levels are unchanged.
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )

2242 

    def drop(self, codes, level=None, errors="raise"):
        """
        Make new MultiIndex with passed list of codes deleted

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified
        level : int or level name, default None
        errors : str, default 'raise'

        Returns
        -------
        dropped : MultiIndex
        """
        if level is not None:
            # Dropping labels within a single level has its own fast path.
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # Collect positional indices of every row to delete.
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                # Missing labels are only an error when errors != "ignore".
                if errors != "ignore":
                    raise

        return self.delete(inds)

2296 

    def _drop_from_level(self, codes, level, errors="raise") -> MultiIndex:
        # Drop every row whose code at level `level` points at one of the
        # requested labels.  Sentinel -2 marks labels that were requested
        # but are not present (so they never match any code, including -1).
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # NOTE(review): when the level is as long as the index itself,
            # requested NA labels are also treated as not-found — confirm.
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # Keep rows whose level-i code is NOT among the matched label codes.
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]

2315 

2316 def swaplevel(self, i=-2, j=-1) -> MultiIndex: 

2317 """ 

2318 Swap level i with level j. 

2319 

2320 Calling this method does not change the ordering of the values. 

2321 

2322 Parameters 

2323 ---------- 

2324 i : int, str, default -2 

2325 First level of index to be swapped. Can pass level name as string. 

2326 Type of parameters can be mixed. 

2327 j : int, str, default -1 

2328 Second level of index to be swapped. Can pass level name as string. 

2329 Type of parameters can be mixed. 

2330 

2331 Returns 

2332 ------- 

2333 MultiIndex 

2334 A new MultiIndex. 

2335 

2336 See Also 

2337 -------- 

2338 Series.swaplevel : Swap levels i and j in a MultiIndex. 

2339 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a 

2340 particular axis. 

2341 

2342 Examples 

2343 -------- 

2344 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], 

2345 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) 

2346 >>> mi 

2347 MultiIndex([('a', 'bb'), 

2348 ('a', 'aa'), 

2349 ('b', 'bb'), 

2350 ('b', 'aa')], 

2351 ) 

2352 >>> mi.swaplevel(0, 1) 

2353 MultiIndex([('bb', 'a'), 

2354 ('aa', 'a'), 

2355 ('bb', 'b'), 

2356 ('aa', 'b')], 

2357 ) 

2358 """ 

2359 new_levels = list(self.levels) 

2360 new_codes = list(self.codes) 

2361 new_names = list(self.names) 

2362 

2363 i = self._get_level_number(i) 

2364 j = self._get_level_number(j) 

2365 

2366 new_levels[i], new_levels[j] = new_levels[j], new_levels[i] 

2367 new_codes[i], new_codes[j] = new_codes[j], new_codes[i] 

2368 new_names[i], new_names[j] = new_names[j], new_names[i] 

2369 

2370 return MultiIndex( 

2371 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2372 ) 

2373 

2374 def reorder_levels(self, order) -> MultiIndex: 

2375 """ 

2376 Rearrange levels using input order. May not drop or duplicate levels. 

2377 

2378 Parameters 

2379 ---------- 

2380 order : list of int or list of str 

2381 List representing new level order. Reference level by number 

2382 (position) or by key (label). 

2383 

2384 Returns 

2385 ------- 

2386 MultiIndex 

2387 

2388 Examples 

2389 -------- 

2390 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) 

2391 >>> mi 

2392 MultiIndex([(1, 3), 

2393 (2, 4)], 

2394 names=['x', 'y']) 

2395 

2396 >>> mi.reorder_levels(order=[1, 0]) 

2397 MultiIndex([(3, 1), 

2398 (4, 2)], 

2399 names=['y', 'x']) 

2400 

2401 >>> mi.reorder_levels(order=['y', 'x']) 

2402 MultiIndex([(3, 1), 

2403 (4, 2)], 

2404 names=['y', 'x']) 

2405 """ 

2406 order = [self._get_level_number(i) for i in order] 

2407 if len(order) != self.nlevels: 

2408 raise AssertionError( 

2409 f"Length of order must be same as number of levels ({self.nlevels}), " 

2410 f"got {len(order)}" 

2411 ) 

2412 new_levels = [self.levels[i] for i in order] 

2413 new_codes = [self.codes[i] for i in order] 

2414 new_names = [self.names[i] for i in order] 

2415 

2416 return MultiIndex( 

2417 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2418 ) 

2419 

2420 def _get_codes_for_sorting(self) -> list[Categorical]: 

2421 """ 

2422 we are categorizing our codes by using the 

2423 available categories (all, not just observed) 

2424 excluding any missing ones (-1); this is in preparation 

2425 for sorting, where we need to disambiguate that -1 is not 

2426 a valid valid 

2427 """ 

2428 

2429 def cats(level_codes): 

2430 return np.arange( 

2431 np.array(level_codes).max() + 1 if len(level_codes) else 0, 

2432 dtype=level_codes.dtype, 

2433 ) 

2434 

2435 return [ 

2436 Categorical.from_codes(level_codes, cats(level_codes), ordered=True) 

2437 for level_codes in self.codes 

2438 ] 

2439 

    def sortlevel(
        self, level=0, ascending: bool = True, sort_remaining: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if isinstance(level, (str, int)):
            level = [level]
        level = [self._get_level_number(lev) for lev in level]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:

            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): `primary` ends up duplicated here
                # (primary + primary + remaining).  Repeated sort keys do not
                # change the resulting order, but confirm the duplication is
                # intentional rather than a typo for `primary += tuple(codes)`.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer

2547 

    def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
        # Coerce the reindex target back to a MultiIndex when possible so the
        # result keeps this index's structure; otherwise return it flat.
        if not isinstance(target, MultiIndex):
            if indexer is None:
                target = self
            elif (indexer >= 0).all():
                # Every target entry was found -> just take from self.
                target = self.take(indexer)
            else:
                try:
                    target = MultiIndex.from_tuples(target)
                except TypeError:
                    # not all tuples, see test_constructor_dict_multiindex_reindex_flat
                    return target

        target = self._maybe_preserve_names(target, preserve_names)
        return target

2563 

2564 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: 

2565 if ( 

2566 preserve_names 

2567 and target.nlevels == self.nlevels 

2568 and target.names != self.names 

2569 ): 

2570 target = target.copy(deep=False) 

2571 target.names = self.names 

2572 return target 

2573 

2574 # -------------------------------------------------------------------- 

2575 # Indexing Methods 

2576 

2577 def _check_indexing_error(self, key) -> None: 

2578 if not is_hashable(key) or is_iterator(key): 

2579 # We allow tuples if they are hashable, whereas other Index 

2580 # subclasses require scalar. 

2581 # We have to explicitly exclude generators, as these are hashable. 

2582 raise InvalidIndexError(key) 

2583 

2584 @cache_readonly 

2585 def _should_fallback_to_positional(self) -> bool: 

2586 """ 

2587 Should integer key(s) be treated as positional? 

2588 """ 

2589 # GH#33355 

2590 return self.levels[0]._should_fallback_to_positional 

2591 

    def _get_values_for_loc(self, series: Series, loc, key):
        """
        Do a positional lookup on the given Series, returning either a scalar
        or a Series.

        Assumes that `series.index is self`
        """
        new_values = series._values[loc]
        if is_scalar(loc):
            return new_values

        if len(new_values) == 1 and not self.nlevels > 1:
            # If more than one level left, we can not return a scalar
            return new_values[0]

        # Build the result Series with the sliced index, dropping any levels
        # fully determined by `key`.
        new_index = self[loc]
        new_index = maybe_droplevels(new_index, key)
        new_ser = series._constructor(new_values, index=new_index, name=series.name)
        # Propagate metadata (attrs, flags) from the source Series.
        return new_ser.__finalize__(series)

2611 

    def _get_indexer_strict(
        self, key, axis_name: str
    ) -> tuple[Index, npt.NDArray[np.intp]]:
        # Strict (raise-on-missing) indexer lookup; non-tuple keys select on
        # level 0 only, everything else defers to the base Index behavior.
        keyarr = key
        if not isinstance(keyarr, Index):
            keyarr = com.asarray_tuplesafe(keyarr)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            indexer = self._get_indexer_level_0(keyarr)

            self._raise_if_missing(key, indexer, axis_name)
            return self[indexer], indexer

        return super()._get_indexer_strict(key, axis_name)

2627 

    def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
        # Raise KeyError when a strict lookup (indexer == -1 anywhere) failed;
        # mirrors the level-0 special case in _get_indexer_strict.
        keyarr = key
        if not isinstance(key, Index):
            keyarr = com.asarray_tuplesafe(key)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            # i.e. same condition for special case in MultiIndex._get_indexer_strict

            mask = indexer == -1
            if mask.any():
                # Distinguish "label not in level 0 at all" from "label in the
                # level but no longer present in the index".
                check = self.levels[0].get_indexer(keyarr)
                cmask = check == -1
                if cmask.any():
                    raise KeyError(f"{keyarr[cmask]} not in index")
                # We get here when levels still contain values which are not
                # actually in Index anymore
                raise KeyError(f"{keyarr} not in index")
        else:
            return super()._raise_if_missing(key, indexer, axis_name)

2647 

2648 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: 

2649 """ 

2650 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. 

2651 """ 

2652 lev = self.levels[0] 

2653 codes = self._codes[0] 

2654 cat = Categorical.from_codes(codes=codes, categories=lev) 

2655 ci = Index(cat) 

2656 return ci.get_indexer_for(target) 

2657 

    def get_slice_bound(
        self,
        label: Hashable | Sequence[Hashable],
        side: Literal["left", "right"],
        kind=lib.no_default,
    ) -> int:
        """
        For an ordered MultiIndex, compute slice bound
        that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if `side=='right') position
        of given label.

        Parameters
        ----------
        label : object or tuple of objects
        side : {'left', 'right'}
        kind : {'loc', 'getitem', None}

            .. deprecated:: 1.4.0

        Returns
        -------
        int
            Index of label.

        Notes
        -----
        This method only works if level 0 index of the MultiIndex is lexsorted.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])

        Get the locations from the leftmost 'b' in the first level
        until the end of the multiindex:

        >>> mi.get_slice_bound('b', side="left")
        1

        Like above, but if you get the locations from the rightmost
        'b' in the first level and 'f' in the second level:

        >>> mi.get_slice_bound(('b','f'), side="right")
        3

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # Warn about the deprecated `kind` argument, then delegate to the
        # partial-tuple search.
        self._deprecated_arg(kind, "kind", "get_slice_bound")

        if not isinstance(label, tuple):
            # Normalize a scalar label to a 1-tuple (partial key).
            label = (label,)
        return self._partial_tup_index(label, side=side)

2715 

    def slice_locs(
        self, start=None, end=None, step=None, kind=lib.no_default
    ) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step
        kind : string, optional, defaults None

            .. deprecated:: 1.4.0

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        self._deprecated_arg(kind, "kind", "slice_locs")
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)

2776 

    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        # Locate the slice boundary for a (possibly partial) key tuple by
        # narrowing the [start, end) window one lexsorted level at a time.
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # Only the codes inside the current window are relevant.
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # Narrow the window to the rows matching this level's label.
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)

2826 

2827 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: 

2828 """ 

2829 If key is NA value, location of index unify as -1. 

2830 

2831 Parameters 

2832 ---------- 

2833 level_index: Index 

2834 key : label 

2835 

2836 Returns 

2837 ------- 

2838 loc : int 

2839 If key is NA value, loc is -1 

2840 Else, location of key in index. 

2841 

2842 See Also 

2843 -------- 

2844 Index.get_loc : The get_loc method for (single-level) index. 

2845 """ 

2846 if is_scalar(key) and isna(key): 

2847 return -1 

2848 else: 

2849 return level_index.get_loc(key) 

2850 

    def get_loc(self, key, method=None):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)
        method : None

        Returns
        -------
        loc : int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        if method is not None:
            raise NotImplementedError(
                "only the default get_loc method is "
                "currently supported for MultiIndex"
            )

        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # Non-tuple key selects on level 0 only.
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # Complete key on a unique index -> direct engine lookup.
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # Linear-scan the remaining (unsorted) levels within the slice.
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)

2975 

    def get_loc_level(self, key, level=0, drop_level: bool = True):
        """
        Get location and sliced index for requested label(s)/level(s).

        Parameters
        ----------
        key : label or sequence of labels
        level : int/level name or list thereof, optional
        drop_level : bool, default True
            If ``False``, the resulting index will not drop any level.

        Returns
        -------
        loc : A 2-tuple where the elements are:
              Element 0: int, slice object or boolean array
              Element 1: The resulting sliced multiindex/index. If the key
              contains all levels, this will be ``None``.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
        ...                                names=['A', 'B'])

        >>> mi.get_loc_level('b')
        (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

        >>> mi.get_loc_level('e', level='B')
        (array([False,  True, False]), Index(['b'], dtype='object', name='A'))

        >>> mi.get_loc_level(['b', 'e'])
        (1, None)
        """
        # Normalize level name(s) to positional level number(s) before
        # delegating to the positional implementation.
        if not isinstance(level, (list, tuple)):
            level = self._get_level_number(level)
        else:
            level = [self._get_level_number(lev) for lev in level]

        loc, mi = self._get_loc_level(key, level=level)
        if not drop_level:
            if lib.is_integer(loc):
                # Slice of length 1 keeps the MultiIndex shape for scalar locs.
                mi = self[loc : loc + 1]
            else:
                mi = self[loc]
        return loc, mi

3026 

    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.

        Returns a 2-tuple ``(indexer, index_or_None)`` mirroring
        ``get_loc_level``; `indexer` may be an int, slice, or boolean mask.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # Drop from the highest level number down so earlier drops do not
            # shift the positions of levels still to be dropped.
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # Multiple explicit levels: resolve each (key, level) pair
            # independently and AND the resulting masks together.
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # Convert slice to a boolean mask so it can be combined
                    # with other per-level results via `&`.
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                #  scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                #  i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:

            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):

                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        #  test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                # Levels actually constrained by the key (non-trivial slices).
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    #  e.g. test_multiindex_perf_warn
                    #  test_partial_string_timestamp_multiindex
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # Tuple key containing at least one slice: build a combined
                # boolean indexer level by level.
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue
                            else:
                                # e.g. test_xs_IndexSlice_argument_not_implemented
                                k_index = np.zeros(len(self), dtype=bool)
                                k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        #  test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    # every component was a trivial slice -> select everything
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # Scalar (or slice) key against a single level.
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                # dropping would remove every level; keep the sliced self
                result_index = self[indexer]

            return indexer, result_index

3177 

    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                # Non-unit step: membership test against the stepped range.
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # Scatter the subset result back into a full-length mask.
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:

                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:

            # Scalar key: locate it in the level's values first.
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            # Level is sorted: binary-search the codes for a slice result.
            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)

3292 

    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
           You should use one of the above for each level.
           If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Normalize a slice result to a full-length boolean mask so all
            # per-level results can be combined with `&`.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):

            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    #  github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        try:
                            item_indexer = self._get_level_indexer(
                                x, level=i, indexer=indexer
                            )
                        except KeyError:
                            # ignore not founds; see discussion in GH#39424
                            warnings.warn(
                                "The behavior of indexing on a MultiIndex with a "
                                "nested sequence of labels is deprecated and will "
                                "change in a future version. "
                                "`series.loc[label, sequence]` will raise if any "
                                "members of 'sequence' or not present in "
                                "the index's second level. To retain the old "
                                "behavior, use `series.index.isin(sequence, level=1)`",
                                # TODO: how to opt in to the future behavior?
                                # TODO: how to handle IntervalIndex level?
                                #  (no test cases)
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                            continue
                        else:
                            # OR together the per-label results for this level.
                            if lvl_indexer is None:
                                lvl_indexer = _to_bool_indexer(item_indexer)
                            elif isinstance(item_indexer, slice):
                                lvl_indexer[item_indexer] = True  # type: ignore[index]
                            else:
                                lvl_indexer |= item_indexer

                    if lvl_indexer is None:
                        # no matches we are done
                        #  test_loc_getitem_duplicates_multiindex_empty_indexer
                        return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    # This level matched something, but the intersection with
                    # prior levels is empty -> the combined key is absent.
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)

3440 

3441 # -------------------------------------------------------------------- 

3442 

    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """
        # If the index is lexsorted and the list_like label in seq are sorted
        # then we do not need to sort
        if self._is_lexsorted():
            need_sort = False
            for i, k in enumerate(seq):
                if is_list_like(k):
                    if not need_sort:
                        k_codes = self.levels[i].get_indexer(k)
                        k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                        # True if the given codes are not ordered
                        need_sort = (k_codes[:-1] > k_codes[1:]).any()
                elif isinstance(k, slice) and k.step is not None and k.step < 0:
                    # Negative-step slice asks for reversed order.
                    need_sort = True
            # Bail out if both index and seq are sorted
            if not need_sort:
                return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                # Absent codes map to len(level), i.e. sort after all matches.
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,))[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # np.lexsort sorts by the LAST key first, so prepend.
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]

3515 

3516 def truncate(self, before=None, after=None) -> MultiIndex: 

3517 """ 

3518 Slice index between two labels / tuples, return new MultiIndex 

3519 

3520 Parameters 

3521 ---------- 

3522 before : label or tuple, can be partial. Default None 

3523 None defaults to start 

3524 after : label or tuple, can be partial. Default None 

3525 None defaults to end 

3526 

3527 Returns 

3528 ------- 

3529 truncated : MultiIndex 

3530 """ 

3531 if after and before and after < before: 

3532 raise ValueError("after < before") 

3533 

3534 i, j = self.levels[0].slice_locs(before, after) 

3535 left, right = self.slice_locs(before, after) 

3536 

3537 new_levels = list(self.levels) 

3538 new_levels[0] = new_levels[0][i:j] 

3539 

3540 new_codes = [level_codes[left:right] for level_codes in self.codes] 

3541 new_codes[0] = new_codes[0] - i 

3542 

3543 return MultiIndex( 

3544 levels=new_levels, 

3545 codes=new_codes, 

3546 names=self._names, 

3547 verify_integrity=False, 

3548 ) 

3549 

    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level through codes, so levels only need to agree
        # on the values actually used, not on their full contents.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            # code -1 marks a missing value at this level
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True

3611 

3612 def equal_levels(self, other: MultiIndex) -> bool: 

3613 """ 

3614 Return True if the levels of both MultiIndex objects are the same 

3615 

3616 """ 

3617 if self.nlevels != other.nlevels: 

3618 return False 

3619 

3620 for i in range(self.nlevels): 

3621 if not self.levels[i].equals(other.levels[i]): 

3622 return False 

3623 return True 

3624 

3625 # -------------------------------------------------------------------- 

3626 # Set Methods 

3627 

    def _union(self, other, sort) -> MultiIndex:
        # Union of self and other (already validated as set-op compatible),
        # following the Index.union semantics for `sort`.
        other, result_names = self._convert_can_do_setop(other)
        if (
            any(-1 in code for code in self.codes)
            and any(-1 in code for code in other.codes)
            or self.has_duplicates
            or other.has_duplicates
        ):
            # This is only necessary if both sides have nans or one has dups,
            # fast_unique_multiple is faster
            result = super()._union(other, sort)
        else:
            # Fast path: unique inputs without NaNs on both sides, so the
            # flattened tuple values can be uniqued directly.
            rvals = other._values.astype(object, copy=False)
            result = lib.fast_unique_multiple([self._values, rvals], sort=sort)

        return MultiIndex.from_arrays(zip(*result), sortorder=None, names=result_names)

3644 

3645 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

3646 return is_object_dtype(dtype) 

3647 

3648 def _get_reconciled_name_object(self, other) -> MultiIndex: 

3649 """ 

3650 If the result of a set operation will be self, 

3651 return self, unless the names change, in which 

3652 case make a shallow copy of self. 

3653 """ 

3654 names = self._maybe_match_names(other) 

3655 if self.names != names: 

3656 # Incompatible return value type (got "Optional[MultiIndex]", expected 

3657 # "MultiIndex") 

3658 return self.rename(names) # type: ignore[return-value] 

3659 return self 

3660 

3661 def _maybe_match_names(self, other): 

3662 """ 

3663 Try to find common names to attach to the result of an operation between 

3664 a and b. Return a consensus list of names if they match at least partly 

3665 or list of None if they have completely different names. 

3666 """ 

3667 if len(self.names) != len(other.names): 

3668 return [None] * len(self.names) 

3669 names = [] 

3670 for a_name, b_name in zip(self.names, other.names): 

3671 if a_name == b_name: 

3672 names.append(a_name) 

3673 else: 

3674 # TODO: what if they both have np.nan for their names? 

3675 names.append(None) 

3676 return names 

3677 

3678 def _wrap_intersection_result(self, other, result) -> MultiIndex: 

3679 _, result_names = self._convert_can_do_setop(other) 

3680 

3681 if len(result) == 0: 

3682 return MultiIndex( 

3683 levels=self.levels, 

3684 codes=[[]] * self.nlevels, 

3685 names=result_names, 

3686 verify_integrity=False, 

3687 ) 

3688 else: 

3689 return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names) 

3690 

3691 def _wrap_difference_result(self, other, result) -> MultiIndex: 

3692 _, result_names = self._convert_can_do_setop(other) 

3693 

3694 if len(result) == 0: 

3695 return MultiIndex( 

3696 levels=[[]] * self.nlevels, 

3697 codes=[[]] * self.nlevels, 

3698 names=result_names, 

3699 verify_integrity=False, 

3700 ) 

3701 else: 

3702 return MultiIndex.from_tuples(result, sortorder=0, names=result_names) 

3703 

3704 def _convert_can_do_setop(self, other): 

3705 result_names = self.names 

3706 

3707 if not isinstance(other, Index): 

3708 

3709 if len(other) == 0: 

3710 return self[:0], self.names 

3711 else: 

3712 msg = "other must be a MultiIndex or a list of tuples" 

3713 try: 

3714 other = MultiIndex.from_tuples(other, names=self.names) 

3715 except (ValueError, TypeError) as err: 

3716 # ValueError raised by tuples_to_object_array if we 

3717 # have non-object dtype 

3718 raise TypeError(msg) from err 

3719 else: 

3720 result_names = get_unanimous_names(self, other) 

3721 

3722 return other, result_names 

3723 

3724 # -------------------------------------------------------------------- 

3725 

3726 @doc(Index.astype) 

3727 def astype(self, dtype, copy: bool = True): 

3728 dtype = pandas_dtype(dtype) 

3729 if is_categorical_dtype(dtype): 

3730 msg = "> 1 ndim Categorical are not supported at this time" 

3731 raise NotImplementedError(msg) 

3732 elif not is_object_dtype(dtype): 

3733 raise TypeError( 

3734 "Setting a MultiIndex dtype to anything other than object " 

3735 "is not supported" 

3736 ) 

3737 elif copy is True: 

3738 return self._view() 

3739 return self 

3740 

3741 def _validate_fill_value(self, item): 

3742 if isinstance(item, MultiIndex): 

3743 # GH#43212 

3744 if item.nlevels != self.nlevels: 

3745 raise ValueError("Item must have length equal to number of levels.") 

3746 return item._values 

3747 elif not isinstance(item, tuple): 

3748 # Pad the key with empty strings if lower levels of the key 

3749 # aren't specified: 

3750 item = (item,) + ("",) * (self.nlevels - 1) 

3751 elif len(item) != self.nlevels: 

3752 raise ValueError("Item must have length equal to number of levels.") 

3753 return item 

3754 

3755 def insert(self, loc: int, item) -> MultiIndex: 

3756 """ 

3757 Make new MultiIndex inserting new item at location 

3758 

3759 Parameters 

3760 ---------- 

3761 loc : int 

3762 item : tuple 

3763 Must be same length as number of levels in the MultiIndex 

3764 

3765 Returns 

3766 ------- 

3767 new_index : Index 

3768 """ 

3769 item = self._validate_fill_value(item) 

3770 

3771 new_levels = [] 

3772 new_codes = [] 

3773 for k, level, level_codes in zip(item, self.levels, self.codes): 

3774 if k not in level: 

3775 # have to insert into level 

3776 # must insert at end otherwise you have to recompute all the 

3777 # other codes 

3778 lev_loc = len(level) 

3779 level = level.insert(lev_loc, k) 

3780 else: 

3781 lev_loc = level.get_loc(k) 

3782 

3783 new_levels.append(level) 

3784 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc)) 

3785 

3786 return MultiIndex( 

3787 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False 

3788 ) 

3789 

3790 def delete(self, loc) -> MultiIndex: 

3791 """ 

3792 Make new index with passed location deleted 

3793 

3794 Returns 

3795 ------- 

3796 new_index : MultiIndex 

3797 """ 

3798 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] 

3799 return MultiIndex( 

3800 levels=self.levels, 

3801 codes=new_codes, 

3802 names=self.names, 

3803 verify_integrity=False, 

3804 ) 

3805 

3806 @doc(Index.isin) 

3807 def isin(self, values, level=None) -> npt.NDArray[np.bool_]: 

3808 if level is None: 

3809 values = MultiIndex.from_tuples(values, names=self.names)._values 

3810 return algos.isin(self._values, values) 

3811 else: 

3812 num = self._get_level_number(level) 

3813 levs = self.get_level_values(num) 

3814 

3815 if levs.size == 0: 

3816 return np.zeros(len(levs), dtype=np.bool_) 

3817 return levs.isin(values) 

3818 

    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"])
    def set_names(self, names, level=None, inplace: bool = False) -> MultiIndex | None:
        # Thin wrapper so the deprecation of positional `level`/`inplace`
        # applies to MultiIndex; the actual work happens in the base class.
        return super().set_names(names=names, level=level, inplace=inplace)

    # `rename` is an alias of `set_names` on MultiIndex.
    rename = set_names

3824 

    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
    def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex:
        # Wrapper that makes `keep` effectively keyword-only (with a
        # deprecation period); deduplication itself is inherited from Index.
        return super().drop_duplicates(keep=keep)

3828 

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # A MultiIndex holds tuples of labels, so elementwise arithmetic has no
    # meaning; each operator is replaced by a stub that raises a TypeError.

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")

3855 

3856 

3857def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: 

3858 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" 

3859 int64_codes = [ensure_int64(level_codes) for level_codes in codes] 

3860 for k in range(nlevels, 0, -1): 

3861 if libalgos.is_lexsorted(int64_codes[:k]): 

3862 return k 

3863 return 0 

3864 

3865 

def sparsify_labels(label_list, start: int = 0, sentinel=""):
    """
    Blank out repeated leading labels across consecutive rows.

    Given per-level label sequences, return per-level tuples where a label is
    replaced by `sentinel` when it (and every label to its left) matches the
    previous row -- the usual "sparse" MultiIndex display format.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    sparsified = rows[: start + 1]
    prev_row = rows[start]

    for row in rows[start + 1 :]:
        new_row = []

        for pos, (prev_label, label) in enumerate(zip(prev_row, row)):
            if pos == nlevels - 1:
                # The innermost level is always shown in full.
                new_row.append(label)
                sparsified.append(new_row)
                break

            if prev_label == label:
                new_row.append(sentinel)
            else:
                # First difference: show this label and everything after it.
                new_row.extend(row[pos:])
                sparsified.append(new_row)
                break

        prev_row = row

    return list(zip(*sparsified))

3892 

3893 

3894def _get_na_rep(dtype) -> str: 

3895 if is_extension_array_dtype(dtype): 

3896 return f"{dtype.na_value}" 

3897 else: 

3898 dtype = dtype.type 

3899 

3900 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") 

3901 

3902 

def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if isinstance(key, tuple):
        # One level dropped per element of the tuple; if that would remove
        # every level, back out and return the index unchanged.
        for _ in key:
            try:
                index = index._drop_level_numbers([0])
            except ValueError:
                return original_index
        return index

    # Scalar key: drop a single level if possible, otherwise keep as-is.
    try:
        return index._drop_level_numbers([0])
    except ValueError:
        return index

3932 

3933 

3934def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: 

3935 """ 

3936 Coerce the array-like indexer to the smallest integer dtype that can encode all 

3937 of the given categories. 

3938 

3939 Parameters 

3940 ---------- 

3941 array_like : array-like 

3942 categories : array-like 

3943 copy : bool 

3944 

3945 Returns 

3946 ------- 

3947 np.ndarray 

3948 Non-writeable. 

3949 """ 

3950 array_like = coerce_indexer_dtype(array_like, categories) 

3951 if copy: 

3952 array_like = array_like.copy() 

3953 array_like.flags.writeable = False 

3954 return array_like 

3955 

3956 

3957def _require_listlike(level, arr, arrname: str): 

3958 """ 

3959 Ensure that level is either None or listlike, and arr is list-of-listlike. 

3960 """ 

3961 if level is not None and not is_list_like(level): 

3962 if not is_list_like(arr): 

3963 raise TypeError(f"{arrname} must be list-like") 

3964 if is_list_like(arr[0]): 

3965 raise TypeError(f"{arrname} must be list-like") 

3966 level = [level] 

3967 arr = [arr] 

3968 elif level is None or is_list_like(level): 

3969 if not is_list_like(arr) or not is_list_like(arr[0]): 

3970 raise TypeError(f"{arrname} must be list of lists-like") 

3971 return level, arr