Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexing.py: 11%

1from __future__ import annotations

3from contextlib import suppress

4from typing import (

5 TYPE_CHECKING,

6 Hashable,

7 Sequence,

8 TypeVar,

9 cast,

10 final,

11)

12import warnings

14import numpy as np

16from pandas._libs.indexing import NDFrameIndexerBase

17from pandas._libs.lib import item_from_zerodim

18from pandas.errors import (

19 AbstractMethodError,

20 IndexingError,

21 InvalidIndexError,

22)

23from pandas.util._decorators import doc

24from pandas.util._exceptions import find_stack_level

26from pandas.core.dtypes.cast import (

27 can_hold_element,

28 maybe_promote,

29)

30from pandas.core.dtypes.common import (

31 is_array_like,

32 is_bool_dtype,

33 is_extension_array_dtype,

34 is_hashable,

35 is_integer,

36 is_iterator,

37 is_list_like,

38 is_numeric_dtype,

39 is_object_dtype,

40 is_scalar,

41 is_sequence,

42)

43from pandas.core.dtypes.concat import concat_compat

44from pandas.core.dtypes.generic import (

45 ABCDataFrame,

46 ABCSeries,

47)

48from pandas.core.dtypes.missing import (

49 infer_fill_value,

50 is_valid_na_for_dtype,

51 isna,

52 na_value_for_dtype,

53)

55from pandas.core import algorithms as algos

56import pandas.core.common as com

57from pandas.core.construction import (

58 array as pd_array,

59 extract_array,

60)

61from pandas.core.indexers import (

62 check_array_indexer,

63 is_empty_indexer,

64 is_list_like_indexer,

65 is_scalar_indexer,

66 length_of_indexer,

67)

68from pandas.core.indexes.api import (

69 Index,

70 MultiIndex,

71)

73if TYPE_CHECKING: 73 ↛ 74line 73 didn't jump to line 74, because the condition on line 73 was never true

74 from pandas import (

75 DataFrame,

76 Series,

77 )

# Type variable bound to ``_LocationIndexer``; used to annotate methods that
# return an object of the same indexer type as ``self``.
_LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")

# "null slice": the slice equivalent to a bare ``:``
_NS = slice(None, None)
# Error text raised when a key holds more than one ``...`` entry.
_one_ellipsis_message = "indexer may only contain one '...' entry"

86# the public IndexSlicerMaker

87class _IndexSlice:

88 """

89 Create an object to more easily perform multi-index slicing.

91 See Also

92 --------

93 MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

95 Notes

96 -----

97 See :ref:`Defined Levels <advanced.shown_levels>`

98 for further info on slicing a MultiIndex.

100 Examples

101 --------

102 >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])

103 >>> columns = ['foo', 'bar']

104 >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),

105 ... index=midx, columns=columns)

106

107 Using the default slice command:

108

109 >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]

110 foo bar

111 A0 B0 0 1

112 B1 2 3

113 A1 B0 8 9

114 B1 10 11

115

116 Using the IndexSlice class for a more intuitive command:

117

118 >>> idx = pd.IndexSlice

119 >>> dfmi.loc[idx[:, 'B0':'B1'], :]

120 foo bar

121 A0 B0 0 1

122 B1 2 3

123 A1 B0 8 9

124 B1 10 11

125 """

126

127 def __getitem__(self, arg):

128 return arg

129

130

# Module-level instance of ``_IndexSlice``; subscripting it returns the key
# it was given (see ``_IndexSlice.__getitem__``).
IndexSlice = _IndexSlice()

132

133

class IndexingMixin:
    """
    Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.

    Each accessor is a property that builds a new indexer object, bound to
    ``self``, on every access.
    """

    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
            selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
            Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=['cobra', 'viper', 'sidewinder'],
        ...      columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...        index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching callable condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...    ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...    ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...    ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...         [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            * If getting a value and 'label' does not exist in a DataFrame or
              Series.
        ValueError
            * If row/column label pair is not a tuple or if any label from
              the pair is not a scalar for DataFrame.
            * If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)

653

654

655class _LocationIndexer(NDFrameIndexerBase):

656 _valid_types: str

657 axis: int | None = None

658

659 # sub-classes need to set _takeable

660 _takeable: bool

661

662 @final

663 def __call__(self: _LocationIndexerT, axis=None) -> _LocationIndexerT:

664 # we need to return a copy of ourselves

665 new_self = type(self)(self.name, self.obj)

666

667 if axis is not None:

668 axis = self.obj._get_axis_number(axis)

669 new_self.axis = axis

670 return new_self

671

def _get_setitem_indexer(self, key):
    """
    Convert a potentially-label-based key into a positional indexer.

    The conversions are attempted in order: a direct ``get_loc`` on a
    MultiIndex row axis, a per-axis conversion of a tuple key, and finally
    a conversion of ``key`` against axis 0.

    Parameters
    ----------
    key : object
        The key being assigned to.

    Returns
    -------
    object
        The result of ``ax.get_loc``, ``self._convert_tuple`` or
        ``self._convert_to_indexer``, whichever succeeds first.
    """
    if self.name == "loc":
        # always holds here bc iloc overrides _get_setitem_indexer
        self._ensure_listlike_indexer(key)

    if isinstance(key, tuple):
        for x in key:
            check_deprecated_indexers(x)

    if self.axis is not None:
        # An axis was pinned on this indexer: expand ``key`` accordingly.
        key = _tupleize_axis_indexer(self.ndim, self.axis, key)

    ax = self.obj._get_axis(0)

    if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
        with suppress(KeyError, InvalidIndexError):
            # TypeError e.g. passed a bool
            # NOTE(review): only KeyError and InvalidIndexError are suppressed
            # here; a TypeError from get_loc would propagate.
            return ax.get_loc(key)

    if isinstance(key, tuple):
        with suppress(IndexingError):
            # suppress "Too many indexers"
            return self._convert_tuple(key)

    if isinstance(key, range):
        # GH#45479 test_loc_setitem_range_key
        key = list(key)

    return self._convert_to_indexer(key, axis=0)

704

@final
def _maybe_mask_setitem_value(self, indexer, value):
    """
    If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
    same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
    similar to Series.__setitem__.

    Note this is only for loc, not iloc.

    NOTE(review): the summary above is phrased with ``iloc`` while the note
    says loc only; presumably ``indexer`` is already positional when this
    runs - confirm against the caller.

    Parameters
    ----------
    indexer : object
        Either a 2-tuple ``(row indexer, column indexer)`` or a single
        indexer.
    value : object
        Value being assigned; only Series/DataFrame values are adjusted.

    Returns
    -------
    tuple
        ``(indexer, value)``, with any boolean row mask replaced by the
        positions of its True entries and ``value`` aligned or emptied to
        match.
    """

    if (
        isinstance(indexer, tuple)
        and len(indexer) == 2
        and isinstance(value, (ABCSeries, ABCDataFrame))
    ):
        pi, icols = indexer
        ndim = value.ndim
        if com.is_bool_indexer(pi) and len(value) == len(pi):
            # positions of the True entries of the row mask
            newkey = pi.nonzero()[0]

            if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
                # e.g. test_loc_setitem_boolean_mask_allfalse
                if len(newkey) == 0:
                    # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
                    # TODO(GH#45333): may be fixed when deprecation is enforced

                    value = value.iloc[:0]
                else:
                    # test_loc_setitem_ndframe_values_alignment
                    value = self.obj.iloc._align_series(indexer, value)
                indexer = (newkey, icols)

            elif (
                isinstance(icols, np.ndarray)
                and icols.dtype.kind == "i"
                and len(icols) == 1
            ):
                if ndim == 1:
                    # We implicitly broadcast, though numpy does not, see
                    # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
                    if len(newkey) == 0:
                        # FIXME: kludge for
                        #  test_setitem_loc_only_false_indexer_dtype_changed
                        # TODO(GH#45333): may be fixed when deprecation is enforced
                        value = value.iloc[:0]
                    else:
                        # test_loc_setitem_ndframe_values_alignment
                        value = self.obj.iloc._align_series(indexer, value)
                    indexer = (newkey, icols)

                elif ndim == 2 and value.shape[1] == 1:
                    if len(newkey) == 0:
                        # FIXME: kludge for
                        #  test_loc_setitem_all_false_boolean_two_blocks
                        # TODO(GH#45333): may be fixed when deprecation is enforced
                        value = value.iloc[:0]
                    else:
                        # test_loc_setitem_ndframe_values_alignment
                        value = self.obj.iloc._align_frame(indexer, value)
                    indexer = (newkey, icols)
    elif com.is_bool_indexer(indexer):
        # A lone boolean mask: convert it to integer positions.
        indexer = indexer.nonzero()[0]

    return indexer, value

769

770 @final

771 def _ensure_listlike_indexer(self, key, axis=None, value=None):

772 """

773 Ensure that a list-like of column labels are all present by adding them if

774 they do not already exist.

775

776 Parameters

777 ----------

778 key : list-like of column labels

779 Target labels.

780 axis : key axis if known

781 """

782 column_axis = 1

783

784 # column only exists in 2-dimensional DataFrame

785 if self.ndim != 2:

786 return

787

788 if isinstance(key, tuple) and len(key) > 1:

789 # key may be a tuple if we are .loc

790 # if length of key is > 1 set key to column part

791 key = key[column_axis]

792 axis = column_axis

793

794 if (

795 axis == column_axis

796 and not isinstance(self.obj.columns, MultiIndex)

797 and is_list_like_indexer(key)

798 and not com.is_bool_indexer(key)

799 and all(is_hashable(k) for k in key)

800 ):

801 # GH#38148

802 keys = self.obj.columns.union(key, sort=False)

803

804 self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)

805

@final
def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key`` by delegating to the positional setter.

    Iterator entries of a tuple key are materialised into lists and callable
    keys are resolved against ``self.obj`` before the key is converted to an
    indexer and handed to ``iloc._setitem_with_indexer``.
    """
    check_deprecated_indexers(key)
    if not isinstance(key, tuple):
        key = com.apply_if_callable(key, self.obj)
    else:
        # Two separate passes: first exhaust iterators, then resolve callables.
        materialised = [list(part) if is_iterator(part) else part for part in key]
        key = tuple([com.apply_if_callable(part, self.obj) for part in materialised])

    positional = self._get_setitem_indexer(key)
    self._has_valid_setitem_indexer(key)

    if self.name == "iloc":
        setter = self
    else:
        setter = self.obj.iloc
    setter._setitem_with_indexer(positional, value, self.name)

819

def _validate_key(self, key, axis: int):
    """
    Ensure that key is valid for current indexer.

    This base implementation is abstract: it always raises
    ``AbstractMethodError``.

    Parameters
    ----------
    key : scalar, slice or list-like
        Key requested.
    axis : int
        Dimension on which the indexing is being made.

    Raises
    ------
    TypeError
        If the key (or some element of it) has wrong type.
    IndexError
        If the key (or some element of it) is out of bounds.
    KeyError
        If the key was not found.
    """
    raise AbstractMethodError(self)

841

842 @final

843 def _expand_ellipsis(self, tup: tuple) -> tuple:

844 """

845 If a tuple key includes an Ellipsis, replace it with an appropriate

846 number of null slices.

847 """

848 if any(x is Ellipsis for x in tup):

849 if tup.count(Ellipsis) > 1:

850 raise IndexingError(_one_ellipsis_message)

851

852 if len(tup) == self.ndim:

853 # It is unambiguous what axis this Ellipsis is indexing,

854 # treat as a single null slice.

855 i = tup.index(Ellipsis)

856 # FIXME: this assumes only one Ellipsis

857 new_key = tup[:i] + (_NS,) + tup[i + 1 :]

858 return new_key

859

860 # TODO: other cases? only one test gets here, and that is covered

861 # by _validate_key_length

862 return tup

863

864 @final

865 def _validate_tuple_indexer(self, key: tuple) -> tuple:

866 """

867 Check the key for valid keys across my indexer.

868 """

869 key = self._validate_key_length(key)

870 key = self._expand_ellipsis(key)

871 for i, k in enumerate(key):

872 try:

873 self._validate_key(k, i)

874 except ValueError as err:

875 raise ValueError(

876 "Location based indexing can only have "

877 f"[{self._valid_types}] types"

878 ) from err

879 return key

880

881 @final

882 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:

883 """

884 Returns

885 -------

886 bool

887 """

888 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):

889 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)

890 return False

891

892 @final

893 def _convert_tuple(self, key: tuple) -> tuple:

894 # Note: we assume _tupleize_axis_indexer has been called, if necessary.

895 self._validate_key_length(key)

896 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]

897 return tuple(keyidx)

898

899 @final

900 def _validate_key_length(self, key: tuple) -> tuple:

901 if len(key) > self.ndim:

902 if key[0] is Ellipsis:

903 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]

904 key = key[1:]

905 if Ellipsis in key:

906 raise IndexingError(_one_ellipsis_message)

907 return self._validate_key_length(key)

908 raise IndexingError("Too many indexers")

909 return key

910

911 @final

912 def _getitem_tuple_same_dim(self, tup: tuple):

913 """

914 Index with indexers that should return an object of the same dimension

915 as self.obj.

916

917 This is only called after a failed call to _getitem_lowerdim.

918 """

919 retval = self.obj

920 for i, key in enumerate(tup):

921 if com.is_null_slice(key):

922 continue

923

924 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)

925 # We should never have retval.ndim < self.ndim, as that should

926 # be handled by the _getitem_lowerdim call above.

927 assert retval.ndim == self.ndim

928

929 return retval

930

@final
def _getitem_lowerdim(self, tup: tuple):
    """
    Try to resolve a tuple key by reducing along the first label-like entry.

    Raises
    ------
    IndexingError
        With message "not applicable" when no entry of ``tup`` is
        label-like.
    """

    # we can directly get the axis result since the axis is specified
    if self.axis is not None:
        axis = self.obj._get_axis_number(self.axis)
        return self._getitem_axis(tup, axis=axis)

    # we may have a nested tuples indexer here
    if self._is_nested_tuple_indexer(tup):
        return self._getitem_nested_tuple(tup)

    # we maybe be using a tuple to represent multiple dimensions here
    ax0 = self.obj._get_axis(0)
    # ...but iloc should handle the tuple as simple integer-location
    # instead of checking it as multiindex representation (GH 13797)
    if (
        isinstance(ax0, MultiIndex)
        and self.name != "iloc"
        and not any(isinstance(x, slice) for x in tup)
    ):
        # Note: in all extant test cases, replacing the slice condition with
        #  `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
        #  is equivalent.
        #  (see the other place where we call _handle_lowerdim_multi_index_axis0)
        with suppress(IndexingError):
            # error "_LocationIndexer" has no attribute
            # "_handle_lowerdim_multi_index_axis0"
            return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)

    tup = self._validate_key_length(tup)

    for i, key in enumerate(tup):
        if is_label_like(key):
            # We don't need to check for tuples here because those are
            #  caught by the _is_nested_tuple_indexer check above.
            section = self._getitem_axis(key, axis=i)

            # We should never have a scalar section here, because
            #  _getitem_lowerdim is only called after a check for
            #  is_scalar_access, which that would be.
            if section.ndim == self.ndim:
                # we're in the middle of slicing through a MultiIndex
                # revise the key wrt to `section` by inserting an _NS
                new_key = tup[:i] + (_NS,) + tup[i + 1 :]

            else:
                # Note: the section.ndim == self.ndim check above
                #  rules out having DataFrame here, so we dont need to worry
                #  about transposing.
                new_key = tup[:i] + tup[i + 1 :]

                if len(new_key) == 1:
                    new_key = new_key[0]

            # Slices should return views, but calling iloc/loc with a null
            # slice returns a new object.
            if com.is_null_slice(new_key):
                return section
            # This is an elided recursive call to iloc/loc
            return getattr(section, self.name)[new_key]

    raise IndexingError("not applicable")

994

@final
def _getitem_nested_tuple(self, tup: tuple):
    """
    Resolve a tuple key when at least one axis is a MultiIndex.

    Keys longer than ``self.ndim`` are handed to a single-axis lookup;
    otherwise each entry is applied in turn, last axis first.

    Raises
    ------
    ValueError
        If the key is longer than ``self.ndim`` and this is not ``loc``.
    IndexingError
        For a Series when an over-long key holds a tuple entry and is not
        made up solely of hashable entries / null slices.
    """
    # we have a nested tuple so have at least 1 multi-index level
    # we should be able to match up the dimensionality here

    for key in tup:
        check_deprecated_indexers(key)

    # we have too many indexers for our dim, but have at least 1
    # multi-index dimension, try to see if we have something like
    # a tuple passed to a series with a multi-index
    if len(tup) > self.ndim:
        if self.name != "loc":
            # This should never be reached, but let's be explicit about it
            raise ValueError("Too many indices")  # pragma: no cover
        if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
            # GH#10521 Series should reduce MultiIndex dimensions instead of
            #  DataFrame, IndexingError is not raised when slice(None,None,None)
            #  with one row.
            with suppress(IndexingError):
                # error "_LocationIndexer" has no attribute
                # "_handle_lowerdim_multi_index_axis0"
                return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
                    tup
                )
        elif isinstance(self.obj, ABCSeries) and any(
            isinstance(k, tuple) for k in tup
        ):
            # GH#35349 Raise if tuple in tuple for series
            # Do this after the all-hashable-or-null-slice check so that
            #  we are only getting non-hashable tuples, in particular ones
            #  that themselves contain a slice entry
            # See test_loc_series_getitem_too_many_dimensions
            raise IndexingError("Too many indexers")

        # this is a series with a multi-index specified a tuple of
        # selectors
        axis = self.axis or 0
        return self._getitem_axis(tup, axis=axis)

    # handle the multi-axis by taking sections and reducing
    # this is iterative
    obj = self.obj
    # GH#41369 Loop in reverse order ensures indexing along columns before rows
    # which selects only necessary blocks which avoids dtype conversion if possible
    axis = len(tup) - 1
    for key in tup[::-1]:

        if com.is_null_slice(key):
            axis -= 1
            continue

        obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
        axis -= 1

        # if we have a scalar, we are done
        if is_scalar(obj) or not hasattr(obj, "ndim"):
            break

    return obj

1055

def _convert_to_indexer(self, key, axis: int):
    """
    Convert ``key`` into an indexer for ``axis``.

    Abstract here: this base implementation always raises
    ``AbstractMethodError``.
    """
    raise AbstractMethodError(self)

1058

@final
def __getitem__(self, key):
    """
    Look up ``key``, dispatching on whether it is exactly a ``tuple``.

    Non-tuple keys are resolved (callables applied) and looked up along
    ``self.axis`` (or axis 0). Tuple keys have iterator entries materialised
    and callables applied, then go through the scalar fast path when
    ``_is_scalar_access`` allows it, or ``_getitem_tuple`` otherwise.
    """
    check_deprecated_indexers(key)

    if type(key) is not tuple:
        # we by definition only have the 0th axis
        axis = self.axis or 0
        resolved = com.apply_if_callable(key, self.obj)
        return self._getitem_axis(resolved, axis=axis)

    # Two separate passes: first exhaust iterators, then resolve callables.
    materialised = [list(part) if is_iterator(part) else part for part in key]
    key = tuple([com.apply_if_callable(part, self.obj) for part in materialised])
    if not self._is_scalar_access(key):
        return self._getitem_tuple(key)
    return self.obj._get_value(*key, takeable=self._takeable)

1074

def _is_scalar_access(self, key: tuple):
    """
    Report whether ``key`` addresses a single scalar value.

    Not implemented here: this base version always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

1077

def _getitem_tuple(self, tup: tuple):
    """
    Look up a tuple key.

    Abstract here: this base implementation always raises
    ``AbstractMethodError``.
    """
    raise AbstractMethodError(self)

1080

def _getitem_axis(self, key, axis: int):
    """
    Look up ``key`` along a single ``axis``.

    Not implemented here: this base version always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

1083

def _has_valid_setitem_indexer(self, indexer) -> bool:
    """
    Validate ``indexer`` for use in a setitem operation.

    Abstract here: this base implementation always raises
    ``AbstractMethodError``.
    """
    raise AbstractMethodError(self)

1086

@final
def _getbool_axis(self, key, axis: int):
    """
    Select along ``axis`` the positions where a boolean ``key`` is True.

    The caller is responsible for ensuring ``axis`` is not None.
    """
    checked = check_bool_indexer(self.obj._get_axis(axis), key)
    # ``nonzero`` returns a tuple; its first element holds the positions.
    positions = checked.nonzero()[0]
    return self.obj._take_with_is_copy(positions, axis=axis)

1094

1095

@doc(IndexingMixin.loc)
class _LocIndexer(_LocationIndexer):
    # NOTE: no class docstring here on purpose; the ``@doc`` decorator
    # builds it from ``IndexingMixin.loc``.
    _takeable: bool = False
    _valid_types = (
        "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
        "endpoints included! Can be slices of integers if the "
        "index is integers), listlike of labels, boolean"
    )

    # -------------------------------------------------------------------
    # Key Checks

    @doc(_LocationIndexer._validate_key)
    def _validate_key(self, key, axis: int):
        # Accepted keys:
        #   * a collection of labels (presence is checked later)
        #   * a slice of labels (where start-end in labels)
        #   * a slice of integers (only if in the labels)
        #   * a boolean, but never inside a slice and only on a boolean index
        if isinstance(key, bool):
            ax = self.obj._get_axis(axis)
            if not (is_bool_dtype(ax) or ax.dtype.name == "boolean"):
                raise KeyError(
                    f"{key}: boolean label can not be used without a boolean index"
                )

        if isinstance(key, slice):
            if isinstance(key.start, bool) or isinstance(key.stop, bool):
                raise TypeError(f"{key}: boolean values can not be used in a slice")

    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """Label-based setting performs no up-front check: always True."""
        return True

    def _is_scalar_access(self, key: tuple) -> bool:
        """
        Decide whether ``key`` can use the scalar ``_get_value`` fast path.

        This is the shortcut that gives ``.loc`` the equivalent of ``.at``:
        it avoids going through sections (minimising dtype changes) and is
        the performant route.

        Returns
        -------
        bool
        """
        if len(key) != self.ndim:
            return False

        for k, ax in zip(key, self.obj.axes):
            if not is_scalar(k):
                return False

            if isinstance(ax, MultiIndex):
                return False

            if isinstance(k, str) and ax._supports_partial_string_indexing:
                # partial string indexing, df.loc['2000', 'A']
                # should not be considered scalar
                return False

            if not ax._index_as_unique:
                return False

        return True

    # -------------------------------------------------------------------
    # MultiIndex Handling

    def _multi_take_opportunity(self, tup: tuple) -> bool:
        """
        Report whether ``tup`` is eligible for ``_multi_take``.

        Every entry must be a list-like indexer, and none of them may be a
        boolean indexer.

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        bool
            Whether the current indexing can be passed through
            `_multi_take`.
        """
        every_listlike = all(is_list_like_indexer(x) for x in tup)
        # boolean indexers are just too complicated for this path
        return every_listlike and not any(com.is_bool_indexer(x) for x in tup)

    def _multi_take(self, tup: tuple):
        """
        Build the indexers for all keys in ``tup`` and reindex in one go.

        Doing a single reindex for all axes is more efficient than taking
        once per dimension.

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        values: same type as the object being indexed
        """
        # GH 836
        per_axis = {}
        for key, axis in zip(tup, self.obj._AXIS_ORDERS):
            per_axis[axis] = self._get_listlike_indexer(key, axis)
        return self.obj._reindex_with_indexers(per_axis, copy=True, allow_dups=True)

    # -------------------------------------------------------------------

    def _getitem_iterable(self, key, axis: int):
        """
        Index the object along ``axis`` with a collection of labels.

        Boolean indexers are expected to have been handled before this
        method is reached.

        Parameters
        ----------
        key : iterable
            Targeted labels.
        axis : int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            Propagated from ``_get_listlike_indexer`` when the requested
            labels cannot be resolved.

        Returns
        -------
        scalar, DataFrame, or Series: indexed value(s).
        """
        self._validate_key(key, axis)

        # resolve the collection of keys, then reindex once
        new_labels, positions = self._get_listlike_indexer(key, axis)
        reindexers = {axis: [new_labels, positions]}
        return self.obj._reindex_with_indexers(
            reindexers, copy=True, allow_dups=True
        )

    def _getitem_tuple(self, tup: tuple):
        """
        Index with a tuple of per-axis keys.

        The lower-dimensional path is attempted first; if it raises
        ``IndexingError`` the tuple is validated and handled either by
        ``_multi_take`` or axis by axis.
        """
        with suppress(IndexingError):
            tup = self._expand_ellipsis(tup)
            return self._getitem_lowerdim(tup)

        # no multi-index, so validate all of the indexers
        tup = self._validate_tuple_indexer(tup)

        # ugly hack for GH #836
        if self._multi_take_opportunity(tup):
            return self._multi_take(tup)

        return self._getitem_tuple_same_dim(tup)

    def _get_label(self, label, axis: int):
        """Return the cross-section for ``label`` along ``axis``."""
        # GH#5567 this will fail if the label is not present in the axis.
        return self.obj.xs(label, axis=axis)

    def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
        """
        Look up ``tup`` as a single label on an axis-0 MultiIndex.

        Raises
        ------
        KeyError
            When the lookup fails and
            ``self.ndim < len(tup) <= self.obj.index.nlevels``.
        IndexingError
            When the lookup fails otherwise.
        """
        axis = self.axis or 0
        try:
            # fast path for series or for tup devoid of slices
            return self._get_label(tup, axis=axis)

        except KeyError as ek:
            # raise KeyError if number of indexers match
            # else IndexingError will be raised
            if self.ndim < len(tup) <= self.obj.index.nlevels:
                raise ek
            raise IndexingError("No label returned") from ek

    def _getitem_axis(self, key, axis: int):
        """
        Index along a single ``axis`` with ``key``.

        Dispatches on the kind of key: slice, boolean indexer, list-like
        (including nested tuples on a MultiIndex), or a single label.
        """
        key = item_from_zerodim(key)
        if is_iterator(key):
            key = list(key)
        if key is Ellipsis:
            key = slice(None)

        labels = self.obj._get_axis(axis)

        if isinstance(key, tuple) and isinstance(labels, MultiIndex):
            key = tuple(key)

        if isinstance(key, slice):
            self._validate_key(key, axis)
            return self._get_slice_axis(key, axis=axis)

        if com.is_bool_indexer(key):
            return self._getbool_axis(key, axis=axis)

        if is_list_like_indexer(key):
            multiindex_tuple = isinstance(key, tuple) and isinstance(
                labels, MultiIndex
            )

            # an iterable multi-selection
            if not multiindex_tuple:
                if hasattr(key, "ndim") and key.ndim > 1:
                    raise ValueError("Cannot index with multidimensional key")

                return self._getitem_iterable(key, axis=axis)

            # nested tuple slicing
            if is_nested_tuple(key, labels):
                locs = labels.get_locs(key)
                positional = [slice(None)] * self.ndim
                positional[axis] = locs
                return self.obj.iloc[tuple(positional)]

        # fall thru to straight lookup
        self._validate_key(key, axis)
        return self._get_label(key, axis=axis)

    def _get_slice_axis(self, slice_obj: slice, axis: int):
        """
        Slice the object along ``axis`` by label.

        The caller is responsible for passing a non-None ``axis``.
        """
        obj = self.obj
        if not need_slice(slice_obj):
            return obj.copy(deep=False)

        labels = obj._get_axis(axis)
        indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)

        if not isinstance(indexer, slice):
            # DatetimeIndex overrides Index.slice_indexer and may
            # return a DatetimeIndex instead of a slice object.
            return self.obj.take(indexer, axis=axis)
        return self.obj._slice(indexer, axis=axis)

    def _convert_to_indexer(self, key, axis: int):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on a ndarray.

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        'In the face of ambiguity, refuse the temptation to guess.'
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing

        A label that cannot be located may be returned wrapped as
        ``{"key": key}`` instead of raising; see the branches below.
        """
        labels = self.obj._get_axis(axis)
        on_multiindex = isinstance(labels, MultiIndex)

        if isinstance(key, slice):
            return labels._convert_slice_indexer(key, kind="loc")

        if (
            isinstance(key, tuple)
            and not on_multiindex
            and self.ndim < 2
            and len(key) > 1
        ):
            raise IndexingError("Too many indexers")

        if is_scalar(key) or (on_multiindex and is_hashable(key)):
            # Otherwise get_loc will raise InvalidIndexError

            # if we are a label return me
            try:
                return labels.get_loc(key)
            except LookupError:
                if isinstance(key, tuple) and on_multiindex:
                    if len(key) == labels.nlevels:
                        return {"key": key}
                    raise
            except InvalidIndexError:
                # GH35015, using datetime as column indices raises exception
                if not on_multiindex:
                    raise
            except ValueError:
                if not is_integer(key):
                    raise
                return {"key": key}

        if is_nested_tuple(key, labels):
            if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
                # GH#35349 Raise if tuple in tuple for series
                raise IndexingError("Too many indexers")
            return labels.get_locs(key)

        if is_list_like_indexer(key):
            if is_iterator(key):
                key = list(key)

            if com.is_bool_indexer(key):
                key = check_bool_indexer(labels, key)
                return key
            return self._get_listlike_indexer(key, axis)[1]

        try:
            return labels.get_loc(key)
        except LookupError:
            # allow a not found key only if we are a setter
            if not is_list_like_indexer(key):
                return {"key": key}
            raise

    def _get_listlike_indexer(self, key, axis: int):
        """
        Transform a list-like of keys into a new index and an indexer.

        Parameters
        ----------
        key : list-like
            Targeted labels.
        axis: int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            If at least one key was requested but none was found.

        Returns
        -------
        keyarr: Index
            New index (coinciding with 'key' if the axis is unique).
        values : array-like
            Indexer for the return object, -1 denotes keys not found.
        """
        target_axis = self.obj._get_axis(axis)
        axis_name = self.obj._get_axis_name(axis)

        keyarr, indexer = target_axis._get_indexer_strict(key, axis_name)
        return keyarr, indexer

1435

1436

1437@doc(IndexingMixin.iloc)

1438class _iLocIndexer(_LocationIndexer):

1439 _valid_types = (

1440 "integer, integer slice (START point is INCLUDED, END "

1441 "point is EXCLUDED), listlike of integers, boolean array"

1442 )

1443 _takeable = True

1444

1445 # -------------------------------------------------------------------

1446 # Key Checks

1447

1448 def _validate_key(self, key, axis: int):

1449 if com.is_bool_indexer(key):

1450 if hasattr(key, "index") and isinstance(key.index, Index):

1451 if key.index.inferred_type == "integer":

1452 raise NotImplementedError(

1453 "iLocation based boolean "

1454 "indexing on an integer type "

1455 "is not available"

1456 )

1457 raise ValueError(

1458 "iLocation based boolean indexing cannot use "

1459 "an indexable as a mask"

1460 )

1461 return

1462

1463 if isinstance(key, slice):

1464 return

1465 elif is_integer(key):

1466 self._validate_integer(key, axis)

1467 elif isinstance(key, tuple):

1468 # a tuple should already have been caught by this point

1469 # so don't treat a tuple as a valid indexer

1470 raise IndexingError("Too many indexers")

1471 elif is_list_like_indexer(key):

1472 arr = np.array(key)

1473 len_axis = len(self.obj._get_axis(axis))

1474

1475 # check that the key has a numeric dtype

1476 if not is_numeric_dtype(arr.dtype):

1477 raise IndexError(f".iloc requires numeric indexers, got {arr}")

1478

1479 # check that the key does not exceed the maximum size of the index

1480 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):

1481 raise IndexError("positional indexers are out-of-bounds")

1482 else:

1483 raise ValueError(f"Can only index by location with a [{self._valid_types}]")

1484

1485 def _has_valid_setitem_indexer(self, indexer) -> bool:

1486 """

1487 Validate that a positional indexer cannot enlarge its target

1488 will raise if needed, does not modify the indexer externally.

1489

1490 Returns

1491 -------

1492 bool

1493 """

1494 if isinstance(indexer, dict):

1495 raise IndexError("iloc cannot enlarge its target object")

1496

1497 if isinstance(indexer, ABCDataFrame):

1498 warnings.warn(

1499 "DataFrame indexer for .iloc is deprecated and will be removed in "

1500 "a future version.\n"

1501 "consider using .loc with a DataFrame indexer for automatic alignment.",

1502 FutureWarning,

1503 stacklevel=find_stack_level(),

1504 )

1505

1506 if not isinstance(indexer, tuple):

1507 indexer = _tuplify(self.ndim, indexer)

1508

1509 for ax, i in zip(self.obj.axes, indexer):

1510 if isinstance(i, slice):

1511 # should check the stop slice?

1512 pass

1513 elif is_list_like_indexer(i):

1514 # should check the elements?

1515 pass

1516 elif is_integer(i):

1517 if i >= len(ax):

1518 raise IndexError("iloc cannot enlarge its target object")

1519 elif isinstance(i, dict):

1520 raise IndexError("iloc cannot enlarge its target object")

1521

1522 return True

1523

1524 def _is_scalar_access(self, key: tuple) -> bool:

1525 """

1526 Returns

1527 -------

1528 bool

1529 """

1530 # this is a shortcut accessor to both .loc and .iloc

1531 # that provide the equivalent access of .at and .iat

1532 # a) avoid getting things via sections and (to minimize dtype changes)

1533 # b) provide a performant path

1534 if len(key) != self.ndim:

1535 return False

1536

1537 return all(is_integer(k) for k in key)

1538

1539 def _validate_integer(self, key: int, axis: int) -> None:

1540 """

1541 Check that 'key' is a valid position in the desired axis.

1542

1543 Parameters

1544 ----------

1545 key : int

1546 Requested position.

1547 axis : int

1548 Desired axis.

1549

1550 Raises

1551 ------

1552 IndexError

1553 If 'key' is not a valid position in axis 'axis'.

1554 """

1555 len_axis = len(self.obj._get_axis(axis))

1556 if key >= len_axis or key < -len_axis:

1557 raise IndexError("single positional indexer is out-of-bounds")

1558

1559 # -------------------------------------------------------------------

1560

def _getitem_tuple(self, tup: tuple):
    """
    Index with a tuple of per-axis positional keys.

    The tuple is validated first; the lower-dimensional path is then
    tried and, if it raises ``IndexingError``, the axis-by-axis path is
    used instead.
    """
    validated = self._validate_tuple_indexer(tup)

    with suppress(IndexingError):
        return self._getitem_lowerdim(validated)

    return self._getitem_tuple_same_dim(validated)

1568

def _get_list_axis(self, key, axis: int):
    """
    Take the positions listed in ``key`` along ``axis``.

    Parameters
    ----------
    key : list-like positional indexer
    axis : int

    Returns
    -------
    Series object

    Raises
    ------
    IndexError
        Re-raised with a uniform message when the underlying take fails
        with ``IndexError``.

    Notes
    -----
    `axis` can only be zero.
    """
    try:
        taken = self.obj._take_with_is_copy(key, axis=axis)
    except IndexError as err:
        # re-raise with different error message
        raise IndexError("positional indexers are out-of-bounds") from err
    return taken

1591

def _getitem_axis(self, key, axis: int):
    """
    Index along a single ``axis`` by position.

    Dispatches on the kind of key: slice (or Ellipsis), boolean indexer,
    list-like of integers, or a single integer.

    Raises
    ------
    IndexError
        ``key`` is a DataFrame.
    TypeError
        A scalar ``key`` is not an integer.
    """
    if key is Ellipsis:
        key = slice(None)
    elif isinstance(key, ABCDataFrame):
        raise IndexError(
            "DataFrame indexer is not allowed for .iloc\n"
            "Consider using .loc for automatic alignment."
        )

    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)

    if is_iterator(key):
        key = list(key)

    if isinstance(key, list):
        key = np.asarray(key)

    if com.is_bool_indexer(key):
        self._validate_key(key, axis)
        return self._getbool_axis(key, axis=axis)

    # a list of integers
    if is_list_like_indexer(key):
        return self._get_list_axis(key, axis=axis)

    # a single integer
    key = item_from_zerodim(key)
    if not is_integer(key):
        raise TypeError("Cannot index by location index with a non-integer key")

    # validate the location
    self._validate_integer(key, axis)

    return self.obj._ixs(key, axis=axis)

1628

def _get_slice_axis(self, slice_obj: slice, axis: int):
    """
    Slice the object along ``axis`` by position.

    The caller is responsible for passing a non-None ``axis``.
    """
    target = self.obj

    if not need_slice(slice_obj):
        # nothing to cut: hand back a shallow copy
        return target.copy(deep=False)

    axis_labels = target._get_axis(axis)
    axis_labels._validate_positional_slice(slice_obj)
    return self.obj._slice(slice_obj, axis=axis)

1639

1640 def _convert_to_indexer(self, key, axis: int):

1641 """

1642 Much simpler as we only have to deal with our valid types.

1643 """

1644 return key

1645

1646 def _get_setitem_indexer(self, key):

1647 # GH#32257 Fall through to let numpy do validation

1648 if is_iterator(key):

1649 key = list(key)

1650

1651 if self.axis is not None:

1652 key = _tupleize_axis_indexer(self.ndim, self.axis, key)

1653

1654 return key

1655

1656 # -------------------------------------------------------------------

1657

def _setitem_with_indexer(self, indexer, value, name="iloc"):
    """
    Set ``value`` on a Series/DataFrame using positional indexers.

    If the relevant keys are not present, the Series/DataFrame may be
    expanded.

    This method is currently broken when dealing with non-unique Indexes,
    since it goes from positional indexers back to labels when calling
    BlockManager methods, see GH#12991, GH#22046, GH#15686.
    """
    info_axis = self.obj._info_axis_number

    # maybe partial set
    take_split_path = not self.obj._mgr.is_single_block

    # Even with a single block/type the split path is required, unless the
    # block is one-dimensional or it can hold the value.
    if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
        # in case of dict, keys are indices
        val = list(value.values()) if isinstance(value, dict) else value
        first_arr = self.obj._mgr.arrays[0]
        take_split_path = not can_hold_element(
            first_arr, extract_array(val, extract_numpy=True)
        )

    # Any MultiIndex axis indexed with something other than an integer or a
    # null slice forces the split path, xref GH 10360, GH 27841
    if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
        for i, ax in zip(indexer, self.obj.axes):
            if isinstance(ax, MultiIndex) and not (
                is_integer(i) or com.is_null_slice(i)
            ):
                take_split_path = True
                break

    if isinstance(indexer, tuple):
        nindexer = []
        for i, idx in enumerate(indexer):
            if not isinstance(idx, dict):
                nindexer.append(idx)
                continue

            # reindex the axis to the new value
            # and set inplace
            key, _ = convert_missing_indexer(idx)

            # On the items axis, take the main missing path first: this
            # correctly sets the dtype and avoids cache issues, essentially
            # separating out the block that may need to be modified.
            if self.ndim > 1 and i == info_axis:

                # add the new item, and set the value
                # must have all defined axes if we have a scalar
                # or a list-like on the non-info axes if we have a
                # list-like
                if not len(self.obj):
                    if not is_list_like_indexer(value):
                        raise ValueError(
                            "cannot set a frame with no "
                            "defined index and a scalar"
                        )
                    self.obj[key] = value
                    return

                # add a new item with the dtype setup
                if com.is_null_slice(indexer[0]):
                    # We are setting an entire column
                    self.obj[key] = value
                    return
                elif is_array_like(value):
                    # GH#42099
                    arr = extract_array(value, extract_numpy=True)
                    taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                    empty_value = algos.take_nd(arr, taker)
                    if not isinstance(value, ABCSeries):
                        # if not Series (in which case we need to align),
                        # we can short-circuit
                        empty_value[indexer[0]] = arr
                        self.obj[key] = empty_value
                        return

                    self.obj[key] = empty_value

                else:
                    self.obj[key] = infer_fill_value(value)

                new_indexer = convert_from_missing_indexer_tuple(
                    indexer, self.obj.axes
                )
                self._setitem_with_indexer(new_indexer, value, name)

                return

            # reindex the axis
            # make sure to clear the cache because we are
            # just replacing the block manager here
            # so the object is the same
            index = self.obj._get_axis(i)
            labels = index.insert(len(index), key)

            # We are expanding the Series/DataFrame values to match
            # the length of the new index `labels`. GH#40096 ensure
            # this is valid even if the index has duplicates.
            taker = np.arange(len(index) + 1, dtype=np.intp)
            taker[-1] = -1
            reindexers = {i: (labels, taker)}
            new_obj = self.obj._reindex_with_indexers(reindexers, allow_dups=True)
            self.obj._mgr = new_obj._mgr
            self.obj._maybe_update_cacher(clear=True)
            self.obj._is_copy = None

            nindexer.append(labels.get_loc(key))

        indexer = tuple(nindexer)
    else:
        indexer, missing = convert_missing_indexer(indexer)

        if missing:
            self._setitem_with_indexer_missing(indexer, value)
            return

    if name == "loc":
        # must come after setting of missing
        indexer, value = self._maybe_mask_setitem_value(indexer, value)

    # align and set the values
    if take_split_path:
        # We have to operate column-wise
        self._setitem_with_indexer_split_path(indexer, value, name)
    else:
        self._setitem_single_block(indexer, value, name)

1798

def _setitem_with_indexer_split_path(self, indexer, value, name: str):
    """
    Set ``value`` one column at a time.

    Raises
    ------
    IndexError
        ``indexer`` has more entries than ``self.ndim``.
    ValueError
        The row indexer is an ndarray with ndim > 2, or the lengths of
        keys and values cannot be reconciled.
    """
    # The caller only requests the split path for 2D objects
    assert self.ndim == 2

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)
    if len(indexer) > self.ndim:
        raise IndexError("too many indices for array")
    if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
        raise ValueError(r"Cannot set values with ndim > 2")

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        from pandas import Series

        value = self._align_series(indexer, Series(value))

    # Ensure we have something we can iterate over
    info_axis = indexer[1]
    ilocs = self._ensure_iterable_column_indexer(info_axis)

    pi = indexer[0]
    # expected length of obj[indexer[0]]
    lplane_indexer = length_of_indexer(pi, self.obj.index)

    # we need an iterable, with a ndim of at least 1
    # eg. don't pass through np.array(0)
    value_is_iterable = is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0

    if not value_is_iterable:
        # scalar value
        for loc in ilocs:
            self._setitem_single_column(loc, value, pi)
        return

    if isinstance(value, ABCDataFrame):
        self._setitem_with_indexer_frame_value(indexer, value, name)

    elif np.ndim(value) == 2:
        self._setitem_with_indexer_2d_value(indexer, value)

    elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
        # We are setting multiple rows in a single column.
        self._setitem_single_column(ilocs[0], value, pi)

    elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
        # We are trying to set N values into M entries of a single
        # column, which is invalid for N != M
        # Exclude zero-len for e.g. boolean masking that is all-false

        if len(value) == 1 and not is_integer(info_axis):
            # This is a case like df.iloc[:3, [1]] = [0]
            # where we treat as df.iloc[:3, 1] = 0
            return self._setitem_with_indexer((pi, info_axis[0]), value[0])

        raise ValueError(
            "Must have equal len keys and value "
            "when setting with an iterable"
        )

    elif lplane_indexer == 0 and len(value) == len(self.obj.index):
        # We get here in one case via .loc with a all-False mask
        pass

    elif self._is_scalar_access(indexer) and is_object_dtype(
        self.obj.dtypes[ilocs[0]]
    ):
        # We are setting nested data, only possible for object dtype data
        self._setitem_single_column(indexer[1], value, pi)

    elif len(ilocs) == len(value):
        # We are setting multiple columns in a single row.
        for loc, v in zip(ilocs, value):
            self._setitem_single_column(loc, v, pi)

    elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
        # This is a setitem-with-expansion, see
        # test_loc_setitem_empty_append_expands_rows_mixed_dtype
        # e.g. df = DataFrame(columns=["x", "y"])
        # df["x"] = df["x"].astype(np.int64)
        # df.loc[:, "x"] = [1, 2, 3]
        self._setitem_single_column(ilocs[0], value, pi)

    else:
        raise ValueError(
            "Must have equal len keys and value "
            "when setting with an iterable"
        )

1889

def _setitem_with_indexer_2d_value(self, indexer, value):
    """
    Set a two-dimensional, non-DataFrame ``value`` column by column.

    Reached when ``np.ndim(value) == 2``; DataFrame values go through
    ``_setitem_with_indexer_frame_value`` instead.

    Raises
    ------
    ValueError
        The number of target columns differs from ``value.shape[1]``.
    """
    row_indexer = indexer[0]
    ilocs = self._ensure_iterable_column_indexer(indexer[1])

    # GH#7551 Note that this coerces the dtype if we are mixed
    value = np.array(value, dtype=object)
    if len(ilocs) != value.shape[1]:
        raise ValueError(
            "Must have equal len keys and value when setting with an ndarray"
        )

    for col_pos, loc in enumerate(ilocs):
        # setting with a list, re-coerces
        column_values = value[:, col_pos].tolist()
        self._setitem_single_column(loc, column_values, row_indexer)

1907

def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
    """
    Set a DataFrame ``value`` column by column.

    With ``name == "iloc"`` the columns of ``value`` are used in
    positional order without alignment; otherwise each target column is
    aligned through ``_align_series`` or filled with NaN when ``value``
    has no such column.

    Raises
    ------
    ValueError
        ``value`` has non-unique columns that do not equal the target's
        columns (label-based setting only).
    """
    ilocs = self._ensure_iterable_column_indexer(indexer[1])

    sub_indexer = list(indexer)
    pi = indexer[0]

    multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

    unique_cols = value.columns.is_unique

    # We do not want to align the value in case of iloc GH#37728
    if name == "iloc":
        for i, loc in enumerate(ilocs):
            self._setitem_single_column(loc, value.iloc[:, i], pi)
        return

    if not unique_cols and not value.columns.equals(self.obj.columns):
        raise ValueError("Setting with non-unique columns is not allowed.")

    # With duplicated (but identical) columns we assume we are already
    # aligned and pick by position, see
    # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
    pick_by_position = not unique_cols

    for loc in ilocs:
        item = self.obj.columns[loc]
        if item in value:
            sub_indexer[1] = item
            source = value.iloc[:, loc] if pick_by_position else value[item]
            val = self._align_series(
                tuple(sub_indexer),
                source,
                multiindex_indexer,
            )
        else:
            val = np.nan

        self._setitem_single_column(loc, val, pi)

1956

def _setitem_single_column(self, loc: int, value, plane_indexer):
    """
    Set ``value`` into the column at position ``loc``.

    Parameters
    ----------
    loc : int
        Indexer for column position
    plane_indexer : int, slice, listlike[int]
        The indexer we use for setitem along axis=0.
    """
    pi = plane_indexer

    orig_values = self.obj._get_column_array(loc)

    # A null slice or a slice with full bounds amounts to reassigning the
    # whole column of a multi-dim object, i.e. a setitem on the info axis.
    # GH#6149 (null slice), GH#10408 (full bounds)
    if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
        pass
    elif (
        is_array_like(value)
        and len(value.shape) > 0
        and self.obj.shape[0] == value.shape[0]
        and not is_empty_indexer(pi)
    ):
        if is_list_like(pi) and not is_bool_dtype(pi):
            value = value[np.argsort(pi)]
        else:
            # in case of slice
            value = value[pi]
    else:
        # set value into the column (first attempting to operate inplace, then
        # falling back to casting if necessary)
        self.obj._mgr.column_setitem(loc, plane_indexer, value)
        self.obj._clear_item_cache()
        return

    self.obj._iset_item(loc, value)

    # We will not operate in-place, but will attempt to in the future.
    # To determine whether we need to issue a FutureWarning, see if the
    # setting in-place would work, i.e. behavior will change.

    new_values = self.obj._get_column_array(loc)

    if can_hold_element(orig_values, new_values) and len(new_values) != 0:
        # Don't issue the warning yet, as we can still trim a few cases where
        # behavior will not change.

        # TODO: get something like tm.shares_memory working?
        # No warning is needed when the values were set inplace after all
        # (e.g. test_rename_nocopy), nor when enlarging, since that can
        # never be done inplace.
        behavior_unchanged = (
            isinstance(new_values, np.ndarray)
            and isinstance(orig_values, np.ndarray)
            and (
                np.shares_memory(new_values, orig_values)
                or new_values.shape != orig_values.shape
            )
        )
        if not behavior_unchanged:
            warnings.warn(
                "In a future version, `df.iloc[:, i] = newvals` will attempt "
                "to set the values inplace instead of always setting a new "
                "array. To retain the old behavior, use either "
                "`df[df.columns[i]] = newvals` or, if columns are non-unique, "
                "`df.isetitem(i, newvals)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # TODO: how to get future behavior?
            # TODO: what if we got here indirectly via loc?
    return

2035

def _setitem_single_block(self, indexer, value, name: str):
    """
    Handle ``_setitem_with_indexer`` when the object has a single Block.
    """
    from pandas import Series

    info_axis = self.obj._info_axis_number
    item_labels = self.obj._get_axis(info_axis)

    if isinstance(indexer, tuple):

        # When setting on the info axis ONLY, go through the column-wise
        # setter to avoid the block-splitting logic here.
        whole_column = (
            self.ndim == len(indexer) == 2
            and is_integer(indexer[1])
            and com.is_null_slice(indexer[0])
        )
        if whole_column:
            col = item_labels[indexer[info_axis]]
            if len(item_labels.get_indexer_for([col])) == 1:
                # e.g. test_loc_setitem_empty_append_expands_rows
                loc = item_labels.get_loc(col)
                # Go through _setitem_single_column to get
                # FutureWarning if relevant.
                self._setitem_single_column(loc, value, indexer[0])
                return

        indexer = maybe_convert_ix(*indexer)  # e.g. test_setitem_frame_align

    not_iloc = name != "iloc"
    if (isinstance(value, ABCSeries) and not_iloc) or isinstance(value, dict):
        # TODO(EA): ExtensionBlock.setitem this causes issues with
        # setting for extensionarrays that store dicts. Need to decide
        # if it's worth supporting that.
        value = self._align_series(indexer, Series(value))

    elif isinstance(value, ABCDataFrame) and not_iloc:
        value = self._align_frame(indexer, value)

    # check for chained assignment
    self.obj._check_is_chained_assignment_possible()

    # actually do the set
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
    self.obj._maybe_update_cacher(clear=True, inplace=True)

2080

2081 def _setitem_with_indexer_missing(self, indexer, value):

2082 """

2083 Insert new row(s) or column(s) into the Series or DataFrame.

2084 """

2085 from pandas import Series

2086

2087 # reindex the axis to the new value

2088 # and set inplace

2089 if self.ndim == 1:

2090 index = self.obj.index

2091 new_index = index.insert(len(index), indexer)

2092

2093 # we have a coerced indexer, e.g. a float

2094 # that matches in an Int64Index, so

2095 # we will not create a duplicate index, rather

2096 # index to that element

2097 # e.g. 0.0 -> 0

2098 # GH#12246

2099 if index.is_unique:

2100 # pass new_index[-1:] instead if [new_index[-1]]

2101 # so that we retain dtype

2102 new_indexer = index.get_indexer(new_index[-1:])

2103 if (new_indexer != -1).any():

2104 # We get only here with loc, so can hard code

2105 return self._setitem_with_indexer(new_indexer, value, "loc")

2106

2107 # this preserves dtype of the value and of the object

2108 if not is_scalar(value):

2109 new_dtype = None

2110

2111 elif is_valid_na_for_dtype(value, self.obj.dtype):

2112 if not is_object_dtype(self.obj.dtype):

2113 # Every NA value is suitable for object, no conversion needed

2114 value = na_value_for_dtype(self.obj.dtype, compat=False)

2115

2116 new_dtype = maybe_promote(self.obj.dtype, value)[0]

2117

2118 elif isna(value):

2119 new_dtype = None

2120 elif not self.obj.empty and not is_object_dtype(self.obj.dtype):

2121 # We should not cast, if we have object dtype because we can

2122 # set timedeltas into object series

2123 curr_dtype = self.obj.dtype

2124 curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)

2125 new_dtype = maybe_promote(curr_dtype, value)[0]

2126 else:

2127 new_dtype = None

2128

2129 new_values = Series([value], dtype=new_dtype)._values

2130

2131 if len(self.obj._values):

2132 # GH#22717 handle casting compatibility that np.concatenate

2133 # does incorrectly

2134 new_values = concat_compat([self.obj._values, new_values])

2135 self.obj._mgr = self.obj._constructor(

2136 new_values, index=new_index, name=self.obj.name

2137 )._mgr

2138 self.obj._maybe_update_cacher(clear=True)

2139

2140 elif self.ndim == 2:

2141

2142 if not len(self.obj.columns):

2143 # no columns and scalar

2144 raise ValueError("cannot set a frame with no defined columns")

2145

2146 has_dtype = hasattr(value, "dtype")

2147 if isinstance(value, ABCSeries):

2148 # append a Series

2149 value = value.reindex(index=self.obj.columns, copy=True)

2150 value.name = indexer

2151 elif isinstance(value, dict):

2152 value = Series(

2153 value, index=self.obj.columns, name=indexer, dtype=object

2154 )

2155 else:

2156 # a list-list

2157 if is_list_like_indexer(value):

2158 # must have conforming columns

2159 if len(value) != len(self.obj.columns):

2160 raise ValueError("cannot set a row with mismatched columns")

2161

2162 value = Series(value, index=self.obj.columns, name=indexer)

2163

2164 if not len(self.obj):

2165 # We will ignore the existing dtypes instead of using

2166 # internals.concat logic

2167 df = value.to_frame().T

2168

2169 idx = self.obj.index

2170 if isinstance(idx, MultiIndex):

2171 name = idx.names

2172 else:

2173 name = idx.name

2174

2175 df.index = Index([indexer], name=name)

2176 if not has_dtype:

2177 # i.e. if we already had a Series or ndarray, keep that

2178 # dtype. But if we had a list or dict, then do inference

2179 df = df.infer_objects()

2180 self.obj._mgr = df._mgr

2181 else:

2182 self.obj._mgr = self.obj._append(value)._mgr

2183 self.obj._maybe_update_cacher(clear=True)

2184

2185 def _ensure_iterable_column_indexer(self, column_indexer):

2186 """

2187 Ensure that our column indexer is something that can be iterated over.

2188 """

2189 ilocs: Sequence[int] | np.ndarray

2190 if is_integer(column_indexer):

2191 ilocs = [column_indexer]

2192 elif isinstance(column_indexer, slice):

2193 ilocs = np.arange(len(self.obj.columns))[column_indexer]

2194 elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(

2195 column_indexer.dtype

2196 ):

2197 ilocs = np.arange(len(column_indexer))[column_indexer]

2198 else:

2199 ilocs = column_indexer

2200 return ilocs

2201

2202 def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):

2203 """

2204 Parameters

2205 ----------

2206 indexer : tuple, slice, scalar

2207 Indexer used to get the locations that will be set to `ser`.

2208 ser : pd.Series

2209 Values to assign to the locations specified by `indexer`.

2210 multiindex_indexer : bool, optional

2211 Defaults to False. Should be set to True if `indexer` was from

2212 a `pd.MultiIndex`, to avoid unnecessary broadcasting.

2213

2214 Returns

2215 -------

2216 `np.array` of `ser` broadcast to the appropriate shape for assignment

2217 to the locations selected by `indexer`

2218 """

2219 if isinstance(indexer, (slice, np.ndarray, list, Index)):

2220 indexer = (indexer,)

2221

2222 if isinstance(indexer, tuple):

2223

2224 # flatten np.ndarray indexers

2225 def ravel(i):

2226 return i.ravel() if isinstance(i, np.ndarray) else i

2227

2228 indexer = tuple(map(ravel, indexer))

2229

2230 aligners = [not com.is_null_slice(idx) for idx in indexer]

2231 sum_aligners = sum(aligners)

2232 single_aligner = sum_aligners == 1

2233 is_frame = self.ndim == 2

2234 obj = self.obj

2235

2236 # are we a single alignable value on a non-primary

2237 # dim (e.g. panel: 1,2, or frame: 0) ?

2238 # hence need to align to a single axis dimension

2239 # rather that find all valid dims

2240

2241 # frame

2242 if is_frame:

2243 single_aligner = single_aligner and aligners[0]

2244

2245 # we have a frame, with multiple indexers on both axes; and a

2246 # series, so need to broadcast (see GH5206)

2247 if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):

2248 # TODO: This is hacky, align Series and DataFrame behavior GH#45778

2249 if obj.ndim == 2 and is_empty_indexer(indexer[0]):

2250 return ser._values.copy()

2251 ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

2252

2253 # single indexer

2254 if len(indexer) > 1 and not multiindex_indexer:

2255 len_indexer = len(indexer[1])

2256 ser_values = (

2257 np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T

2258 )

2259

2260 return ser_values

2261

2262 for i, idx in enumerate(indexer):

2263 ax = obj.axes[i]

2264

2265 # multiple aligners (or null slices)

2266 if is_sequence(idx) or isinstance(idx, slice):

2267 if single_aligner and com.is_null_slice(idx):

2268 continue

2269 new_ix = ax[idx]

2270 if not is_list_like_indexer(new_ix):

2271 new_ix = Index([new_ix])

2272 else:

2273 new_ix = Index(new_ix)

2274 if ser.index.equals(new_ix) or not len(new_ix):

2275 return ser._values.copy()

2276

2277 return ser.reindex(new_ix)._values

2278

2279 # 2 dims

2280 elif single_aligner:

2281

2282 # reindex along index

2283 ax = self.obj.axes[1]

2284 if ser.index.equals(ax) or not len(ax):

2285 return ser._values.copy()

2286 return ser.reindex(ax)._values

2287

2288 elif is_integer(indexer) and self.ndim == 1:

2289 if is_object_dtype(self.obj):

2290 return ser

2291 ax = self.obj._get_axis(0)

2292

2293 if ser.index.equals(ax):

2294 return ser._values.copy()

2295

2296 return ser.reindex(ax)._values[indexer]

2297

2298 elif is_integer(indexer):

2299 ax = self.obj._get_axis(1)

2300

2301 if ser.index.equals(ax):

2302 return ser._values.copy()

2303

2304 return ser.reindex(ax)._values

2305

2306 raise ValueError("Incompatible indexer with Series")

2307

2308 def _align_frame(self, indexer, df: DataFrame):

2309 is_frame = self.ndim == 2

2310

2311 if isinstance(indexer, tuple):

2312

2313 idx, cols = None, None

2314 sindexers = []

2315 for i, ix in enumerate(indexer):

2316 ax = self.obj.axes[i]

2317 if is_sequence(ix) or isinstance(ix, slice):

2318 if isinstance(ix, np.ndarray):

2319 ix = ix.ravel()

2320 if idx is None:

2321 idx = ax[ix]

2322 elif cols is None:

2323 cols = ax[ix]

2324 else:

2325 break

2326 else:

2327 sindexers.append(i)

2328

2329 if idx is not None and cols is not None:

2330

2331 if df.index.equals(idx) and df.columns.equals(cols):

2332 val = df.copy()._values

2333 else:

2334 val = df.reindex(idx, columns=cols)._values

2335 return val

2336

2337 elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:

2338 ax = self.obj.index[indexer]

2339 if df.index.equals(ax):

2340 val = df.copy()._values

2341 else:

2342

2343 # we have a multi-index and are trying to align

2344 # with a particular, level GH3738

2345 if (

2346 isinstance(ax, MultiIndex)

2347 and isinstance(df.index, MultiIndex)

2348 and ax.nlevels != df.index.nlevels

2349 ):

2350 raise TypeError(

2351 "cannot align on a multi-index with out "

2352 "specifying the join levels"

2353 )

2354

2355 val = df.reindex(index=ax)._values

2356 return val

2357

2358 raise ValueError("Incompatible indexer with DataFrame")

2359

2360

2361class _ScalarAccessIndexer(NDFrameIndexerBase):

2362 """

2363 Access scalars quickly.

2364 """

2365

2366 # sub-classes need to set _takeable

2367 _takeable: bool

2368

2369 def _convert_key(self, key):

2370 raise AbstractMethodError(self)

2371

2372 def __getitem__(self, key):

2373 if not isinstance(key, tuple):

2374

2375 # we could have a convertible item here (e.g. Timestamp)

2376 if not is_list_like_indexer(key):

2377 key = (key,)

2378 else:

2379 raise ValueError("Invalid call for scalar access (getting)!")

2380

2381 key = self._convert_key(key)

2382 return self.obj._get_value(*key, takeable=self._takeable)

2383

2384 def __setitem__(self, key, value) -> None:

2385 if isinstance(key, tuple):

2386 key = tuple(com.apply_if_callable(x, self.obj) for x in key)

2387 else:

2388 # scalar callable may return tuple

2389 key = com.apply_if_callable(key, self.obj)

2390

2391 if not isinstance(key, tuple):

2392 key = _tuplify(self.ndim, key)

2393 key = list(self._convert_key(key))

2394 if len(key) != self.ndim:

2395 raise ValueError("Not enough indexers for scalar access (setting)!")

2396

2397 self.obj._set_value(*key, value=value, takeable=self._takeable)

2398

2399

@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    _takeable = False

    def _convert_key(self, key):
        """
        Return the key to unpack into the scalar getter/setter.

        GH 26989: for a Series, unpacking the key needs to result in the
        label itself, so a key with more than one element is wrapped in a
        1-tuple. A key of length 1, e.g. ``(1,)``, already unpacks to the
        label and is returned unchanged.
        """
        if self.ndim == 1 and len(key) > 1:
            return (key,)
        return key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        obj = self.obj
        return obj.index.is_unique and obj.columns.is_unique

    def __getitem__(self, key):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__getitem__(key)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (getting)!")
        return self.obj.loc[key]

    def __setitem__(self, key, value):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__setitem__(key, value)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (setting)!")

        self.obj.loc[key] = value
        return

2443

2444

@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    _takeable = True

    def _convert_key(self, key):
        """
        Check that every element of ``key`` is an integer and return ``key``.

        Raises
        ------
        ValueError
            If any element of ``key`` is not an integer.
        """
        if not all(is_integer(part) for part in key):
            raise ValueError("iAt based indexing can only have integer indexers")
        return key

2457

2458

2459def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:

2460 """

2461 Given an indexer for the first dimension, create an equivalent tuple

2462 for indexing over all dimensions.

2463

2464 Parameters

2465 ----------

2466 ndim : int

2467 loc : object

2468

2469 Returns

2470 -------

2471 tuple

2472 """

2473 _tup: list[Hashable | slice]

2474 _tup = [slice(None, None) for _ in range(ndim)]

2475 _tup[0] = loc

2476 return tuple(_tup)

2477

2478

2479def _tupleize_axis_indexer(ndim: int, axis: int, key) -> tuple:

2480 """

2481 If we have an axis, adapt the given key to be axis-independent.

2482 """

2483 new_key = [slice(None)] * ndim

2484 new_key[axis] = key

2485 return tuple(new_key)

2486

2487

def convert_to_index_sliceable(obj: DataFrame, key):
    """
    Return a row slicer for ``key`` if ``obj`` can be sliced by it along the
    index, otherwise return None.

    A slice is converted by the index itself. A string that is not a column
    label is tried as a partial-string slice when the index supports that,
    which emits a FutureWarning on success.
    """
    idx = obj.index
    if isinstance(key, slice):
        return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)

    if not isinstance(key, str):
        return None

    # we are an actual column
    if key in obj.columns:
        return None

    if not idx._supports_partial_string_indexing:
        return None

    # We might have a datetimelike string that we can translate to a
    # slice here via partial string indexing
    try:
        res = idx._get_string_slice(str(key))
        warnings.warn(
            "Indexing a DataFrame with a datetimelike index using a single "
            "string to slice the rows, like `frame[string]`, is deprecated "
            "and will be removed in a future version. Use `frame.loc[string]` "
            "instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return res
    except (KeyError, ValueError, NotImplementedError):
        return None

2520

2521

def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against ``index``, reindexing or converting
    it when necessary.

    This function assumes that is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.array
        Resulting key.

    Raises
    ------
    IndexError
        If the key does not have the same length as index.
    IndexingError
        If the index of the key is unalignable to index.
    """
    mask = key
    if isinstance(key, ABCSeries) and not key.index.equals(index):
        # reorder the boolean Series so that it lines up with ``index``
        positions = mask.index.get_indexer_for(index)
        if -1 in positions:
            raise IndexingError(
                "Unalignable boolean Series provided as "
                "indexer (index of the boolean Series and of "
                "the indexed object do not match)."
            )

        mask = mask.take(positions)

        # non-extension dtypes are done here; extension dtypes fall through
        if not is_extension_array_dtype(mask.dtype):
            return mask.astype(bool)._values

    if is_object_dtype(key):
        # key might be object-dtype bool, check_array_indexer needs bool array
        mask = np.asarray(mask, dtype=bool)
    elif not is_array_like(mask):
        # GH 33924
        # key may contain nan elements, check_array_indexer needs bool array
        mask = pd_array(mask, dtype=bool)
    return check_array_indexer(index, mask)

2572

2573

def convert_missing_indexer(indexer):
    """
    Unwrap a missing indexer, which is represented as a dict.

    Returns the scalar stored under ``"key"`` and True when ``indexer`` is a
    dict; otherwise returns ``indexer`` unchanged and False.

    Raises
    ------
    KeyError
        If the unwrapped key is a bool.
    """
    if not isinstance(indexer, dict):
        return indexer, False

    # a missing key (but not a tuple indexer)
    key = indexer["key"]
    if isinstance(key, bool):
        raise KeyError("cannot use a single bool to index into setitem")
    return key, True

2589

2590

def convert_from_missing_indexer_tuple(indexer, axes):
    """
    Return a copy of ``indexer`` without any missing (dict) entries.

    Each dict entry is replaced by the location of its ``"key"`` on the
    axis at the same position; all other entries are kept as they are.
    """
    return tuple(
        axes[pos].get_loc(entry["key"]) if isinstance(entry, dict) else entry
        for pos, entry in enumerate(indexer)
    )

2600

2601

def maybe_convert_ix(*args):
    """
    Return the ``np.ix_`` cross-product of ``args`` when every argument is
    an ndarray, list, Series or Index; otherwise return ``args`` unchanged.
    """
    crossable = (np.ndarray, list, ABCSeries, Index)
    if all(isinstance(arg, crossable) for arg in args):
        return np.ix_(*args)
    return args

2610

2611

def is_nested_tuple(tup, labels) -> bool:
    """
    Check for a compatible nested tuple and a MultiIndex among the axes.

    Returns
    -------
    bool
        True when ``tup`` is a tuple with at least one list-like or slice
        entry and ``labels`` is a MultiIndex.
    """
    if not isinstance(tup, tuple):
        return False

    has_nested_entry = any(is_list_like(k) or isinstance(k, slice) for k in tup)
    return has_nested_entry and isinstance(labels, MultiIndex)

2627

2628

def is_label_like(key) -> bool:
    """
    Check whether ``key`` selects a single label or row.

    Returns
    -------
    bool
        False for a slice, a list-like indexer or Ellipsis; True otherwise.
    """
    if isinstance(key, slice) or key is Ellipsis:
        return False
    return not is_list_like_indexer(key)

2641

2642

def need_slice(obj: slice) -> bool:
    """
    Check whether ``obj`` is anything other than a null slice.

    Returns
    -------
    bool
        True if ``obj`` has a start, a stop, or a step other than 1.
    """
    if obj.start is not None or obj.stop is not None:
        return True
    return obj.step is not None and obj.step != 1

2654

2655

def check_deprecated_indexers(key) -> None:
    """
    Warn if the key is a deprecated indexer.

    A FutureWarning is emitted when ``key`` is a set or a tuple containing a
    set, and another one when it is a dict or a tuple containing a dict.
    """
    # only the entries of a tuple key are inspected individually
    entries = key if isinstance(key, tuple) else ()

    if isinstance(key, set) or any(isinstance(x, set) for x in entries):
        warnings.warn(
            "Passing a set as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if isinstance(key, dict) or any(isinstance(x, dict) for x in entries):
        warnings.warn(
            "Passing a dict as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )