Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexing.py: 11%
913 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from contextlib import suppress
4from typing import (
5 TYPE_CHECKING,
6 Hashable,
7 Sequence,
8 TypeVar,
9 cast,
10 final,
11)
12import warnings
14import numpy as np
16from pandas._libs.indexing import NDFrameIndexerBase
17from pandas._libs.lib import item_from_zerodim
18from pandas.errors import (
19 AbstractMethodError,
20 IndexingError,
21 InvalidIndexError,
22)
23from pandas.util._decorators import doc
24from pandas.util._exceptions import find_stack_level
26from pandas.core.dtypes.cast import (
27 can_hold_element,
28 maybe_promote,
29)
30from pandas.core.dtypes.common import (
31 is_array_like,
32 is_bool_dtype,
33 is_extension_array_dtype,
34 is_hashable,
35 is_integer,
36 is_iterator,
37 is_list_like,
38 is_numeric_dtype,
39 is_object_dtype,
40 is_scalar,
41 is_sequence,
42)
43from pandas.core.dtypes.concat import concat_compat
44from pandas.core.dtypes.generic import (
45 ABCDataFrame,
46 ABCSeries,
47)
48from pandas.core.dtypes.missing import (
49 infer_fill_value,
50 is_valid_na_for_dtype,
51 isna,
52 na_value_for_dtype,
53)
55from pandas.core import algorithms as algos
56import pandas.core.common as com
57from pandas.core.construction import (
58 array as pd_array,
59 extract_array,
60)
61from pandas.core.indexers import (
62 check_array_indexer,
63 is_empty_indexer,
64 is_list_like_indexer,
65 is_scalar_indexer,
66 length_of_indexer,
67)
68from pandas.core.indexes.api import (
69 Index,
70 MultiIndex,
71)
73if TYPE_CHECKING: 73 ↛ 74line 73 didn't jump to line 74, because the condition on line 73 was never true
74 from pandas import (
75 DataFrame,
76 Series,
77 )
# Self-type used to annotate _LocationIndexer.__call__, so that calling an
# instance of a subclass is typed as returning that same subclass.
_LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")

# "null slice": substituted for an Ellipsis entry (_expand_ellipsis) and for an
# already-consumed position while slicing through a MultiIndex
# (_getitem_lowerdim).
_NS = slice(None, None)
# Message of the IndexingError raised when a tuple key holds more than one
# Ellipsis.
_one_ellipsis_message = "indexer may only contain one '...' entry"
# the public IndexSlicerMaker
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # The subscript key is handed back untouched: ``obj[a:b, c]`` already
        # arrives here as a tuple of slice objects / labels, which is exactly
        # the form the examples above pass on to ``.loc``.
        return arg


# Module-level instance of the class above; the docstring examples reach it as
# ``pd.IndexSlice``.
IndexSlice = _IndexSlice()
134class IndexingMixin:
135 """
136 Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
137 """
    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
            selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        # A new indexer wrapping ``self`` is built on every attribute access;
        # the string is the indexer's ``name``.
        return _iLocIndexer("iloc", self)
    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
            Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=['cobra', 'viper', 'sidewinder'],
        ...      columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...        index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching a boolean condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...    ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...    ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...    ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...         [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        # A new indexer wrapping ``self`` is built on every attribute access.
        # This docstring is also reused for the _LocIndexer class itself via
        # ``@doc(IndexingMixin.loc)``.
        return _LocIndexer("loc", self)
    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            * If getting a value and 'label' does not exist in a DataFrame or
              Series.
        ValueError
            * If row/column label pair is not a tuple or if any label from
              the pair is not a scalar for DataFrame.
            * If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        # A new indexer wrapping ``self`` is built on every attribute access.
        return _AtIndexer("at", self)
    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        # A new indexer wrapping ``self`` is built on every attribute access.
        return _iAtIndexer("iat", self)
655class _LocationIndexer(NDFrameIndexerBase):
656 _valid_types: str
657 axis: int | None = None
659 # sub-classes need to set _takeable
660 _takeable: bool
662 @final
663 def __call__(self: _LocationIndexerT, axis=None) -> _LocationIndexerT:
664 # we need to return a copy of ourselves
665 new_self = type(self)(self.name, self.obj)
667 if axis is not None:
668 axis = self.obj._get_axis_number(axis)
669 new_self.axis = axis
670 return new_self
    def _get_setitem_indexer(self, key):
        """
        Convert a potentially-label-based key into a positional indexer.

        Parameters
        ----------
        key : scalar, slice, range, list-like or tuple
            Key received by ``__setitem__`` (callables are resolved there
            before this method is called).

        Returns
        -------
        object
            The result of the first conversion below that applies:
            ``MultiIndex.get_loc``, ``_convert_tuple`` or
            ``_convert_to_indexer`` on axis 0.
        """
        if self.name == "loc":
            # always holds here bc iloc overrides _get_setitem_indexer
            self._ensure_listlike_indexer(key)

        if isinstance(key, tuple):
            for x in key:
                check_deprecated_indexers(x)

        if self.axis is not None:
            # an axis was pinned through __call__; rewrite the key for it
            key = _tupleize_axis_indexer(self.ndim, self.axis, key)

        ax = self.obj._get_axis(0)

        if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
            with suppress(KeyError, InvalidIndexError):
                # A failed lookup (KeyError / InvalidIndexError) is swallowed so
                # the generic conversions below still get a chance.
                return ax.get_loc(key)

        if isinstance(key, tuple):
            with suppress(IndexingError):
                # suppress "Too many indexers"
                return self._convert_tuple(key)

        if isinstance(key, range):
            # GH#45479 test_loc_setitem_range_key
            key = list(key)

        return self._convert_to_indexer(key, axis=0)
    @final
    def _maybe_mask_setitem_value(self, indexer, value):
        """
        If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
        same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
        similar to Series.__setitem__.

        Note this is only for loc, not iloc.

        NOTE(review): the sentence above is spelled with ``.iloc`` while the note
        restricts the method to loc -- confirm the intended wording against the
        caller.

        Parameters
        ----------
        indexer : tuple or array-like
            Positional indexer. Only a 2-tuple whose first entry is a boolean
            mask, or a bare boolean mask, is rewritten.
        value : object
            Value being assigned. Only a Series/DataFrame whose length equals
            the mask length is touched.

        Returns
        -------
        tuple
            ``(indexer, value)``, each possibly replaced: boolean masks become
            the integer positions of their True entries, and ``value`` is either
            aligned via ``self.obj.iloc._align_series`` / ``_align_frame`` or
            cut to zero rows when the mask selects nothing.
        """

        if (
            isinstance(indexer, tuple)
            and len(indexer) == 2
            and isinstance(value, (ABCSeries, ABCDataFrame))
        ):
            pi, icols = indexer
            ndim = value.ndim
            if com.is_bool_indexer(pi) and len(value) == len(pi):
                # integer positions of the True entries of the row mask
                newkey = pi.nonzero()[0]

                if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
                    # e.g. test_loc_setitem_boolean_mask_allfalse
                    if len(newkey) == 0:
                        # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
                        # TODO(GH#45333): may be fixed when deprecation is enforced

                        value = value.iloc[:0]
                    else:
                        # test_loc_setitem_ndframe_values_alignment
                        value = self.obj.iloc._align_series(indexer, value)
                    indexer = (newkey, icols)

                elif (
                    isinstance(icols, np.ndarray)
                    and icols.dtype.kind == "i"
                    and len(icols) == 1
                ):
                    # exactly one column, selected through an integer array
                    if ndim == 1:
                        # We implicitly broadcast, though numpy does not, see
                        # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
                        if len(newkey) == 0:
                            # FIXME: kludge for
                            #  test_setitem_loc_only_false_indexer_dtype_changed
                            # TODO(GH#45333): may be fixed when deprecation is enforced
                            value = value.iloc[:0]
                        else:
                            # test_loc_setitem_ndframe_values_alignment
                            value = self.obj.iloc._align_series(indexer, value)
                        indexer = (newkey, icols)

                    elif ndim == 2 and value.shape[1] == 1:
                        if len(newkey) == 0:
                            # FIXME: kludge for
                            #  test_loc_setitem_all_false_boolean_two_blocks
                            # TODO(GH#45333): may be fixed when deprecation is enforced
                            value = value.iloc[:0]
                        else:
                            # test_loc_setitem_ndframe_values_alignment
                            value = self.obj.iloc._align_frame(indexer, value)
                        indexer = (newkey, icols)
        elif com.is_bool_indexer(indexer):
            # bare boolean mask: only the indexer is converted, value is untouched
            indexer = indexer.nonzero()[0]

        return indexer, value
770 @final
771 def _ensure_listlike_indexer(self, key, axis=None, value=None):
772 """
773 Ensure that a list-like of column labels are all present by adding them if
774 they do not already exist.
776 Parameters
777 ----------
778 key : list-like of column labels
779 Target labels.
780 axis : key axis if known
781 """
782 column_axis = 1
784 # column only exists in 2-dimensional DataFrame
785 if self.ndim != 2:
786 return
788 if isinstance(key, tuple) and len(key) > 1:
789 # key may be a tuple if we are .loc
790 # if length of key is > 1 set key to column part
791 key = key[column_axis]
792 axis = column_axis
794 if (
795 axis == column_axis
796 and not isinstance(self.obj.columns, MultiIndex)
797 and is_list_like_indexer(key)
798 and not com.is_bool_indexer(key)
799 and all(is_hashable(k) for k in key)
800 ):
801 # GH#38148
802 keys = self.obj.columns.union(key, sort=False)
804 self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
806 @final
807 def __setitem__(self, key, value) -> None:
808 check_deprecated_indexers(key)
809 if isinstance(key, tuple):
810 key = tuple(list(x) if is_iterator(x) else x for x in key)
811 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
812 else:
813 key = com.apply_if_callable(key, self.obj)
814 indexer = self._get_setitem_indexer(key)
815 self._has_valid_setitem_indexer(key)
817 iloc = self if self.name == "iloc" else self.obj.iloc
818 iloc._setitem_with_indexer(indexer, value, self.name)
820 def _validate_key(self, key, axis: int):
821 """
822 Ensure that key is valid for current indexer.
824 Parameters
825 ----------
826 key : scalar, slice or list-like
827 Key requested.
828 axis : int
829 Dimension on which the indexing is being made.
831 Raises
832 ------
833 TypeError
834 If the key (or some element of it) has wrong type.
835 IndexError
836 If the key (or some element of it) is out of bounds.
837 KeyError
838 If the key was not found.
839 """
840 raise AbstractMethodError(self)
842 @final
843 def _expand_ellipsis(self, tup: tuple) -> tuple:
844 """
845 If a tuple key includes an Ellipsis, replace it with an appropriate
846 number of null slices.
847 """
848 if any(x is Ellipsis for x in tup):
849 if tup.count(Ellipsis) > 1:
850 raise IndexingError(_one_ellipsis_message)
852 if len(tup) == self.ndim:
853 # It is unambiguous what axis this Ellipsis is indexing,
854 # treat as a single null slice.
855 i = tup.index(Ellipsis)
856 # FIXME: this assumes only one Ellipsis
857 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
858 return new_key
860 # TODO: other cases? only one test gets here, and that is covered
861 # by _validate_key_length
862 return tup
864 @final
865 def _validate_tuple_indexer(self, key: tuple) -> tuple:
866 """
867 Check the key for valid keys across my indexer.
868 """
869 key = self._validate_key_length(key)
870 key = self._expand_ellipsis(key)
871 for i, k in enumerate(key):
872 try:
873 self._validate_key(k, i)
874 except ValueError as err:
875 raise ValueError(
876 "Location based indexing can only have "
877 f"[{self._valid_types}] types"
878 ) from err
879 return key
881 @final
882 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
883 """
884 Returns
885 -------
886 bool
887 """
888 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
889 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
890 return False
892 @final
893 def _convert_tuple(self, key: tuple) -> tuple:
894 # Note: we assume _tupleize_axis_indexer has been called, if necessary.
895 self._validate_key_length(key)
896 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
897 return tuple(keyidx)
899 @final
900 def _validate_key_length(self, key: tuple) -> tuple:
901 if len(key) > self.ndim:
902 if key[0] is Ellipsis:
903 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
904 key = key[1:]
905 if Ellipsis in key:
906 raise IndexingError(_one_ellipsis_message)
907 return self._validate_key_length(key)
908 raise IndexingError("Too many indexers")
909 return key
911 @final
912 def _getitem_tuple_same_dim(self, tup: tuple):
913 """
914 Index with indexers that should return an object of the same dimension
915 as self.obj.
917 This is only called after a failed call to _getitem_lowerdim.
918 """
919 retval = self.obj
920 for i, key in enumerate(tup):
921 if com.is_null_slice(key):
922 continue
924 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
925 # We should never have retval.ndim < self.ndim, as that should
926 # be handled by the _getitem_lowerdim call above.
927 assert retval.ndim == self.ndim
929 return retval
    @final
    def _getitem_lowerdim(self, tup: tuple):
        """
        Try to resolve a tuple key to an object of lower dimension.

        The strategies are tried in the order written below. The first one
        that applies returns; if none does, IndexingError("not applicable")
        is raised.

        Raises
        ------
        IndexingError
            If the key has too many entries, or if no entry of the key is
            label-like.
        """

        # we can directly get the axis result since the axis is specified
        if self.axis is not None:
            axis = self.obj._get_axis_number(self.axis)
            return self._getitem_axis(tup, axis=axis)

        # we may have a nested tuples indexer here
        if self._is_nested_tuple_indexer(tup):
            return self._getitem_nested_tuple(tup)

        # we maybe be using a tuple to represent multiple dimensions here
        ax0 = self.obj._get_axis(0)
        # ...but iloc should handle the tuple as simple integer-location
        # instead of checking it as multiindex representation (GH 13797)
        if (
            isinstance(ax0, MultiIndex)
            and self.name != "iloc"
            and not any(isinstance(x, slice) for x in tup)
        ):
            # Note: in all extant test cases, replacing the slice condition with
            #  `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
            #  is equivalent.
            #  (see the other place where we call _handle_lowerdim_multi_index_axis0)
            with suppress(IndexingError):
                # error "_LocationIndexer" has no attribute
                # "_handle_lowerdim_multi_index_axis0"
                return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)

        tup = self._validate_key_length(tup)

        for i, key in enumerate(tup):
            if is_label_like(key):
                # We don't need to check for tuples here because those are
                #  caught by the _is_nested_tuple_indexer check above.
                section = self._getitem_axis(key, axis=i)

                # We should never have a scalar section here, because
                #  _getitem_lowerdim is only called after a check for
                #  is_scalar_access, which that would be.
                if section.ndim == self.ndim:
                    # we're in the middle of slicing through a MultiIndex
                    # revise the key wrt to `section` by inserting an _NS
                    new_key = tup[:i] + (_NS,) + tup[i + 1 :]

                else:
                    # Note: the section.ndim == self.ndim check above
                    #  rules out having DataFrame here, so we dont need to worry
                    #  about transposing.
                    new_key = tup[:i] + tup[i + 1 :]

                    if len(new_key) == 1:
                        # a single remaining entry is used bare, not as a 1-tuple
                        new_key = new_key[0]

                # Slices should return views, but calling iloc/loc with a null
                # slice returns a new object.
                if com.is_null_slice(new_key):
                    return section
                # This is an elided recursive call to iloc/loc
                return getattr(section, self.name)[new_key]

        raise IndexingError("not applicable")
    @final
    def _getitem_nested_tuple(self, tup: tuple):
        """
        Resolve a tuple key when at least one axis is a MultiIndex.

        A key with more entries than dimensions is handled as a whole on a
        single axis (loc only). Otherwise the entries are applied one axis at
        a time, last axis first, stopping early once the intermediate result is
        a scalar or has no ``ndim`` attribute.

        Raises
        ------
        ValueError
            If the key has more entries than dimensions and this is not loc.
        IndexingError
            If the object is a Series, the key has more entries than
            dimensions, and one of its entries is a tuple (only reached when
            not every entry is hashable or a null slice).
        """
        # we have a nested tuple so have at least 1 multi-index level
        # we should be able to match up the dimensionality here

        for key in tup:
            check_deprecated_indexers(key)

        # we have too many indexers for our dim, but have at least 1
        # multi-index dimension, try to see if we have something like
        # a tuple passed to a series with a multi-index
        if len(tup) > self.ndim:
            if self.name != "loc":
                # This should never be reached, but let's be explicit about it
                raise ValueError("Too many indices")  # pragma: no cover
            if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
                # GH#10521 Series should reduce MultiIndex dimensions instead of
                #  DataFrame, IndexingError is not raised when slice(None,None,None)
                #  with one row.
                with suppress(IndexingError):
                    # error "_LocationIndexer" has no attribute
                    # "_handle_lowerdim_multi_index_axis0"
                    return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
                        tup
                    )
            elif isinstance(self.obj, ABCSeries) and any(
                isinstance(k, tuple) for k in tup
            ):
                # GH#35349 Raise if tuple in tuple for series
                # Do this after the all-hashable-or-null-slice check so that
                #  we are only getting non-hashable tuples, in particular ones
                #  that themselves contain a slice entry
                # See test_loc_series_getitem_too_many_dimensions
                raise IndexingError("Too many indexers")

            # this is a series with a multi-index specified a tuple of
            # selectors
            axis = self.axis or 0
            return self._getitem_axis(tup, axis=axis)

        # handle the multi-axis by taking sections and reducing
        # this is iterative
        obj = self.obj
        # GH#41369 Loop in reverse order ensures indexing along columns before rows
        # which selects only necessary blocks which avoids dtype conversion if possible
        axis = len(tup) - 1
        for key in tup[::-1]:

            if com.is_null_slice(key):
                # nothing to select on this axis; still step to the previous one
                axis -= 1
                continue

            obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
            axis -= 1

            # if we have a scalar, we are done
            if is_scalar(obj) or not hasattr(obj, "ndim"):
                break

        return obj
    def _convert_to_indexer(self, key, axis: int):
        """
        Convert ``key`` for the given axis (abstract hook).

        Called by ``_convert_tuple`` (once per tuple entry) and by
        ``_get_setitem_indexer`` (with ``axis=0``). This base implementation
        always raises AbstractMethodError.
        """
        raise AbstractMethodError(self)
1059 @final
1060 def __getitem__(self, key):
1061 check_deprecated_indexers(key)
1062 if type(key) is tuple:
1063 key = tuple(list(x) if is_iterator(x) else x for x in key)
1064 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1065 if self._is_scalar_access(key):
1066 return self.obj._get_value(*key, takeable=self._takeable)
1067 return self._getitem_tuple(key)
1068 else:
1069 # we by definition only have the 0th axis
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
1073 return self._getitem_axis(maybe_callable, axis=axis)
1075 def _is_scalar_access(self, key: tuple):
1076 raise NotImplementedError()
    def _getitem_tuple(self, tup: tuple):
        """
        Select with a tuple key (abstract hook).

        ``__getitem__`` falls back to this for tuple keys that are not a scalar
        access. This base implementation always raises AbstractMethodError.
        """
        raise AbstractMethodError(self)
1081 def _getitem_axis(self, key, axis: int):
1082 raise NotImplementedError()
    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """
        Validate a key used for assignment (abstract hook).

        ``__setitem__`` calls this with the resolved key after
        ``_get_setitem_indexer`` has run, and does not look at the return
        value. This base implementation always raises AbstractMethodError.
        """
        raise AbstractMethodError(self)
1087 @final
1088 def _getbool_axis(self, key, axis: int):
1089 # caller is responsible for ensuring non-None axis
1090 labels = self.obj._get_axis(axis)
1091 key = check_bool_indexer(labels, key)
1092 inds = key.nonzero()[0]
1093 return self.obj._take_with_is_copy(inds, axis=axis)
1096@doc(IndexingMixin.loc)
1097class _LocIndexer(_LocationIndexer):
1098 _takeable: bool = False
1099 _valid_types = (
1100 "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
1101 "endpoints included! Can be slices of integers if the "
1102 "index is integers), listlike of labels, boolean"
1103 )
1105 # -------------------------------------------------------------------
1106 # Key Checks
1108 @doc(_LocationIndexer._validate_key)
1109 def _validate_key(self, key, axis: int):
1110 # valid for a collection of labels (we check their presence later)
1111 # slice of labels (where start-end in labels)
1112 # slice of integers (only if in the labels)
1113 # boolean not in slice and with boolean index
1114 if isinstance(key, bool) and not (
1115 is_bool_dtype(self.obj._get_axis(axis))
1116 or self.obj._get_axis(axis).dtype.name == "boolean"
1117 ):
1118 raise KeyError(
1119 f"{key}: boolean label can not be used without a boolean index"
1120 )
1122 if isinstance(key, slice) and (
1123 isinstance(key.start, bool) or isinstance(key.stop, bool)
1124 ):
1125 raise TypeError(f"{key}: boolean values can not be used in a slice")
    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """
        Accept any key for assignment.

        Returns
        -------
        bool
            Always True; ``indexer`` is not inspected.
        """
        return True
1130 def _is_scalar_access(self, key: tuple) -> bool:
1131 """
1132 Returns
1133 -------
1134 bool
1135 """
1136 # this is a shortcut accessor to both .loc and .iloc
1137 # that provide the equivalent access of .at and .iat
1138 # a) avoid getting things via sections and (to minimize dtype changes)
1139 # b) provide a performant path
1140 if len(key) != self.ndim:
1141 return False
1143 for i, k in enumerate(key):
1144 if not is_scalar(k):
1145 return False
1147 ax = self.obj.axes[i]
1148 if isinstance(ax, MultiIndex):
1149 return False
1151 if isinstance(k, str) and ax._supports_partial_string_indexing:
1152 # partial string indexing, df.loc['2000', 'A']
1153 # should not be considered scalar
1154 return False
1156 if not ax._index_as_unique:
1157 return False
1159 return True
1161 # -------------------------------------------------------------------
1162 # MultiIndex Handling
1164 def _multi_take_opportunity(self, tup: tuple) -> bool:
1165 """
1166 Check whether there is the possibility to use ``_multi_take``.
1168 Currently the limit is that all axes being indexed, must be indexed with
1169 list-likes.
1171 Parameters
1172 ----------
1173 tup : tuple
1174 Tuple of indexers, one per axis.
1176 Returns
1177 -------
1178 bool
1179 Whether the current indexing,
1180 can be passed through `_multi_take`.
1181 """
1182 if not all(is_list_like_indexer(x) for x in tup):
1183 return False
1185 # just too complicated
1186 return not any(com.is_bool_indexer(x) for x in tup)
1188 def _multi_take(self, tup: tuple):
1189 """
1190 Create the indexers for the passed tuple of keys, and
1191 executes the take operation. This allows the take operation to be
1192 executed all at once, rather than once for each dimension.
1193 Improving efficiency.
1195 Parameters
1196 ----------
1197 tup : tuple
1198 Tuple of indexers, one per axis.
1200 Returns
1201 -------
1202 values: same type as the object being indexed
1203 """
1204 # GH 836
1205 d = {
1206 axis: self._get_listlike_indexer(key, axis)
1207 for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
1208 }
1209 return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
1211 # -------------------------------------------------------------------
1213 def _getitem_iterable(self, key, axis: int):
1214 """
1215 Index current object with an iterable collection of keys.
1217 Parameters
1218 ----------
1219 key : iterable
1220 Targeted labels.
1221 axis : int
1222 Dimension on which the indexing is being made.
1224 Raises
1225 ------
1226 KeyError
1227 If no key was found. Will change in the future to raise if not all
1228 keys were found.
1230 Returns
1231 -------
1232 scalar, DataFrame, or Series: indexed value(s).
1233 """
1234 # we assume that not com.is_bool_indexer(key), as that is
1235 # handled before we get here.
1236 self._validate_key(key, axis)
1238 # A collection of keys
1239 keyarr, indexer = self._get_listlike_indexer(key, axis)
1240 return self.obj._reindex_with_indexers(
1241 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
1242 )
1244 def _getitem_tuple(self, tup: tuple):
1245 with suppress(IndexingError):
1246 tup = self._expand_ellipsis(tup)
1247 return self._getitem_lowerdim(tup)
1249 # no multi-index, so validate all of the indexers
1250 tup = self._validate_tuple_indexer(tup)
1252 # ugly hack for GH #836
1253 if self._multi_take_opportunity(tup):
1254 return self._multi_take(tup)
1256 return self._getitem_tuple_same_dim(tup)
def _get_label(self, label, axis: int):
    """
    Return the cross-section of the underlying object for ``label``
    along ``axis`` by delegating to ``self.obj.xs``.
    """
    # GH#5567 this will fail if the label is not present in the axis.
    return self.obj.xs(label, axis=axis)
1262 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
1263 # we have an axis0 multi-index, handle or raise
1264 axis = self.axis or 0
1265 try:
1266 # fast path for series or for tup devoid of slices
1267 return self._get_label(tup, axis=axis)
1269 except KeyError as ek:
1270 # raise KeyError if number of indexers match
1271 # else IndexingError will be raised
1272 if self.ndim < len(tup) <= self.obj.index.nlevels:
1273 raise ek
1274 raise IndexingError("No label returned") from ek
1276 def _getitem_axis(self, key, axis: int):
1277 key = item_from_zerodim(key)
1278 if is_iterator(key):
1279 key = list(key)
1280 if key is Ellipsis:
1281 key = slice(None)
1283 labels = self.obj._get_axis(axis)
1285 if isinstance(key, tuple) and isinstance(labels, MultiIndex):
1286 key = tuple(key)
1288 if isinstance(key, slice):
1289 self._validate_key(key, axis)
1290 return self._get_slice_axis(key, axis=axis)
1291 elif com.is_bool_indexer(key):
1292 return self._getbool_axis(key, axis=axis)
1293 elif is_list_like_indexer(key):
1295 # an iterable multi-selection
1296 if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
1298 if hasattr(key, "ndim") and key.ndim > 1:
1299 raise ValueError("Cannot index with multidimensional key")
1301 return self._getitem_iterable(key, axis=axis)
1303 # nested tuple slicing
1304 if is_nested_tuple(key, labels):
1305 locs = labels.get_locs(key)
1306 indexer = [slice(None)] * self.ndim
1307 indexer[axis] = locs
1308 return self.obj.iloc[tuple(indexer)]
1310 # fall thru to straight lookup
1311 self._validate_key(key, axis)
1312 return self._get_label(key, axis=axis)
def _get_slice_axis(self, slice_obj: slice, axis: int):
    """
    Select along ``axis`` with a slice of labels.

    The caller is responsible for passing a non-None ``axis``.
    """
    if not need_slice(slice_obj):
        # nothing to cut: hand back a shallow copy of the whole object
        return self.obj.copy(deep=False)

    axis_labels = self.obj._get_axis(axis)
    positions = axis_labels.slice_indexer(
        slice_obj.start, slice_obj.stop, slice_obj.step
    )

    if not isinstance(positions, slice):
        # DatetimeIndex overrides Index.slice_indexer and may
        # return a DatetimeIndex instead of a slice object.
        return self.obj.take(positions, axis=axis)
    return self.obj._slice(positions, axis=axis)
def _convert_to_indexer(self, key, axis: int):
    """
    Convert indexing key into something we can use to do actual fancy
    indexing on a ndarray.

    Examples
    ix[:5] -> slice(0, 5)
    ix[[1,2,3]] -> [1,2,3]
    ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

    Going by Zen of Python?
    'In the face of ambiguity, refuse the temptation to guess.'
    raise AmbiguousIndexError with integer labels?
    - No, prefer label-based indexing

    Parameters
    ----------
    key : object
        Label-based key for a single axis.
    axis : int
        Axis whose labels are used to resolve ``key``.

    Returns
    -------
    object
        Whatever the matching label lookup on the axis returns, the
        checked boolean indexer, or ``{"key": key}`` when the label was
        not found (see the final branch: this is allowed for setters).

    Raises
    ------
    IndexingError
        If too many indexers are supplied for the object.
    """
    labels = self.obj._get_axis(axis)

    if isinstance(key, slice):
        return labels._convert_slice_indexer(key, kind="loc")

    # A multi-element tuple cannot address a 1-dimensional object unless
    # its axis is a MultiIndex.
    if (
        isinstance(key, tuple)
        and not isinstance(labels, MultiIndex)
        and self.ndim < 2
        and len(key) > 1
    ):
        raise IndexingError("Too many indexers")

    if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
        # Otherwise get_loc will raise InvalidIndexError

        # if we are a label return me
        try:
            return labels.get_loc(key)
        except LookupError:
            if isinstance(key, tuple) and isinstance(labels, MultiIndex):
                # a full-depth tuple that is missing is reported as a new key
                if len(key) == labels.nlevels:
                    return {"key": key}
                raise
            # any other failed lookup falls through to the checks below
        except InvalidIndexError:
            # GH35015, using datetime as column indices raises exception
            if not isinstance(labels, MultiIndex):
                raise
        except ValueError:
            if not is_integer(key):
                raise
            return {"key": key}

    if is_nested_tuple(key, labels):
        if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
            # GH#35349 Raise if tuple in tuple for series
            raise IndexingError("Too many indexers")
        return labels.get_locs(key)

    elif is_list_like_indexer(key):

        if is_iterator(key):
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(labels, key)
            return key
        else:
            # keep only the positional part of (new labels, positions)
            return self._get_listlike_indexer(key, axis)[1]
    else:
        try:
            return labels.get_loc(key)
        except LookupError:
            # allow a not found key only if we are a setter
            if not is_list_like_indexer(key):
                return {"key": key}
            raise
1406 def _get_listlike_indexer(self, key, axis: int):
1407 """
1408 Transform a list-like of keys into a new index and an indexer.
1410 Parameters
1411 ----------
1412 key : list-like
1413 Targeted labels.
1414 axis: int
1415 Dimension on which the indexing is being made.
1417 Raises
1418 ------
1419 KeyError
1420 If at least one key was requested but none was found.
1422 Returns
1423 -------
1424 keyarr: Index
1425 New index (coinciding with 'key' if the axis is unique).
1426 values : array-like
1427 Indexer for the return object, -1 denotes keys not found.
1428 """
1429 ax = self.obj._get_axis(axis)
1430 axis_name = self.obj._get_axis_name(axis)
1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1434 return keyarr, indexer
1437@doc(IndexingMixin.iloc)
1438class _iLocIndexer(_LocationIndexer):
1439 _valid_types = (
1440 "integer, integer slice (START point is INCLUDED, END "
1441 "point is EXCLUDED), listlike of integers, boolean array"
1442 )
1443 _takeable = True
1445 # -------------------------------------------------------------------
1446 # Key Checks
1448 def _validate_key(self, key, axis: int):
1449 if com.is_bool_indexer(key):
1450 if hasattr(key, "index") and isinstance(key.index, Index):
1451 if key.index.inferred_type == "integer":
1452 raise NotImplementedError(
1453 "iLocation based boolean "
1454 "indexing on an integer type "
1455 "is not available"
1456 )
1457 raise ValueError(
1458 "iLocation based boolean indexing cannot use "
1459 "an indexable as a mask"
1460 )
1461 return
1463 if isinstance(key, slice):
1464 return
1465 elif is_integer(key):
1466 self._validate_integer(key, axis)
1467 elif isinstance(key, tuple):
1468 # a tuple should already have been caught by this point
1469 # so don't treat a tuple as a valid indexer
1470 raise IndexingError("Too many indexers")
1471 elif is_list_like_indexer(key):
1472 arr = np.array(key)
1473 len_axis = len(self.obj._get_axis(axis))
1475 # check that the key has a numeric dtype
1476 if not is_numeric_dtype(arr.dtype):
1477 raise IndexError(f".iloc requires numeric indexers, got {arr}")
1479 # check that the key does not exceed the maximum size of the index
1480 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
1481 raise IndexError("positional indexers are out-of-bounds")
1482 else:
1483 raise ValueError(f"Can only index by location with a [{self._valid_types}]")
1485 def _has_valid_setitem_indexer(self, indexer) -> bool:
1486 """
1487 Validate that a positional indexer cannot enlarge its target
1488 will raise if needed, does not modify the indexer externally.
1490 Returns
1491 -------
1492 bool
1493 """
1494 if isinstance(indexer, dict):
1495 raise IndexError("iloc cannot enlarge its target object")
1497 if isinstance(indexer, ABCDataFrame):
1498 warnings.warn(
1499 "DataFrame indexer for .iloc is deprecated and will be removed in "
1500 "a future version.\n"
1501 "consider using .loc with a DataFrame indexer for automatic alignment.",
1502 FutureWarning,
1503 stacklevel=find_stack_level(),
1504 )
1506 if not isinstance(indexer, tuple):
1507 indexer = _tuplify(self.ndim, indexer)
1509 for ax, i in zip(self.obj.axes, indexer):
1510 if isinstance(i, slice):
1511 # should check the stop slice?
1512 pass
1513 elif is_list_like_indexer(i):
1514 # should check the elements?
1515 pass
1516 elif is_integer(i):
1517 if i >= len(ax):
1518 raise IndexError("iloc cannot enlarge its target object")
1519 elif isinstance(i, dict):
1520 raise IndexError("iloc cannot enlarge its target object")
1522 return True
1524 def _is_scalar_access(self, key: tuple) -> bool:
1525 """
1526 Returns
1527 -------
1528 bool
1529 """
1530 # this is a shortcut accessor to both .loc and .iloc
1531 # that provide the equivalent access of .at and .iat
1532 # a) avoid getting things via sections and (to minimize dtype changes)
1533 # b) provide a performant path
1534 if len(key) != self.ndim:
1535 return False
1537 return all(is_integer(k) for k in key)
1539 def _validate_integer(self, key: int, axis: int) -> None:
1540 """
1541 Check that 'key' is a valid position in the desired axis.
1543 Parameters
1544 ----------
1545 key : int
1546 Requested position.
1547 axis : int
1548 Desired axis.
1550 Raises
1551 ------
1552 IndexError
1553 If 'key' is not a valid position in axis 'axis'.
1554 """
1555 len_axis = len(self.obj._get_axis(axis))
1556 if key >= len_axis or key < -len_axis:
1557 raise IndexError("single positional indexer is out-of-bounds")
1559 # -------------------------------------------------------------------
1561 def _getitem_tuple(self, tup: tuple):
1563 tup = self._validate_tuple_indexer(tup)
1564 with suppress(IndexingError):
1565 return self._getitem_lowerdim(tup)
1567 return self._getitem_tuple_same_dim(tup)
1569 def _get_list_axis(self, key, axis: int):
1570 """
1571 Return Series values by list or array of integers.
1573 Parameters
1574 ----------
1575 key : list-like positional indexer
1576 axis : int
1578 Returns
1579 -------
1580 Series object
1582 Notes
1583 -----
1584 `axis` can only be zero.
1585 """
1586 try:
1587 return self.obj._take_with_is_copy(key, axis=axis)
1588 except IndexError as err:
1589 # re-raise with different error message
1590 raise IndexError("positional indexers are out-of-bounds") from err
1592 def _getitem_axis(self, key, axis: int):
1593 if key is Ellipsis:
1594 key = slice(None)
1595 elif isinstance(key, ABCDataFrame):
1596 raise IndexError(
1597 "DataFrame indexer is not allowed for .iloc\n"
1598 "Consider using .loc for automatic alignment."
1599 )
1601 if isinstance(key, slice):
1602 return self._get_slice_axis(key, axis=axis)
1604 if is_iterator(key):
1605 key = list(key)
1607 if isinstance(key, list):
1608 key = np.asarray(key)
1610 if com.is_bool_indexer(key):
1611 self._validate_key(key, axis)
1612 return self._getbool_axis(key, axis=axis)
1614 # a list of integers
1615 elif is_list_like_indexer(key):
1616 return self._get_list_axis(key, axis=axis)
1618 # a single integer
1619 else:
1620 key = item_from_zerodim(key)
1621 if not is_integer(key):
1622 raise TypeError("Cannot index by location index with a non-integer key")
1624 # validate the location
1625 self._validate_integer(key, axis)
1627 return self.obj._ixs(key, axis=axis)
def _get_slice_axis(self, slice_obj: slice, axis: int):
    """
    Select along ``axis`` with a positional slice.

    The caller is responsible for passing a non-None ``axis``.
    """
    if need_slice(slice_obj):
        self.obj._get_axis(axis)._validate_positional_slice(slice_obj)
        return self.obj._slice(slice_obj, axis=axis)

    # nothing to cut: hand back a shallow copy of the whole object
    return self.obj.copy(deep=False)
def _convert_to_indexer(self, key, axis: int):
    """
    Much simpler as we only have to deal with our valid types.

    The key is returned unchanged; ``axis`` is not used.
    """
    return key
1646 def _get_setitem_indexer(self, key):
1647 # GH#32257 Fall through to let numpy do validation
1648 if is_iterator(key):
1649 key = list(key)
1651 if self.axis is not None:
1652 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
1654 return key
1656 # -------------------------------------------------------------------
def _setitem_with_indexer(self, indexer, value, name="iloc"):
    """
    _setitem_with_indexer is for setting values on a Series/DataFrame
    using positional indexers.

    If the relevant keys are not present, the Series/DataFrame may be
    expanded.

    This method is currently broken when dealing with non-unique Indexes,
    since it goes from positional indexers back to labels when calling
    BlockManager methods, see GH#12991, GH#22046, GH#15686.

    Parameters
    ----------
    indexer : object
        Positional indexer, or a tuple of them. A dict entry marks a key
        that is missing from its axis and triggers expansion.
    value : object
        Value(s) to set.
    name : str, default "iloc"
        Name of the calling indexer. ``"loc"`` additionally routes the
        indexer and value through ``_maybe_mask_setitem_value``.
    """
    info_axis = self.obj._info_axis_number

    # maybe partial set
    take_split_path = not self.obj._mgr.is_single_block

    # if there is only one block/type, still have to take split path
    # unless the block is one-dimensional or it can hold the value
    if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
        # in case of dict, keys are indices
        val = list(value.values()) if isinstance(value, dict) else value
        arr = self.obj._mgr.arrays[0]
        take_split_path = not can_hold_element(
            arr, extract_array(val, extract_numpy=True)
        )

    # if we have any multi-indexes that have non-trivial slices
    # (not null slices) then we must take the split path, xref
    # GH 10360, GH 27841
    if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
        for i, ax in zip(indexer, self.obj.axes):
            if isinstance(ax, MultiIndex) and not (
                is_integer(i) or com.is_null_slice(i)
            ):
                take_split_path = True
                break

    if isinstance(indexer, tuple):
        # Rebuild the tuple, replacing each missing-key dict by the
        # position of the newly added label.
        nindexer = []
        for i, idx in enumerate(indexer):
            if isinstance(idx, dict):

                # reindex the axis to the new value
                # and set inplace
                key, _ = convert_missing_indexer(idx)

                # if this is the items axes, then take the main missing
                # path first
                # this correctly sets the dtype and avoids cache issues
                # essentially this separates out the block that is needed
                # to possibly be modified
                if self.ndim > 1 and i == info_axis:

                    # add the new item, and set the value
                    # must have all defined axes if we have a scalar
                    # or a list-like on the non-info axes if we have a
                    # list-like
                    if not len(self.obj):
                        if not is_list_like_indexer(value):
                            raise ValueError(
                                "cannot set a frame with no "
                                "defined index and a scalar"
                            )
                        self.obj[key] = value
                        return

                    # add a new item with the dtype setup
                    if com.is_null_slice(indexer[0]):
                        # We are setting an entire column
                        self.obj[key] = value
                        return
                    elif is_array_like(value):
                        # GH#42099
                        arr = extract_array(value, extract_numpy=True)
                        # an all -1 taker produces an all-missing array
                        # with a dtype derived from `arr`
                        taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                        empty_value = algos.take_nd(arr, taker)
                        if not isinstance(value, ABCSeries):
                            # if not Series (in which case we need to align),
                            # we can short-circuit
                            empty_value[indexer[0]] = arr
                            self.obj[key] = empty_value
                            return

                        self.obj[key] = empty_value

                    else:
                        self.obj[key] = infer_fill_value(value)

                    # the new column now exists: redo the set with an
                    # indexer that no longer contains missing keys
                    new_indexer = convert_from_missing_indexer_tuple(
                        indexer, self.obj.axes
                    )
                    self._setitem_with_indexer(new_indexer, value, name)

                    return

                # reindex the axis
                # make sure to clear the cache because we are
                # just replacing the block manager here
                # so the object is the same
                index = self.obj._get_axis(i)
                labels = index.insert(len(index), key)

                # We are expanding the Series/DataFrame values to match
                # the length of the new index `labels`. GH#40096 ensure
                # this is valid even if the index has duplicates.
                taker = np.arange(len(index) + 1, dtype=np.intp)
                taker[-1] = -1
                reindexers = {i: (labels, taker)}
                new_obj = self.obj._reindex_with_indexers(
                    reindexers, allow_dups=True
                )
                self.obj._mgr = new_obj._mgr
                self.obj._maybe_update_cacher(clear=True)
                self.obj._is_copy = None

                nindexer.append(labels.get_loc(key))

            else:
                nindexer.append(idx)

        indexer = tuple(nindexer)
    else:

        indexer, missing = convert_missing_indexer(indexer)

        if missing:
            self._setitem_with_indexer_missing(indexer, value)
            return

    if name == "loc":
        # must come after setting of missing
        indexer, value = self._maybe_mask_setitem_value(indexer, value)

    # align and set the values
    if take_split_path:
        # We have to operate column-wise
        self._setitem_with_indexer_split_path(indexer, value, name)
    else:
        self._setitem_single_block(indexer, value, name)
def _setitem_with_indexer_split_path(self, indexer, value, name: str):
    """
    Setitem column-wise.

    Parameters
    ----------
    indexer : object
        Row/column indexer; a non-tuple is expanded with ``_tuplify``.
    value : object
        Scalar or list-like value(s) to set.
    name : str
        Name of the calling indexer; Series values are aligned unless
        this is ``"iloc"``.

    Raises
    ------
    IndexError
        If the indexer has more entries than ``self.ndim``.
    ValueError
        If the row indexer has more than two dimensions, or the lengths
        of keys and value do not match.
    """
    # Above we only set take_split_path to True for 2D cases
    assert self.ndim == 2

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)
    if len(indexer) > self.ndim:
        raise IndexError("too many indices for array")
    if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
        raise ValueError(r"Cannot set values with ndim > 2")

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        from pandas import Series

        value = self._align_series(indexer, Series(value))

    # Ensure we have something we can iterate over
    info_axis = indexer[1]
    ilocs = self._ensure_iterable_column_indexer(info_axis)

    pi = indexer[0]
    lplane_indexer = length_of_indexer(pi, self.obj.index)
    # lplane_indexer gives the expected length of obj[indexer[0]]

    # we need an iterable, with a ndim of at least 1
    # eg. don't pass through np.array(0)
    if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:

        if isinstance(value, ABCDataFrame):
            self._setitem_with_indexer_frame_value(indexer, value, name)

        elif np.ndim(value) == 2:
            self._setitem_with_indexer_2d_value(indexer, value)

        elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
            # We are setting multiple rows in a single column.
            self._setitem_single_column(ilocs[0], value, pi)

        elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
            # We are trying to set N values into M entries of a single
            # column, which is invalid for N != M
            # Exclude zero-len for e.g. boolean masking that is all-false

            if len(value) == 1 and not is_integer(info_axis):
                # This is a case like df.iloc[:3, [1]] = [0]
                # where we treat as df.iloc[:3, 1] = 0
                # NOTE(review): `name` is not forwarded, so the recursive
                # call runs with its default "iloc" -- confirm intended.
                return self._setitem_with_indexer((pi, info_axis[0]), value[0])

            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

        elif lplane_indexer == 0 and len(value) == len(self.obj.index):
            # We get here in one case via .loc with a all-False mask
            pass

        elif self._is_scalar_access(indexer) and is_object_dtype(
            self.obj.dtypes[ilocs[0]]
        ):
            # We are setting nested data, only possible for object dtype data
            self._setitem_single_column(indexer[1], value, pi)

        elif len(ilocs) == len(value):
            # We are setting multiple columns in a single row.
            for loc, v in zip(ilocs, value):
                self._setitem_single_column(loc, v, pi)

        elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
            # This is a setitem-with-expansion, see
            # test_loc_setitem_empty_append_expands_rows_mixed_dtype
            # e.g. df = DataFrame(columns=["x", "y"])
            # df["x"] = df["x"].astype(np.int64)
            # df.loc[:, "x"] = [1, 2, 3]
            self._setitem_single_column(ilocs[0], value, pi)

        else:
            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

    else:

        # scalar value: broadcast it to every targeted column
        for loc in ilocs:
            self._setitem_single_column(loc, value, pi)
1890 def _setitem_with_indexer_2d_value(self, indexer, value):
1891 # We get here with np.ndim(value) == 2, excluding DataFrame,
1892 # which goes through _setitem_with_indexer_frame_value
1893 pi = indexer[0]
1895 ilocs = self._ensure_iterable_column_indexer(indexer[1])
1897 # GH#7551 Note that this coerces the dtype if we are mixed
1898 value = np.array(value, dtype=object)
1899 if len(ilocs) != value.shape[1]:
1900 raise ValueError(
1901 "Must have equal len keys and value when setting with an ndarray"
1902 )
1904 for i, loc in enumerate(ilocs):
1905 # setting with a list, re-coerces
1906 self._setitem_single_column(loc, value[:, i].tolist(), pi)
def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
    """
    Set a DataFrame ``value`` one target column at a time.

    Parameters
    ----------
    indexer : tuple
        ``indexer[0]`` selects rows, ``indexer[1]`` selects columns.
    value : DataFrame
        Values to set.
    name : str
        Name of the calling indexer; for ``"iloc"`` columns are matched
        by position and no alignment is performed.

    Raises
    ------
    ValueError
        If ``value`` has non-unique columns that differ from the
        target's columns (outside the ``"iloc"`` case).
    """
    ilocs = self._ensure_iterable_column_indexer(indexer[1])

    # mutable copy whose column slot is overwritten per target column
    sub_indexer = list(indexer)
    pi = indexer[0]

    multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

    unique_cols = value.columns.is_unique

    # We do not want to align the value in case of iloc GH#37728
    if name == "iloc":
        for i, loc in enumerate(ilocs):
            val = value.iloc[:, i]
            self._setitem_single_column(loc, val, pi)

    elif not unique_cols and value.columns.equals(self.obj.columns):
        # We assume we are already aligned, see
        # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
        for loc in ilocs:
            item = self.obj.columns[loc]
            if item in value:
                sub_indexer[1] = item
                val = self._align_series(
                    tuple(sub_indexer),
                    value.iloc[:, loc],
                    multiindex_indexer,
                )
            else:
                # no matching column in `value`: fill with missing
                val = np.nan

            self._setitem_single_column(loc, val, pi)

    elif not unique_cols:
        raise ValueError("Setting with non-unique columns is not allowed.")

    else:
        for loc in ilocs:
            item = self.obj.columns[loc]
            if item in value:
                sub_indexer[1] = item
                val = self._align_series(
                    tuple(sub_indexer), value[item], multiindex_indexer
                )
            else:
                # no matching column in `value`: fill with missing
                val = np.nan

            self._setitem_single_column(loc, val, pi)
def _setitem_single_column(self, loc: int, value, plane_indexer):
    """
    Set ``value`` into the column at position ``loc``.

    Parameters
    ----------
    loc : int
        Indexer for column position
    value : object
        Value(s) to set.
    plane_indexer : int, slice, listlike[int]
        The indexer we use for setitem along axis=0.
    """
    pi = plane_indexer

    # snapshot of the column before setting, used for the warning check
    orig_values = self.obj._get_column_array(loc)

    # perform the equivalent of a setitem on the info axis
    # as we have a null slice or a slice with full bounds
    # which means essentially reassign to the columns of a
    # multi-dim object
    # GH#6149 (null slice), GH#10408 (full bounds)
    if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
        pass
    elif (
        is_array_like(value)
        and len(value.shape) > 0
        and self.obj.shape[0] == value.shape[0]
        and not is_empty_indexer(pi)
    ):
        # `value` has as many rows as the object: the column is replaced
        # with `value` re-indexed by the row indexer
        if is_list_like(pi) and not is_bool_dtype(pi):
            value = value[np.argsort(pi)]
        else:
            # in case of slice
            value = value[pi]
    else:
        # set value into the column (first attempting to operate inplace, then
        # falling back to casting if necessary)
        self.obj._mgr.column_setitem(loc, plane_indexer, value)
        self.obj._clear_item_cache()
        return

    self.obj._iset_item(loc, value)

    # We will not operate in-place, but will attempt to in the future.
    # To determine whether we need to issue a FutureWarning, see if the
    # setting in-place would work, i.e. behavior will change.

    new_values = self.obj._get_column_array(loc)

    if can_hold_element(orig_values, new_values) and not len(new_values) == 0:
        # Don't issue the warning yet, as we can still trim a few cases where
        # behavior will not change.

        if (
            isinstance(new_values, np.ndarray)
            and isinstance(orig_values, np.ndarray)
            and (
                np.shares_memory(new_values, orig_values)
                or new_values.shape != orig_values.shape
            )
        ):
            # TODO: get something like tm.shares_memory working?
            # The values were set inplace after all, no need to warn,
            # e.g. test_rename_nocopy
            # In case of enlarging we can not set inplace, so need to
            # warn either
            pass
        else:
            warnings.warn(
                "In a future version, `df.iloc[:, i] = newvals` will attempt "
                "to set the values inplace instead of always setting a new "
                "array. To retain the old behavior, use either "
                "`df[df.columns[i]] = newvals` or, if columns are non-unique, "
                "`df.isetitem(i, newvals)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # TODO: how to get future behavior?
        # TODO: what if we got here indirectly via loc?
    return
def _setitem_single_block(self, indexer, value, name: str):
    """
    _setitem_with_indexer for the case when we have a single Block.

    Parameters
    ----------
    indexer : object
        Positional indexer, or a tuple of them.
    value : object
        Value(s) to set. Series/dict values, and DataFrame values, are
        aligned first unless ``name`` is ``"iloc"`` (dicts always are).
    name : str
        Name of the calling indexer.
    """
    from pandas import Series

    info_axis = self.obj._info_axis_number
    item_labels = self.obj._get_axis(info_axis)
    if isinstance(indexer, tuple):

        # if we are setting on the info axis ONLY
        # set using those methods to avoid block-splitting
        # logic here
        if (
            self.ndim == len(indexer) == 2
            and is_integer(indexer[1])
            and com.is_null_slice(indexer[0])
        ):
            col = item_labels[indexer[info_axis]]
            # only when the label occurs exactly once on the info axis
            if len(item_labels.get_indexer_for([col])) == 1:
                # e.g. test_loc_setitem_empty_append_expands_rows
                loc = item_labels.get_loc(col)
                # Go through _setitem_single_column to get
                # FutureWarning if relevant.
                self._setitem_single_column(loc, value, indexer[0])
                return

        indexer = maybe_convert_ix(*indexer)  # e.g. test_setitem_frame_align

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        # TODO(EA): ExtensionBlock.setitem this causes issues with
        # setting for extensionarrays that store dicts. Need to decide
        # if it's worth supporting that.
        value = self._align_series(indexer, Series(value))

    elif isinstance(value, ABCDataFrame) and name != "iloc":
        value = self._align_frame(indexer, value)

    # check for chained assignment
    self.obj._check_is_chained_assignment_possible()

    # actually do the set
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
    self.obj._maybe_update_cacher(clear=True, inplace=True)
def _setitem_with_indexer_missing(self, indexer, value):
    """
    Insert new row(s) or column(s) into the Series or DataFrame.

    Parameters
    ----------
    indexer : object
        Label of the entry to add; it is appended to the index.
    value : object
        Value for the new entry.

    Raises
    ------
    ValueError
        For a 2-dimensional object with no columns, or when a list-like
        ``value`` does not have one element per column.
    """
    from pandas import Series

    # reindex the axis to the new value
    # and set inplace
    if self.ndim == 1:
        index = self.obj.index
        new_index = index.insert(len(index), indexer)

        # we have a coerced indexer, e.g. a float
        # that matches in an Int64Index, so
        # we will not create a duplicate index, rather
        # index to that element
        # e.g. 0.0 -> 0
        # GH#12246
        if index.is_unique:
            # pass new_index[-1:] instead if [new_index[-1]]
            # so that we retain dtype
            new_indexer = index.get_indexer(new_index[-1:])
            if (new_indexer != -1).any():
                # We get only here with loc, so can hard code
                return self._setitem_with_indexer(new_indexer, value, "loc")

        # this preserves dtype of the value and of the object
        if not is_scalar(value):
            new_dtype = None

        elif is_valid_na_for_dtype(value, self.obj.dtype):
            if not is_object_dtype(self.obj.dtype):
                # Every NA value is suitable for object, no conversion needed
                value = na_value_for_dtype(self.obj.dtype, compat=False)

            new_dtype = maybe_promote(self.obj.dtype, value)[0]

        elif isna(value):
            new_dtype = None
        elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
            # We should not cast, if we have object dtype because we can
            # set timedeltas into object series
            curr_dtype = self.obj.dtype
            # use the attribute `numpy_dtype` when the dtype exposes one
            curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
            new_dtype = maybe_promote(curr_dtype, value)[0]
        else:
            new_dtype = None

        new_values = Series([value], dtype=new_dtype)._values

        if len(self.obj._values):
            # GH#22717 handle casting compatibility that np.concatenate
            # does incorrectly
            new_values = concat_compat([self.obj._values, new_values])
        # swap in the manager of a freshly constructed, enlarged object
        self.obj._mgr = self.obj._constructor(
            new_values, index=new_index, name=self.obj.name
        )._mgr
        self.obj._maybe_update_cacher(clear=True)

    elif self.ndim == 2:

        if not len(self.obj.columns):
            # no columns and scalar
            raise ValueError("cannot set a frame with no defined columns")

        # recorded before `value` is converted to a Series below
        has_dtype = hasattr(value, "dtype")
        if isinstance(value, ABCSeries):
            # append a Series
            value = value.reindex(index=self.obj.columns, copy=True)
            value.name = indexer
        elif isinstance(value, dict):
            value = Series(
                value, index=self.obj.columns, name=indexer, dtype=object
            )
        else:
            # a list-list
            if is_list_like_indexer(value):
                # must have conforming columns
                if len(value) != len(self.obj.columns):
                    raise ValueError("cannot set a row with mismatched columns")

            value = Series(value, index=self.obj.columns, name=indexer)

        if not len(self.obj):
            # We will ignore the existing dtypes instead of using
            # internals.concat logic
            df = value.to_frame().T

            idx = self.obj.index
            if isinstance(idx, MultiIndex):
                name = idx.names
            else:
                name = idx.name

            df.index = Index([indexer], name=name)
            if not has_dtype:
                # i.e. if we already had a Series or ndarray, keep that
                # dtype. But if we had a list or dict, then do inference
                df = df.infer_objects()
            self.obj._mgr = df._mgr
        else:
            self.obj._mgr = self.obj._append(value)._mgr
        self.obj._maybe_update_cacher(clear=True)
def _ensure_iterable_column_indexer(self, column_indexer):
    """
    Normalize a column indexer into something that can be looped over.

    An integer is wrapped in a one-element list, a slice is resolved
    against the positions of ``self.obj.columns``, a boolean ndarray is
    converted to the positions where it is True, and any other indexer
    is handed back untouched.
    """
    if is_integer(column_indexer):
        return [column_indexer]

    if isinstance(column_indexer, slice):
        n_columns = len(self.obj.columns)
        return np.arange(n_columns)[column_indexer]

    is_bool_mask = isinstance(column_indexer, np.ndarray) and is_bool_dtype(
        column_indexer.dtype
    )
    if is_bool_mask:
        # positions are derived from the mask's own length
        return np.arange(len(column_indexer))[column_indexer]

    return column_indexer
def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
    """
    Align `ser` with the locations that `indexer` selects on ``self.obj``.

    Parameters
    ----------
    indexer : tuple, slice, scalar
        Indexer used to get the locations that will be set to `ser`.
    ser : pd.Series
        Values to assign to the locations specified by `indexer`.
    multiindex_indexer : bool, optional
        Defaults to False. Should be set to True if `indexer` was from
        a `pd.MultiIndex`, to avoid unnecessary broadcasting.

    Returns
    -------
    `np.array` of `ser` broadcast to the appropriate shape for assignment
    to the locations selected by `indexer`

    Raises
    ------
    ValueError
        If no alignment rule applies to `indexer`.
    """
    if isinstance(indexer, (slice, np.ndarray, list, Index)):
        indexer = (indexer,)

    if isinstance(indexer, tuple):
        # ndarray indexers are flattened, every other entry is kept as-is
        indexer = tuple(
            part.ravel() if isinstance(part, np.ndarray) else part
            for part in indexer
        )

        non_null = [not com.is_null_slice(part) for part in indexer]
        n_non_null = sum(non_null)
        target = self.obj

        # A single non-null entry means we align along one axis only; for
        # a frame this is only honoured when that entry is the first one.
        only_one = n_non_null == 1
        if self.ndim == 2:
            only_one = only_one and non_null[0]

        # frame with sequence indexers on both axes and a Series value:
        # the Series has to be broadcast (see GH5206)
        if n_non_null == self.ndim and all(is_sequence(part) for part in indexer):
            # TODO: This is hacky, align Series and DataFrame behavior GH#45778
            if target.ndim == 2 and is_empty_indexer(indexer[0]):
                return ser._values.copy()
            aligned = ser.reindex(target.axes[0][indexer[0]], copy=True)._values

            if len(indexer) > 1 and not multiindex_indexer:
                # repeat the aligned values once per selected column
                n_repeat = len(indexer[1])
                aligned = np.tile(aligned, n_repeat).reshape(n_repeat, -1).T

            return aligned

        for axis_no, part in enumerate(indexer):
            axis = target.axes[axis_no]

            if is_sequence(part) or isinstance(part, slice):
                # sequence or slice entry (possibly a null slice)
                if only_one and com.is_null_slice(part):
                    continue
                picked = axis[part]
                if is_list_like_indexer(picked):
                    picked = Index(picked)
                else:
                    picked = Index([picked])
                if ser.index.equals(picked) or not len(picked):
                    return ser._values.copy()
                return ser.reindex(picked)._values

            if only_one:
                # scalar entry on a frame: align against the second axis
                columns = self.obj.axes[1]
                if ser.index.equals(columns) or not len(columns):
                    return ser._values.copy()
                return ser.reindex(columns)._values

    elif is_integer(indexer):
        if self.ndim == 1:
            if is_object_dtype(self.obj):
                return ser
            own_axis = self.obj._get_axis(0)
            if ser.index.equals(own_axis):
                return ser._values.copy()
            return ser.reindex(own_axis)._values[indexer]

        other_axis = self.obj._get_axis(1)
        if ser.index.equals(other_axis):
            return ser._values.copy()
        return ser.reindex(other_axis)._values

    raise ValueError("Incompatible indexer with Series")
def _align_frame(self, indexer, df: DataFrame):
    """
    Align `df` with the locations that `indexer` selects on ``self.obj``.

    Parameters
    ----------
    indexer : tuple, slice or list-like
        Indexer describing the target locations. For a tuple, the first
        sequence/slice entry selects the row labels and the second one the
        column labels; scalar entries are ignored.
    df : DataFrame
        Values to be assigned.

    Returns
    -------
    The ``_values`` of `df`, copied when its labels already match the
    selected labels and reindexed to them otherwise.

    Raises
    ------
    TypeError
        If both the selected labels and ``df.index`` are MultiIndexes with
        a different number of levels.
    ValueError
        If `indexer` cannot be used to align a DataFrame.
    """
    is_frame = self.ndim == 2

    if isinstance(indexer, tuple):
        idx, cols = None, None
        for i, ix in enumerate(indexer):
            ax = self.obj.axes[i]
            if not (is_sequence(ix) or isinstance(ix, slice)):
                # scalar entries contribute no labels to align on
                continue
            if isinstance(ix, np.ndarray):
                ix = ix.ravel()
            if idx is None:
                idx = ax[ix]
            elif cols is None:
                cols = ax[ix]
            else:
                # both label sets are known; nothing more to collect
                break

        if idx is not None and cols is not None:
            if df.index.equals(idx) and df.columns.equals(cols):
                return df.copy()._values
            return df.reindex(idx, columns=cols)._values

    elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
        ax = self.obj.index[indexer]
        if df.index.equals(ax):
            return df.copy()._values

        # we have a multi-index and are trying to align
        # with a particular level, GH3738
        if (
            isinstance(ax, MultiIndex)
            and isinstance(df.index, MultiIndex)
            and ax.nlevels != df.index.nlevels
        ):
            raise TypeError(
                "cannot align on a multi-index with out "
                "specifying the join levels"
            )

        return df.reindex(index=ax)._values

    # reached for unsupported indexers and for tuples that did not yield
    # both a row and a column selection
    raise ValueError("Incompatible indexer with DataFrame")
class _ScalarAccessIndexer(NDFrameIndexerBase):
    """
    Shared machinery for accessors that read or write one scalar at a time.
    """

    # concrete accessors are expected to define _takeable
    _takeable: bool

    def _convert_key(self, key):
        # hook for subclasses: validate/adjust the key tuple
        raise AbstractMethodError(self)

    def __getitem__(self, key):
        if not isinstance(key, tuple):
            # a lone non-list-like key (e.g. a Timestamp) is wrapped so it
            # can be unpacked below; list-likes are rejected
            if is_list_like_indexer(key):
                raise ValueError("Invalid call for scalar access (getting)!")
            key = (key,)

        converted = self._convert_key(key)
        return self.obj._get_value(*converted, takeable=self._takeable)

    def __setitem__(self, key, value) -> None:
        if isinstance(key, tuple):
            key = tuple(com.apply_if_callable(part, self.obj) for part in key)
        else:
            # scalar callable may return tuple
            key = com.apply_if_callable(key, self.obj)

        if not isinstance(key, tuple):
            key = _tuplify(self.ndim, key)

        labels = list(self._convert_key(key))
        if len(labels) != self.ndim:
            raise ValueError("Not enough indexers for scalar access (setting)!")

        self.obj._set_value(*labels, value=value, takeable=self._takeable)
@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    _takeable = False

    def _convert_key(self, key):
        """
        Return the key in the form expected for a label lookup.
        """
        # GH 26989
        # For a Series the unpacked key has to be the label itself. A
        # one-element key such as (1,) already unpacks to the label, so
        # only longer keys are wrapped.
        if self.ndim == 1 and len(key) > 1:
            return (key,)
        return key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        frame = self.obj
        return frame.index.is_unique and frame.columns.is_unique

    def __getitem__(self, key):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__getitem__(key)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (getting)!")
        return self.obj.loc[key]

    def __setitem__(self, key, value):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__setitem__(key, value)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (setting)!")
        self.obj.loc[key] = value
        return
@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    _takeable = True

    def _convert_key(self, key):
        """
        Check that every entry of the key is an integer and return the key.
        """
        if not all(is_integer(entry) for entry in key):
            raise ValueError("iAt based indexing can only have integer indexers")
        return key
def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
    """
    Expand a first-dimension indexer into a tuple covering every dimension.

    Parameters
    ----------
    ndim : int
    loc : object

    Returns
    -------
    tuple
        `loc` in the first position, followed by a null slice for each
        remaining dimension.
    """
    parts: list[Hashable | slice] = [slice(None, None)] * ndim
    parts[0] = loc
    return tuple(parts)
def _tupleize_axis_indexer(ndim: int, axis: int, key) -> tuple:
    """
    Build an `ndim`-long tuple of null slices with `key` placed at `axis`.
    """
    parts = [slice(None) for _ in range(ndim)]
    parts[axis] = key
    return tuple(parts)
def convert_to_index_sliceable(obj: DataFrame, key):
    """
    Return a row slicer for `key` if it can slice the index, else None.

    A slice is converted through the index. A string that is not a column
    label is tried as a partial-string slice when the index supports it
    (this path emits a FutureWarning). Every other key yields None.
    """
    idx = obj.index
    if isinstance(key, slice):
        return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)

    if not isinstance(key, str):
        return None

    # an existing column label is never treated as a row slice
    if key in obj.columns:
        return None

    if not idx._supports_partial_string_indexing:
        return None

    # We might have a datetimelike string that can be translated to a
    # slice via partial string indexing
    try:
        res = idx._get_string_slice(str(key))
    except (KeyError, ValueError, NotImplementedError):
        return None
    else:
        warnings.warn(
            "Indexing a DataFrame with a datetimelike index using a single "
            "string to slice the rows, like `frame[string]`, is deprecated "
            "and will be removed in a future version. Use `frame.loc[string]` "
            "instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return res
def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against `index`, aligning or converting it
    when necessary.

    This function assumes that is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.array
        Resulting key.

    Raises
    ------
    IndexError
        If the key does not have the same length as index.
    IndexingError
        If the index of the key is unalignable to index.
    """
    mask = key
    needs_alignment = isinstance(key, ABCSeries) and not key.index.equals(index)
    if needs_alignment:
        positions = mask.index.get_indexer_for(index)
        if -1 in positions:
            raise IndexingError(
                "Unalignable boolean Series provided as "
                "indexer (index of the boolean Series and of "
                "the indexed object do not match)."
            )

        mask = mask.take(positions)

        # non-extension dtypes are finished here; extension dtypes fall
        # through to the generic handling below
        if not is_extension_array_dtype(mask.dtype):
            return mask.astype(bool)._values

    if is_object_dtype(key):
        # key might be object-dtype bool, check_array_indexer needs bool array
        mask = np.asarray(mask, dtype=bool)
    elif not is_array_like(mask):
        # GH 33924
        # key may contain nan elements, check_array_indexer needs bool array
        mask = pd_array(mask, dtype=bool)
    return check_array_indexer(index, mask)
def convert_missing_indexer(indexer):
    """
    Unwrap a "missing" indexer, which is represented as a dict.

    Returns the scalar indexer together with a boolean that tells whether
    an unwrapping took place.
    """
    if not isinstance(indexer, dict):
        return indexer, False

    # a missing key (but not a tuple indexer)
    missing_key = indexer["key"]
    if isinstance(missing_key, bool):
        raise KeyError("cannot use a single bool to index into setitem")
    return missing_key, True
def convert_from_missing_indexer_tuple(indexer, axes):
    """
    Return a copy of `indexer` in which every missing (dict) entry has been
    replaced by the location of its ``"key"`` on the matching axis.
    """
    resolved = []
    for axis_no, entry in enumerate(indexer):
        if isinstance(entry, dict):
            entry = axes[axis_no].get_loc(entry["key"])
        resolved.append(entry)
    return tuple(resolved)
def maybe_convert_ix(*args):
    """
    Return the open mesh (``np.ix_``) of the arguments when each of them is
    an ndarray, list, Series or Index; otherwise return them unchanged.
    """
    array_like_types = (np.ndarray, list, ABCSeries, Index)
    if all(isinstance(arg, array_like_types) for arg in args):
        # We likely want to take the cross-product.
        return np.ix_(*args)
    return args
def is_nested_tuple(tup, labels) -> bool:
    """
    Check for a tuple holding a list-like or slice used on a MultiIndex.

    Returns
    -------
    bool
    """
    return (
        isinstance(tup, tuple)
        and any(is_list_like(item) or isinstance(item, slice) for item in tup)
        and isinstance(labels, MultiIndex)
    )
def is_label_like(key) -> bool:
    """
    Tell whether `key` selects a single label or row, i.e. it is neither a
    slice, nor a list-like indexer, nor Ellipsis.

    Returns
    -------
    bool
    """
    return not (
        isinstance(key, slice) or is_list_like_indexer(key) or key is Ellipsis
    )
def need_slice(obj: slice) -> bool:
    """
    Tell whether `obj` has a start, a stop, or a step other than 1.

    Returns
    -------
    bool
    """
    if obj.start is not None or obj.stop is not None:
        return True
    return obj.step is not None and obj.step != 1
def check_deprecated_indexers(key) -> None:
    """Emit a FutureWarning when `key` is, or is a tuple containing, a set or dict."""
    deprecated = (
        (
            set,
            "Passing a set as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
        ),
        (
            dict,
            "Passing a dict as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
        ),
    )
    for bad_type, message in deprecated:
        if isinstance(key, bad_type) or (
            isinstance(key, tuple) and any(isinstance(x, bad_type) for x in key)
        ):
            warnings.warn(
                message,
                FutureWarning,
                stacklevel=find_stack_level(),
            )