Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexers/utils.py: 9%
150 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Low-dependency indexing utilities.
3"""
4from __future__ import annotations
6from typing import (
7 TYPE_CHECKING,
8 Any,
9)
10import warnings
12import numpy as np
14from pandas._typing import AnyArrayLike
15from pandas.util._exceptions import find_stack_level
17from pandas.core.dtypes.common import (
18 is_array_like,
19 is_bool_dtype,
20 is_extension_array_dtype,
21 is_integer,
22 is_integer_dtype,
23 is_list_like,
24)
25from pandas.core.dtypes.generic import (
26 ABCIndex,
27 ABCSeries,
28)
30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from pandas.core.frame import DataFrame
32 from pandas.core.indexes.base import Index
34# -----------------------------------------------------------
35# Indexer Identification
def is_valid_positional_slice(slc: slice) -> bool:
    """
    Report whether a slice object is usable as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``start``, ``stop`` and ``step`` is either None
        or an integer.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    bounds = (slc.start, slc.stop, slc.step)
    return all(bound is None or is_integer(bound) for bound in bounds)
def is_list_like_indexer(key) -> bool:
    """
    Tell whether ``key`` is a list-like indexer, excluding NamedTuples.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
        False for anything that is not list-like and for instances of
        tuple subclasses; True for every other list-like.
    """
    if not is_list_like(key):
        return False
    # A tuple *subclass* (e.g. a NamedTuple) can itself be an indexer,
    # so it is not treated as a list-like of indexers.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Decide whether ``indexer`` selects a single element.

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for a tuple of exactly ``ndim`` integers, or for a bare
        integer when ``ndim`` is 1; False otherwise.
    """
    if isinstance(indexer, tuple):
        if len(indexer) != ndim:
            return False
        return all(is_integer(part) for part in indexer)
    # GH37748: allow indexer to be an integer for Series
    return bool(ndim == 1 and is_integer(indexer))
def is_empty_indexer(indexer) -> bool:
    """
    Tell whether ``indexer`` selects nothing.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when it is
        (or, for a tuple, contains) a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True
    # Treat a lone indexer as a one-element tuple so both shapes share
    # the same per-component check.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    return any(
        isinstance(component, np.ndarray) and len(component) == 0
        for component in components
    )
123# -----------------------------------------------------------
124# Indexer Validation
def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean array and the
    number of true values equals the length of ``value``. In this case,
    no exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is
        one-dimensional and the lengths don't match.
    """
    if not is_list_like(value):
        # Nothing is length-checked for a non-list-like value.
        return False

    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if len(indexer) != len(value) and values.ndim == 1:
            mask = np.array(indexer) if isinstance(indexer, list) else indexer
            # boolean with truth values == len of the value is ok too
            matches_as_bool_mask = (
                isinstance(mask, np.ndarray)
                and mask.dtype == np.bool_
                and mask.sum() == len(value)
            )
            if not matches_as_bool_mask:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return not len(indexer)

    if isinstance(indexer, slice):
        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # In case of two dimensional value is used row-wise and broadcasted
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return not len(value)

    return False
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an indexer against an array of length ``n``.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If the smallest entry is below -1.
    IndexError
        If the largest entry is ``n`` or greater.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if not len(indices):
        # An empty indexer is trivially in bounds.
        return

    min_idx = indices.min()
    if min_idx < -1:
        raise ValueError(
            f"'indices' contains values less than allowed ({min_idx} < -1)"
        )

    if indices.max() >= n:
        raise IndexError("indices are out-of-bounds")
239# -----------------------------------------------------------
240# Indexer Conversion
def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. When ``verify`` is set, any
    entry that still falls outside ``[0, n)`` raises an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. The input object itself is returned when
        it is not a list and holds no negative entries; otherwise a new
        array is returned and the input is left unmodified.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would come back as a float array, which
            # cannot be used for indexing.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Copy first so the caller's array is not mutated.
        indices = indices.copy()
        indices[negative] += n

    if verify and ((indices >= n) | (indices < 0)).any():
        raise IndexError("indices are out-of-bounds")
    return indices
290# -----------------------------------------------------------
291# Unsorted
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of ``target[indexer]``.

    Parameters
    ----------
    indexer : slice, Series, Index, ndarray, list, range or scalar
        The key whose result length is wanted.
    target : sized object, optional
        The object being indexed. Only consulted (via ``len``) when
        ``indexer`` is a slice.

    Returns
    -------
    int
        * slice with a target: number of positions the slice selects.
        * boolean Series/Index/ndarray/list: number of True entries.
        * other Series/Index/ndarray/list: ``len(indexer)``.
        * range: ``len(indexer)``.
        * anything that is not a list-like indexer: 1.

    Raises
    ------
    AssertionError
        If none of the cases above applies.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices resolves None/negative bounds and clamps them to
        # the target for either sign of step; the matching range has
        # exactly as many elements as the slice selects (never negative).
        return len(range(*indexer.indices(len(target))))

    if isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774
            return indexer.sum()
        return len(indexer)

    if isinstance(indexer, range):
        # len() handles steps that do not evenly divide the span, which
        # plain floor division of (stop - start) by step does not.
        return len(indexer)

    if not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None:
    """
    Emit the deprecation warning for multi-dimensional indexing on a 1D
    Series/Index when ``result`` has more than one dimension.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we currently return ndarray, which is deprecated
    (Deprecation GH#30588).

    Parameters
    ----------
    result : object
        Outcome of the indexing operation; its dimensionality is taken
        with ``np.ndim``.
    stacklevel : int, default 3
        Not read by this function; the stack level passed to the warning
        comes from ``find_stack_level()``.
    """
    if np.ndim(result) <= 1:
        return

    message = (
        "Support for multi-dimensional indexing (e.g. `obj[:, None]`) "
        "is deprecated and will be removed in a future "
        "version. Convert to a numpy array before indexing instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
def unpack_1tuple(tup):
    """
    Unwrap a length-1 tuple/list whose only element is a slice.

    Anything else is returned unchanged.

    Notes
    -----
    The list case is deprecated and emits a FutureWarning.
    """
    if len(tup) != 1 or not isinstance(tup[0], slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        message = (
            "Indexing with a single-item list containing a "
            "slice is deprecated and will raise in a future "
            "version. Pass a tuple instead."
        )
        warnings.warn(message, FutureWarning, stacklevel=find_stack_level())

    return tup[0]
def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Verify that ``key`` addresses as many columns as ``value`` provides.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and ``len(key)`` differs from the number
        of columns in ``value``, or if ``columns`` is not unique and the
        number of positions returned by
        ``columns.get_indexer_non_unique(key)`` differs from it.
    """
    if columns.is_unique:
        n_selected = len(key)
    else:
        # Missing keys in columns are represented as -1
        n_selected = len(columns.get_indexer_non_unique(key)[0])

    if n_selected != len(value.columns):
        raise ValueError("Columns must be same length as key")
def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n].

    One Ellipsis is stripped from the front of the tuple or, failing
    that, from the back; the single remaining element is returned.

    Raises
    ------
    IndexError
        If more than one element remains after stripping.
    """
    remaining = item
    if len(remaining) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if remaining[0] is Ellipsis:
            remaining = remaining[1:]
        elif remaining[-1] is Ellipsis:
            remaining = remaining[:-1]

    if len(remaining) <= 1:
        return remaining[0]
    raise IndexError("too many indices for array.")
426# -----------------------------------------------------------
427# Public indexer validation
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Validate ``indexer`` as an array indexer for ``array``.

    A boolean mask must have the same length as ``array``. Boolean and
    integer indexers (numpy or ExtensionArray backed) are converted to the
    corresponding numpy array; an integer indexer holding missing values
    is rejected, while missing values in a boolean mask count as False.
    Any other dtype raises.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When a boolean mask's length doesn't match, or when the indexer's
        dtype is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None)
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing)
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    indexer_dtype = indexer.dtype

    if is_bool_dtype(indexer_dtype):
        if is_extension_array_dtype(indexer_dtype):
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        n_mask, n_array = len(indexer), len(array)
        if n_mask != n_array:
            raise IndexError(
                f"Boolean index has wrong length: {n_mask} instead of {n_array}"
            )
        return indexer

    if is_integer_dtype(indexer_dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")