Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/take.py: 12%
193 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import functools
4from typing import (
5 TYPE_CHECKING,
6 cast,
7 overload,
8)
10import numpy as np
12from pandas._libs import (
13 algos as libalgos,
14 lib,
15)
16from pandas._typing import (
17 ArrayLike,
18 npt,
19)
21from pandas.core.dtypes.cast import maybe_promote
22from pandas.core.dtypes.common import (
23 ensure_platform_int,
24 is_1d_only_ea_obj,
25)
26from pandas.core.dtypes.missing import na_value_for_dtype
28from pandas.core.construction import ensure_wrapped_if_datetimelike
30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
32 from pandas.core.arrays.base import ExtensionArray
@overload
def take_nd(
    arr: np.ndarray,
    indexer,
    axis: int = ...,
    fill_value=...,
    allow_fill: bool = ...,
) -> np.ndarray:
    ...


@overload
def take_nd(
    arr: ExtensionArray,
    indexer,
    axis: int = ...,
    fill_value=...,
    allow_fill: bool = ...,
) -> ArrayLike:
    ...


def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Take elements of ``arr`` along ``axis``, filling ``-1`` positions.

    Non-ndarray inputs are handed to their own ``take`` method; ndarray
    inputs go through ``_take_nd_ndarray``.

    Note: the indexer is assumed to be already validated, i.e. to contain
    no out-of-bounds indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take; subarrays corresponding to -1 value
        indices are filled with fill_value.
    axis : int, default 0
        Axis to take from.
    fill_value : any, default lib.no_default
        Value used for -1 positions. When omitted, the value returned by
        ``na_value_for_dtype(arr.dtype, compat=False)`` is used.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done. Result is undefined if allow_fill == False and -1 is
        present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)
    elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
        promoted, fill_value = maybe_promote(arr.dtype, fill_value)
        if arr.dtype != promoted:
            # EA.take must hand back an object of the same type, so when the
            # fill value forces a different dtype we cast before dispatching.
            arr = arr.astype(promoted)

    if isinstance(arr, np.ndarray):
        return _take_nd_ndarray(
            np.asarray(arr), indexer, axis, fill_value, allow_fill
        )

    # Everything below is an ExtensionArray (DatetimeArray / TimedeltaArray
    # included).
    if is_1d_only_ea_obj(arr):
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # i.e. DatetimeArray, TimedeltaArray: these accept an ``axis`` argument.
    arr = cast("NDArrayBackedExtensionArray", arr)
    return arr.take(
        indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
    )
def _take_nd_ndarray(
    arr: np.ndarray,
    indexer: npt.NDArray[np.intp] | None,
    axis: int,
    fill_value,
    allow_fill: bool,
) -> np.ndarray:
    """
    ndarray-only implementation behind ``take_nd``.

    Parameters
    ----------
    arr : np.ndarray
        Array to take from.
    indexer : np.ndarray[np.intp] or None
        Positions to take along ``axis``. ``None`` means "every position in
        order" (an ``arange`` over that axis is used).
    axis : int
        Axis to take from.
    fill_value : any
        Value written where the indexer is -1.
    allow_fill : bool
        Forwarded to ``_take_preprocess_indexer_and_fill_value``.

    Returns
    -------
    np.ndarray
        Newly allocated result.
    """
    if indexer is not None:
        indexer = ensure_platform_int(indexer)
    else:
        indexer = np.arange(arr.shape[axis], dtype=np.intp)
        # Swap in a default-constructed scalar of arr's own dtype as the
        # fill value before preprocessing.
        fill_value = arr.dtype.type()

    out_dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, allow_fill
    )

    # 2-D F-contiguous input is processed through its transpose and the
    # result is transposed back at the end.
    transposed = arr.ndim == 2 and arr.flags.f_contiguous
    if transposed:
        arr = arr.T
        axis = arr.ndim - axis - 1

    # at this point, it's guaranteed that out_dtype can hold both the arr
    # values and the fill_value
    dims = list(arr.shape)
    dims[axis] = len(indexer)
    # minor tweak that can make an order-of-magnitude difference
    # for dataframes initialized directly from 2-d ndarrays
    # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
    # f-contiguous transpose)
    layout = "F" if (arr.flags.f_contiguous and axis == arr.ndim - 1) else "C"
    out = np.empty(tuple(dims), dtype=out_dtype, order=layout)

    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
    )
    take_func(arr, indexer, out, fill_value)

    return out.T if transposed else out
def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    fill_value=None,
    allow_fill: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> ArrayLike:
    """
    Low-overhead 1-D counterpart of `take_nd`.

    Compared to `take_nd` this function:

    - expects ``arr`` to already be a numpy array / ExtensionArray
    - expects ``indexer`` to already be an intp ndarray (no conversion is
      applied here)
    - only works for 1D arrays

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take (validated indices, intp dtype).
    fill_value : any, default None
        Fill value to replace -1 values with.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done. This short-circuits computation of a mask. Result is
        undefined if allow_fill == False and -1 is present in indexer.
    mask : np.ndarray, optional, default None
        If `allow_fill` is True, and the mask (where indexer == -1) is already
        known, it can be passed to avoid recomputation.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)
    if not is_ndarray:
        # ExtensionArrays implement their own take
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
    if not allow_fill:
        # nothing to fill -> plain numpy take
        return arr.take(indexer)

    out_dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, True, mask
    )

    # out_dtype can hold both the values of arr and the fill_value
    result = np.empty(indexer.shape, dtype=out_dtype)
    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
    )
    take_func(arr, indexer, result, fill_value)
    return result
def take_2d_multi(
    arr: np.ndarray,
    indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
    fill_value=np.nan,
) -> np.ndarray:
    """
    Take along rows and columns of a 2-D array at once.

    Parameters
    ----------
    arr : np.ndarray
        2-D input array.
    indexer : tuple of (row indexer, column indexer)
        Positions to take along each axis; -1 marks a position to fill.
    fill_value : any, default np.nan
        Value used for -1 positions.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(row indexer), len(column indexer))``.
    """
    # This is only called from one place in DataFrame._reindex_multi,
    # so we know indexer is well-behaved.
    assert indexer is not None
    assert indexer[0] is not None
    assert indexer[1] is not None

    rows = ensure_platform_int(indexer[0])
    cols = ensure_platform_int(indexer[1])
    indexer = rows, cols
    mask_info = None

    # Decide on promotion from the dtypes alone first: that is cheaper than
    # building the masks.
    out_dtype, fill_value = maybe_promote(arr.dtype, fill_value)
    if out_dtype != arr.dtype:
        # Promotion is only really needed if something gets filled.
        rows_missing = rows == -1
        cols_missing = cols == -1
        any_row_missing = rows_missing.any()
        any_col_missing = cols_missing.any()
        mask_info = (rows_missing, cols_missing), (any_row_missing, any_col_missing)

        if not any_row_missing and not any_col_missing:
            # Nothing to fill: keep the original dtype and use a dummy fill
            # value (it won't be used but we don't want the cython code
            # to crash when trying to cast it to dtype).
            out_dtype, fill_value = arr.dtype, arr.dtype.type()

    # out_dtype can hold both the values of arr and the fill_value
    out = np.empty((len(rows), len(cols)), dtype=out_dtype)

    take_func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None)
    if take_func is None and arr.dtype != out.dtype:
        same_dtype_func = _take_2d_multi_dict.get(
            (out.dtype.name, out.dtype.name), None
        )
        if same_dtype_func is not None:
            take_func = _convert_wrapper(same_dtype_func, out.dtype)

    if take_func is None:
        # test_reindex_multi
        _take_2d_multi_object(
            arr, indexer, out, fill_value=fill_value, mask_info=mask_info
        )
    else:
        take_func(arr, indexer, out=out, fill_value=fill_value)

    return out
@functools.lru_cache(maxsize=128)
def _get_take_nd_function_cached(
    ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int
):
    """
    Part of _get_take_nd_function below that doesn't need `mask_info` and thus
    can be cached (mask_info potentially contains a numpy ndarray which is not
    hashable and thus cannot be used as argument for cached function).

    Parameters
    ----------
    ndim : int
        Number of dimensions of the array being taken from.
    arr_dtype, out_dtype : np.dtype
        dtypes of the input array and of the output buffer.
    axis : int
        For ``ndim == 2``: 0 selects the axis-0 table, anything else the
        axis-1 table. Ignored for ``ndim == 1``.

    Returns
    -------
    callable or None
        A specialized take function, or None when no table entry applies
        (including any ``ndim`` other than 1 or 2).
    """
    if ndim == 1:
        table = _take_1d_dict
    elif ndim == 2:
        table = _take_2d_axis0_dict if axis == 0 else _take_2d_axis1_dict
    else:
        # No specialized table for this dimensionality. Returning None lets
        # the caller fall back to its generic path instead of failing on an
        # unbound local.
        return None

    func = table.get((arr_dtype.name, out_dtype.name))
    if func is not None:
        return func

    # We get here with string, uint, float16, and complex dtypes that could
    # potentially be handled in algos_take_helper.
    # Also a couple with (M8[ns], object) and (m8[ns], object)
    func = table.get((out_dtype.name, out_dtype.name))
    if func is not None:
        # convert the input to out_dtype first, then use the same-dtype take
        return _convert_wrapper(func, out_dtype)

    return None
def _get_take_nd_function(
    ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None
):
    """
    Return the "take" implementation matching the given dimension, axis and
    dtypes.

    For ``ndim <= 2`` the cached table lookup is tried first. If it yields
    nothing (or ``ndim > 2``), a closure around ``_take_nd_object`` bound to
    ``axis`` and ``mask_info`` is returned.
    """
    if ndim <= 2:
        # this part does not depend on `mask_info` -> cached lookup
        specialized = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis)
        if specialized is not None:
            return specialized

    def func(arr, indexer, out, fill_value=np.nan):
        platform_indexer = ensure_platform_int(indexer)
        _take_nd_object(
            arr,
            platform_indexer,
            out,
            axis=axis,
            fill_value=fill_value,
            mask_info=mask_info,
        )

    return func
def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None):
    """
    Wrap take function ``f`` so its array arguments are reinterpreted first.

    Parameters
    ----------
    f : callable
        Called as ``f(arr, indexer, out, fill_value=...)``.
    arr_dtype, out_dtype : dtype, optional
        When given, ``arr`` / ``out`` are passed to ``f`` as
        ``.view(<dtype>)`` of the originals.
    fill_wrap : callable, optional
        When given, applied to ``fill_value`` before it is passed on.

    Returns
    -------
    callable
        Wrapper with the signature ``(arr, indexer, out, fill_value=np.nan)``.
    """

    def wrapper(
        arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
    ):
        source = arr if arr_dtype is None else arr.view(arr_dtype)
        target = out if out_dtype is None else out.view(out_dtype)
        fill = fill_value if fill_wrap is None else fill_wrap(fill_value)
        f(source, indexer, target, fill_value=fill)

    return wrapper
def _convert_wrapper(f, conv_dtype):
    """
    Wrap take function ``f`` so the input array is cast to ``conv_dtype``
    before ``f`` is called.

    Parameters
    ----------
    f : callable
        Called as ``f(arr, indexer, out, fill_value=...)``.
    conv_dtype : dtype
        dtype the input array is converted to via ``astype``.

    Returns
    -------
    callable
        Wrapper with the signature ``(arr, indexer, out, fill_value=np.nan)``.
    """

    def wrapper(
        arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
    ):
        # GH#39755 avoid casting dt64/td64 to integers
        source = ensure_wrapped_if_datetimelike(arr) if conv_dtype == object else arr
        f(source.astype(conv_dtype), indexer, out, fill_value=fill_value)

    return wrapper
# (input dtype name, output dtype name) pairs for which libalgos exposes a
# directly usable ``<prefix>_<in>_<out>`` take function.
_DIRECT_TAKE_DTYPE_PAIRS = (
    ("int8", "int8"),
    ("int8", "int32"),
    ("int8", "int64"),
    ("int8", "float64"),
    ("int16", "int16"),
    ("int16", "int32"),
    ("int16", "int64"),
    ("int16", "float64"),
    ("int32", "int32"),
    ("int32", "int64"),
    ("int32", "float64"),
    ("int64", "int64"),
    ("int64", "float64"),
    ("float32", "float32"),
    ("float32", "float64"),
    ("float64", "float64"),
    ("object", "object"),
)


def _build_take_dict(prefix):
    """
    Build the dtype-pair -> take function table for one libalgos family.

    ``prefix`` is the common name prefix of the family, e.g. ``"take_1d"``;
    functions are looked up on ``libalgos`` as ``<prefix>_<in>_<out>``.
    """
    table = {
        (src, dst): getattr(libalgos, f"{prefix}_{src}_{dst}")
        for src, dst in _DIRECT_TAKE_DTYPE_PAIRS
    }

    # bool arrays are handed to the bool kernels as uint8 views
    table[("bool", "bool")] = _view_wrapper(
        getattr(libalgos, f"{prefix}_bool_bool"), np.uint8, np.uint8
    )
    table[("bool", "object")] = _view_wrapper(
        getattr(libalgos, f"{prefix}_bool_object"), np.uint8, None
    )

    # datetime64[ns] / timedelta64[ns] reuse the int64 kernel on int64 views,
    # with the fill value passed through np.int64
    for name in ("datetime64[ns]", "timedelta64[ns]"):
        table[(name, name)] = _view_wrapper(
            getattr(libalgos, f"{prefix}_int64_int64"),
            np.int64,
            np.int64,
            fill_wrap=np.int64,
        )
    return table


_take_1d_dict = _build_take_dict("take_1d")

_take_2d_axis0_dict = _build_take_dict("take_2d_axis0")

_take_2d_axis1_dict = _build_take_dict("take_2d_axis1")

_take_2d_multi_dict = _build_take_dict("take_2d_multi")
def _take_nd_object(
    arr: np.ndarray,
    indexer: npt.NDArray[np.intp],
    out: np.ndarray,
    axis: int,
    fill_value,
    mask_info,
):
    """
    Generic numpy-based take that writes into ``out``.

    Parameters
    ----------
    arr : np.ndarray
        Array to take from; cast to ``out.dtype`` first if the dtypes differ.
    indexer : np.ndarray[np.intp]
        Positions to take along ``axis``; -1 marks positions to fill.
    out : np.ndarray
        Pre-allocated output, modified in place.
    axis : int
        Axis to take from.
    fill_value : any
        Value assigned where the mask is set.
    mask_info : tuple of (mask, needs_masking) or None
        Precomputed ``indexer == -1`` mask and whether anything needs
        filling. Computed here when None.
    """
    if mask_info is None:
        fill_mask = indexer == -1
        has_fill = fill_mask.any()
    else:
        fill_mask, has_fill = mask_info

    source = arr.astype(out.dtype) if arr.dtype != out.dtype else arr

    # the take step is skipped when there is nothing along ``axis``
    if source.shape[axis] > 0:
        source.take(indexer, axis=axis, out=out)

    if has_fill:
        # overwrite the filled positions along ``axis`` with fill_value
        selector = [slice(None) for _ in range(source.ndim)]
        selector[axis] = fill_mask
        out[tuple(selector)] = fill_value
def _take_2d_multi_object(
    arr: np.ndarray,
    indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
    out: np.ndarray,
    fill_value,
    mask_info,
) -> None:
    """
    Pure-Python fallback for taking along both axes of a 2-D array.

    Parameters
    ----------
    arr : np.ndarray
        2-D array to take from.
    indexer : tuple of (row indexer, column indexer)
        Positions to take along each axis; -1 marks a position to fill.
    out : np.ndarray
        Pre-allocated output of shape ``(len(rows), len(cols))``, modified
        in place.
    fill_value : any
        Value written to rows / columns whose indexer entry is -1. When
        None, those positions are left as they are in ``out``.
    mask_info : tuple or None
        ``((row_mask, col_mask), (row_needs, col_needs))`` if already known,
        otherwise the masks are computed here.
    """
    # this is not ideal, performance-wise, but it's better than raising
    # an exception (best to optimize in Cython to avoid getting here)
    row_idx, col_idx = indexer  # both np.intp
    if mask_info is not None:
        (row_mask, col_mask), (row_needs, col_needs) = mask_info
    else:
        row_mask = row_idx == -1
        col_mask = col_idx == -1
        row_needs = row_mask.any()
        col_needs = col_mask.any()
    if fill_value is not None:
        if row_needs:
            out[row_mask, :] = fill_value
        if col_needs:
            out[:, col_mask] = fill_value
    for i, u_ in enumerate(row_idx):
        if u_ == -1:
            # whole row is a fill row; copying arr[-1, ...] here would
            # overwrite the fill_value written above
            continue
        for j, v in enumerate(col_idx):
            if v != -1:
                out[i, j] = arr[u_, v]
def _take_preprocess_indexer_and_fill_value(
    arr: np.ndarray,
    indexer: npt.NDArray[np.intp],
    fill_value,
    allow_fill: bool,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Work out the output dtype, effective fill value and mask info for a take.

    Parameters
    ----------
    arr : np.ndarray
        Array that will be taken from.
    indexer : np.ndarray[np.intp]
        Positions to take; -1 marks positions to fill.
    fill_value : any
        Requested fill value.
    allow_fill : bool
        When False no filling happens and ``arr.dtype`` is kept.
    mask : np.ndarray[bool], optional
        Precomputed ``indexer == -1``. When supplied, masking is treated as
        needed without inspecting it.

    Returns
    -------
    dtype : np.dtype
        dtype the output should have.
    fill_value : any
        Fill value to pass on (a dummy scalar of ``arr.dtype`` when nothing
        will be filled).
    mask_info : tuple of (mask, needs_masking) or None
        None when the mask was never needed.
    """
    if not allow_fill:
        return arr.dtype, arr.dtype.type(), (None, False)

    # Check for promotion based on types only first, because that is
    # faster than computing a mask.
    promoted, fill_value = maybe_promote(arr.dtype, fill_value)
    if promoted == arr.dtype:
        return promoted, fill_value, None

    # Promotion is only really required if some position gets filled.
    if mask is None:
        mask = indexer == -1
        needs_masking = bool(mask.any())
    else:
        needs_masking = True

    if needs_masking:
        return promoted, fill_value, (mask, needs_masking)

    # Nothing to fill: depromote and set fill_value to a dummy
    # (it won't be used but we don't want the cython code
    # to crash when trying to cast it to dtype).
    return arr.dtype, arr.dtype.type(), (mask, needs_masking)