Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/_mixins.py: 29%
204 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from functools import wraps
4from typing import (
5 TYPE_CHECKING,
6 Any,
7 Literal,
8 Sequence,
9 TypeVar,
10 cast,
11 overload,
12)
14import numpy as np
16from pandas._libs import lib
17from pandas._libs.arrays import NDArrayBacked
18from pandas._typing import (
19 ArrayLike,
20 Dtype,
21 F,
22 PositionalIndexer2D,
23 PositionalIndexerTuple,
24 ScalarIndexer,
25 SequenceIndexer,
26 Shape,
27 TakeIndexer,
28 npt,
29 type_t,
30)
31from pandas.errors import AbstractMethodError
32from pandas.util._decorators import doc
33from pandas.util._validators import (
34 validate_bool_kwarg,
35 validate_fillna_kwargs,
36 validate_insert_loc,
37)
39from pandas.core.dtypes.common import (
40 is_dtype_equal,
41 pandas_dtype,
42)
43from pandas.core.dtypes.dtypes import (
44 DatetimeTZDtype,
45 ExtensionDtype,
46 PeriodDtype,
47)
48from pandas.core.dtypes.missing import array_equivalent
50from pandas.core import missing
51from pandas.core.algorithms import (
52 take,
53 unique,
54 value_counts,
55)
56from pandas.core.array_algos.quantile import quantile_with_mask
57from pandas.core.array_algos.transforms import shift
58from pandas.core.arrays.base import ExtensionArray
59from pandas.core.construction import extract_array
60from pandas.core.indexers import check_array_indexer
61from pandas.core.sorting import nargminmax
# TypeVar bound to NDArrayBackedExtensionArray so that methods annotated
# ``self: NDArrayBackedExtensionArrayT -> NDArrayBackedExtensionArrayT``
# preserve the concrete subclass type for callers.
NDArrayBackedExtensionArrayT = TypeVar(
    "NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray"
)
if TYPE_CHECKING:
68 from pandas._typing import (
69 NumpySorter,
70 NumpyValueArrayLike,
71 )
73 from pandas import Series
def ravel_compat(meth: F) -> F:
    """
    Decorator to ravel a 2D array before passing it to a cython operation,
    then reshape the result to our own shape.
    """

    @wraps(meth)
    def wrapper(self, *args, **kwargs):
        # 1-D arrays need no flattening; dispatch straight through.
        if self.ndim == 1:
            return meth(self, *args, **kwargs)

        # Capture the memory layout *before* ravel so the reshape below
        # restores the original contiguity.
        was_f_contiguous = self._ndarray.flags.f_contiguous
        flattened = self.ravel("K")
        out = meth(flattened, *args, **kwargs)
        return out.reshape(self.shape, order="F" if was_f_contiguous else "C")

    return cast(F, wrapper)
class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):
    """
    ExtensionArray that is backed by a single NumPy ndarray.

    Subclasses keep their data in ``self._ndarray`` and customize behavior
    through the small hooks defined here (``_box_func``, ``_validate_scalar``,
    ``_validate_setitem_value``, ...).
    """

    # The single backing array holding the raw data.
    _ndarray: np.ndarray

    # scalar used to denote NA value inside our self._ndarray, e.g. -1
    # for Categorical, iNaT for Period. Outside of object dtype,
    # self.isna() should be exactly locations in self._ndarray with
    # _internal_fill_value.
    _internal_fill_value: Any

    def _box_func(self, x):
        """
        Wrap numpy type in our dtype.type if necessary.

        Identity by default; subclasses override to convert raw backing-array
        elements into user-facing scalars.
        """
        return x

    def _validate_scalar(self, value):
        """
        Validate a scalar and convert it to the backing ndarray's
        representation. Abstract; subclasses must implement.
        """
        # used by NDArrayBackedExtensionIndex.insert
        raise AbstractMethodError(self)

    # ------------------------------------------------------------------------

    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        """
        Return a view of the data reinterpreted as ``dtype``.

        Parameters
        ----------
        dtype : str, np.dtype, ExtensionDtype, type, or None

        Returns
        -------
        ExtensionArray or np.ndarray
            A pandas array for period/datetime-like targets, otherwise a
            view of the underlying ndarray.
        """
        # We handle datetime64, datetime64tz, timedelta64, and period
        # dtypes here. Everything else we pass through to the underlying
        # ndarray.
        if dtype is None or dtype is self.dtype:
            # No reinterpretation needed; re-wrap the same backing data.
            return self._from_backing_data(self._ndarray)

        if isinstance(dtype, type):
            # we sometimes pass non-dtype objects, e.g np.ndarray;
            # pass those through to the underlying ndarray
            return self._ndarray.view(dtype)

        dtype = pandas_dtype(dtype)
        arr = self._ndarray

        if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)):
            cls = dtype.construct_array_type()
            return cls(arr.view("i8"), dtype=dtype)
        elif dtype == "M8[ns]":
            # local import to avoid a circular dependency
            from pandas.core.arrays import DatetimeArray

            return DatetimeArray(arr.view("i8"), dtype=dtype)
        elif dtype == "m8[ns]":
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray(arr.view("i8"), dtype=dtype)

        # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
        # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
        # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
        # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
        return arr.view(dtype=dtype)  # type: ignore[arg-type]

    def take(
        self: NDArrayBackedExtensionArrayT,
        indices: TakeIndexer,
        *,
        allow_fill: bool = False,
        fill_value: Any = None,
        axis: int = 0,
    ) -> NDArrayBackedExtensionArrayT:
        """
        Take elements along ``axis``, delegating to pandas' ``take`` on the
        backing ndarray. When ``allow_fill`` is True, ``fill_value`` is first
        validated/converted via ``_validate_scalar``.
        """
        if allow_fill:
            fill_value = self._validate_scalar(fill_value)

        new_data = take(
            self._ndarray,
            indices,
            allow_fill=allow_fill,
            fill_value=fill_value,
            axis=axis,
        )
        return self._from_backing_data(new_data)

    # ------------------------------------------------------------------------

    def equals(self, other) -> bool:
        """
        True if ``other`` has exactly the same type and dtype and the two
        backing ndarrays are equivalent per ``array_equivalent``.
        """
        if type(self) is not type(other):
            return False
        if not is_dtype_equal(self.dtype, other.dtype):
            return False
        return bool(array_equivalent(self._ndarray, other._ndarray))

    @classmethod
    def _from_factorized(cls, values, original):
        # Reconstruct an array from factorize() output; the codes must share
        # the original backing dtype.
        assert values.dtype == original._ndarray.dtype
        return original._from_backing_data(values)

    def _values_for_argsort(self) -> np.ndarray:
        """Return the ndarray used for sorting (the backing array itself)."""
        return self._ndarray

    def _values_for_factorize(self):
        """Return (values, na_sentinel) for factorization."""
        return self._ndarray, self._internal_fill_value

    # Signature of "argmin" incompatible with supertype "ExtensionArray"
    def argmin(self, axis: int = 0, skipna: bool = True):  # type: ignore[override]
        """
        Index of the minimum value.

        Raises NotImplementedError if ``skipna=False`` and NAs are present.
        """
        # override base class by adding axis keyword
        validate_bool_kwarg(skipna, "skipna")
        if not skipna and self._hasna:
            raise NotImplementedError
        return nargminmax(self, "argmin", axis=axis)

    # Signature of "argmax" incompatible with supertype "ExtensionArray"
    def argmax(self, axis: int = 0, skipna: bool = True):  # type: ignore[override]
        """
        Index of the maximum value.

        Raises NotImplementedError if ``skipna=False`` and NAs are present.
        """
        # override base class by adding axis keyword
        validate_bool_kwarg(skipna, "skipna")
        if not skipna and self._hasna:
            raise NotImplementedError
        return nargminmax(self, "argmax", axis=axis)

    def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT:
        """Return the unique values as a new array of the same type."""
        new_data = unique(self._ndarray)
        return self._from_backing_data(new_data)

    @classmethod
    @doc(ExtensionArray._concat_same_type)
    def _concat_same_type(
        cls: type[NDArrayBackedExtensionArrayT],
        to_concat: Sequence[NDArrayBackedExtensionArrayT],
        axis: int = 0,
    ) -> NDArrayBackedExtensionArrayT:
        # All inputs must share one dtype (string comparison also catches
        # tz mismatches for datetime-like dtypes).
        dtypes = {str(x.dtype) for x in to_concat}
        if len(dtypes) != 1:
            raise ValueError("to_concat must have the same dtype (tz)", dtypes)

        new_values = [x._ndarray for x in to_concat]
        new_arr = np.concatenate(new_values, axis=axis)
        return to_concat[0]._from_backing_data(new_arr)

    @doc(ExtensionArray.searchsorted)
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        # TODO(2.0): use _validate_setitem_value once dt64tz mismatched-timezone
        # deprecation is enforced
        npvalue = self._validate_searchsorted_value(value)
        return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)

    def _validate_searchsorted_value(
        self, value: NumpyValueArrayLike | ExtensionArray
    ) -> NumpyValueArrayLike:
        """
        Coerce a searchsorted needle to something the backing ndarray's
        ``searchsorted`` understands (ExtensionArray -> ndarray).
        """
        # TODO(2.0): after deprecation in datetimelikearraymixin is enforced,
        # we can remove this and use _validate_setitem_value directly
        if isinstance(value, ExtensionArray):
            return value.to_numpy()
        else:
            return value

    @doc(ExtensionArray.shift)
    def shift(self, periods=1, fill_value=None, axis=0):
        # Validate/convert the fill value, then shift the backing ndarray.
        fill_value = self._validate_shift_value(fill_value)
        new_values = shift(self._ndarray, periods, axis, fill_value)

        return self._from_backing_data(new_values)

    def _validate_shift_value(self, fill_value):
        # TODO(2.0): after deprecation in datetimelikearraymixin is enforced,
        # we can remove this and use validate_fill_value directly
        return self._validate_scalar(fill_value)

    def __setitem__(self, key, value) -> None:
        # Validate both the indexer and the value before mutating the
        # backing array in place.
        key = check_array_indexer(self, key)
        value = self._validate_setitem_value(value)
        self._ndarray[key] = value

    def _validate_setitem_value(self, value):
        """
        Validate/convert a value being assigned into the backing ndarray.
        Identity by default; subclasses override to coerce or raise.
        """
        return value

    @overload
    def __getitem__(self, key: ScalarIndexer) -> Any:
        ...

    @overload
    def __getitem__(
        self: NDArrayBackedExtensionArrayT,
        key: SequenceIndexer | PositionalIndexerTuple,
    ) -> NDArrayBackedExtensionArrayT:
        ...

    def __getitem__(
        self: NDArrayBackedExtensionArrayT,
        key: PositionalIndexer2D,
    ) -> NDArrayBackedExtensionArrayT | Any:
        if lib.is_integer(key):
            # fast-path: skip indexer validation for a plain integer
            result = self._ndarray[key]
            if self.ndim == 1:
                # scalar result -> box to the user-facing scalar type
                return self._box_func(result)
            return self._from_backing_data(result)

        # error: Incompatible types in assignment (expression has type "ExtensionArray",
        # variable has type "Union[int, slice, ndarray]")
        key = extract_array(key, extract_numpy=True)  # type: ignore[assignment]
        key = check_array_indexer(self, key)
        result = self._ndarray[key]
        if lib.is_scalar(result):
            return self._box_func(result)

        result = self._from_backing_data(result)
        return result

    def _fill_mask_inplace(
        self, method: str, limit, mask: npt.NDArray[np.bool_]
    ) -> None:
        """
        Fill the positions given by ``mask`` in place using the given
        pad/backfill ``method``. Mutates ``self._ndarray``; returns None.
        """
        # (for now) when self.ndim == 2, we assume axis=0
        func = missing.get_fill_func(method, ndim=self.ndim)
        func(self._ndarray.T, limit=limit, mask=mask.T)
        return

    @doc(ExtensionArray.fillna)
    def fillna(
        self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None
    ) -> NDArrayBackedExtensionArrayT:
        # value and method are mutually exclusive; this normalizes/validates.
        value, method = validate_fillna_kwargs(
            value, method, validate_scalar_dict_value=False
        )

        mask = self.isna()
        # error: Argument 2 to "check_value_size" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        value = missing.check_value_size(
            value, mask, len(self)  # type: ignore[arg-type]
        )

        if mask.any():
            if method is not None:
                # TODO: check value is None
                # (for now) when self.ndim == 2, we assume axis=0
                func = missing.get_fill_func(method, ndim=self.ndim)
                npvalues = self._ndarray.T.copy()
                func(npvalues, limit=limit, mask=mask.T)
                npvalues = npvalues.T

                # TODO: PandasArray didn't used to copy, need tests for this
                new_values = self._from_backing_data(npvalues)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            # We validate the fill_value even if there is nothing to fill
            if value is not None:
                self._validate_setitem_value(value)

            new_values = self.copy()
        return new_values

    # ------------------------------------------------------------------------
    # Reductions

    def _wrap_reduction_result(self, axis: int | None, result):
        """
        Box a reduction's result: a scalar for full reductions (or 1-D
        arrays), otherwise re-wrap the reduced ndarray as our type.
        """
        if axis is None or self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    # ------------------------------------------------------------------------
    # __array_function__ methods

    def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
        """
        Analogue to np.putmask(self, mask, value)

        Parameters
        ----------
        mask : np.ndarray[bool]
        value : scalar or listlike

        Raises
        ------
        TypeError
            If value cannot be cast to self.dtype.
        """
        value = self._validate_setitem_value(value)

        np.putmask(self._ndarray, mask, value)

    def _where(
        self: NDArrayBackedExtensionArrayT, mask: npt.NDArray[np.bool_], value
    ) -> NDArrayBackedExtensionArrayT:
        """
        Analogue to np.where(mask, self, value)

        Parameters
        ----------
        mask : np.ndarray[bool]
        value : scalar or listlike

        Raises
        ------
        TypeError
            If value cannot be cast to self.dtype.
        """
        value = self._validate_setitem_value(value)

        res_values = np.where(mask, self._ndarray, value)
        return self._from_backing_data(res_values)

    # ------------------------------------------------------------------------
    # Index compat methods

    def insert(
        self: NDArrayBackedExtensionArrayT, loc: int, item
    ) -> NDArrayBackedExtensionArrayT:
        """
        Make new ExtensionArray inserting new item at location. Follows
        Python list.append semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        type(self)
        """
        loc = validate_insert_loc(loc, len(self))

        # Convert the item to its backing-array representation.
        code = self._validate_scalar(item)

        new_vals = np.concatenate(
            (
                self._ndarray[:loc],
                np.asarray([code], dtype=self._ndarray.dtype),
                self._ndarray[loc:],
            )
        )
        return self._from_backing_data(new_vals)

    # ------------------------------------------------------------------------
    # Additional array methods
    # These are not part of the EA API, but we implement them because
    # pandas assumes they're there.

    def value_counts(self, dropna: bool = True) -> Series:
        """
        Return a Series containing counts of unique values.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of NA values.

        Returns
        -------
        Series
        """
        if self.ndim != 1:
            raise NotImplementedError

        # local import to avoid a circular dependency
        from pandas import (
            Index,
            Series,
        )

        if dropna:
            # error: Unsupported operand type for ~ ("ExtensionArray")
            values = self[~self.isna()]._ndarray  # type: ignore[operator]
        else:
            values = self._ndarray

        result = value_counts(values, sort=False, dropna=dropna)

        # Re-wrap the resulting index values as our own array type.
        index_arr = self._from_backing_data(np.asarray(result.index._data))
        index = Index(index_arr, name=result.index.name)
        return Series(result._values, index=index, name=result.name)

    def _quantile(
        self: NDArrayBackedExtensionArrayT,
        qs: npt.NDArray[np.float64],
        interpolation: str,
    ) -> NDArrayBackedExtensionArrayT:
        """
        Compute quantiles at probabilities ``qs`` via ``quantile_with_mask``
        on the backing ndarray, masking NA positions with the internal
        fill value.
        """
        # TODO: disable for Categorical if not ordered?

        mask = np.asarray(self.isna())
        arr = self._ndarray
        fill_value = self._internal_fill_value

        res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)

        res_values = self._cast_quantile_result(res_values)
        return self._from_backing_data(res_values)

    # TODO: see if we can share this with other dispatch-wrapping methods
    def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
        """
        Cast the result of quantile_with_mask to an appropriate dtype
        to pass to _from_backing_data in _quantile.
        """
        return res_values

    # ------------------------------------------------------------------------
    # numpy-like methods

    @classmethod
    def _empty(
        cls: type_t[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype
    ) -> NDArrayBackedExtensionArrayT:
        """
        Analogous to np.empty(shape, dtype=dtype)

        Parameters
        ----------
        shape : tuple[int]
        dtype : ExtensionDtype
        """
        # The base implementation uses a naive approach to find the dtype
        # for the backing ndarray
        arr = cls._from_sequence([], dtype=dtype)
        backing = np.empty(shape, dtype=arr._ndarray.dtype)
        return arr._from_backing_data(backing)