Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/numpy_.py: 23%
172 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import numpy as np
5from pandas._libs import lib
6from pandas._typing import (
7 Dtype,
8 NpDtype,
9 Scalar,
10 npt,
11)
12from pandas.compat.numpy import function as nv
14from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
15from pandas.core.dtypes.dtypes import PandasDtype
16from pandas.core.dtypes.missing import isna
18from pandas.core import (
19 arraylike,
20 nanops,
21 ops,
22)
23from pandas.core.arraylike import OpsMixin
24from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
25from pandas.core.construction import ensure_wrapped_if_datetimelike
26from pandas.core.strings.object_array import ObjectStringArrayMixin
class PandasArray(
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # High priority so numpy defers binary ops to our implementations
    # instead of operating on us as a plain sequence.
    __array_priority__ = 1000
    # Backing data and its wrapped dtype, set via super().__init__.
    _ndarray: np.ndarray
    _dtype: PandasDtype
    # Fill value used internally for NA by backed-array machinery.
    _internal_fill_value = np.nan
66 # ------------------------------------------------------------------------
67 # Constructors
69 def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
70 if isinstance(values, type(self)):
71 values = values._ndarray
72 if not isinstance(values, np.ndarray):
73 raise ValueError(
74 f"'values' must be a NumPy array, not {type(values).__name__}"
75 )
77 if values.ndim == 0:
78 # Technically we support 2, but do not advertise that fact.
79 raise ValueError("PandasArray must be 1-dimensional.")
81 if copy:
82 values = values.copy()
84 dtype = PandasDtype(values.dtype)
85 super().__init__(values, dtype)
87 @classmethod
88 def _from_sequence(
89 cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
90 ) -> PandasArray:
91 if isinstance(dtype, PandasDtype):
92 dtype = dtype._dtype
94 # error: Argument "dtype" to "asarray" has incompatible type
95 # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
96 # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
97 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
98 # _DTypeDict, Tuple[Any, Any]]]"
99 result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
100 if (
101 result.ndim > 1
102 and not hasattr(scalars, "dtype")
103 and (dtype is None or dtype == object)
104 ):
105 # e.g. list-of-tuples
106 result = construct_1d_object_array_from_listlike(scalars)
108 if copy and result is scalars:
109 result = result.copy()
110 return cls(result)
112 def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
113 return type(self)(arr)
115 # ------------------------------------------------------------------------
116 # Data
118 @property
119 def dtype(self) -> PandasDtype:
120 return self._dtype
122 # ------------------------------------------------------------------------
123 # NumPy Array Interface
125 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
126 return np.asarray(self._ndarray, dtype=dtype)
    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """Handle numpy ufunc calls on this array (numpy dispatch protocol)."""
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())

        # First give pandas' dunder ops (e.g. __add__) a chance to handle it.
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # Try pandas' named reductions (sum, prod, ...) before the raw ufunc.
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)
180 # ------------------------------------------------------------------------
181 # Pandas ExtensionArray Interface
183 def isna(self) -> np.ndarray:
184 return isna(self._ndarray)
186 def _validate_scalar(self, fill_value):
187 if fill_value is None:
188 # Primarily for subclasses
189 fill_value = self.dtype.na_value
190 return fill_value
192 def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
193 if self.dtype.kind in ["i", "u", "b"]:
194 fv = None
195 else:
196 fv = np.nan
197 return self._ndarray, fv
199 # ------------------------------------------------------------------------
200 # Reductions
202 def any(
203 self,
204 *,
205 axis: int | None = None,
206 out=None,
207 keepdims: bool = False,
208 skipna: bool = True,
209 ):
210 nv.validate_any((), {"out": out, "keepdims": keepdims})
211 result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
212 return self._wrap_reduction_result(axis, result)
214 def all(
215 self,
216 *,
217 axis: int | None = None,
218 out=None,
219 keepdims: bool = False,
220 skipna: bool = True,
221 ):
222 nv.validate_all((), {"out": out, "keepdims": keepdims})
223 result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
224 return self._wrap_reduction_result(axis, result)
226 def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar:
227 nv.validate_min((), kwargs)
228 result = nanops.nanmin(
229 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
230 )
231 return self._wrap_reduction_result(axis, result)
233 def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar:
234 nv.validate_max((), kwargs)
235 result = nanops.nanmax(
236 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
237 )
238 return self._wrap_reduction_result(axis, result)
240 def sum(
241 self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs
242 ) -> Scalar:
243 nv.validate_sum((), kwargs)
244 result = nanops.nansum(
245 self._ndarray, axis=axis, skipna=skipna, min_count=min_count
246 )
247 return self._wrap_reduction_result(axis, result)
249 def prod(
250 self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs
251 ) -> Scalar:
252 nv.validate_prod((), kwargs)
253 result = nanops.nanprod(
254 self._ndarray, axis=axis, skipna=skipna, min_count=min_count
255 )
256 return self._wrap_reduction_result(axis, result)
258 def mean(
259 self,
260 *,
261 axis: int | None = None,
262 dtype: NpDtype | None = None,
263 out=None,
264 keepdims: bool = False,
265 skipna: bool = True,
266 ):
267 nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
268 result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
269 return self._wrap_reduction_result(axis, result)
271 def median(
272 self,
273 *,
274 axis: int | None = None,
275 out=None,
276 overwrite_input: bool = False,
277 keepdims: bool = False,
278 skipna: bool = True,
279 ):
280 nv.validate_median(
281 (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
282 )
283 result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
284 return self._wrap_reduction_result(axis, result)
286 def std(
287 self,
288 *,
289 axis: int | None = None,
290 dtype: NpDtype | None = None,
291 out=None,
292 ddof=1,
293 keepdims: bool = False,
294 skipna: bool = True,
295 ):
296 nv.validate_stat_ddof_func(
297 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
298 )
299 result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
300 return self._wrap_reduction_result(axis, result)
302 def var(
303 self,
304 *,
305 axis: int | None = None,
306 dtype: NpDtype | None = None,
307 out=None,
308 ddof=1,
309 keepdims: bool = False,
310 skipna: bool = True,
311 ):
312 nv.validate_stat_ddof_func(
313 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
314 )
315 result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
316 return self._wrap_reduction_result(axis, result)
318 def sem(
319 self,
320 *,
321 axis: int | None = None,
322 dtype: NpDtype | None = None,
323 out=None,
324 ddof=1,
325 keepdims: bool = False,
326 skipna: bool = True,
327 ):
328 nv.validate_stat_ddof_func(
329 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
330 )
331 result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
332 return self._wrap_reduction_result(axis, result)
334 def kurt(
335 self,
336 *,
337 axis: int | None = None,
338 dtype: NpDtype | None = None,
339 out=None,
340 keepdims: bool = False,
341 skipna: bool = True,
342 ):
343 nv.validate_stat_ddof_func(
344 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
345 )
346 result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
347 return self._wrap_reduction_result(axis, result)
349 def skew(
350 self,
351 *,
352 axis: int | None = None,
353 dtype: NpDtype | None = None,
354 out=None,
355 keepdims: bool = False,
356 skipna: bool = True,
357 ):
358 nv.validate_stat_ddof_func(
359 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
360 )
361 result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
362 return self._wrap_reduction_result(axis, result)
364 # ------------------------------------------------------------------------
365 # Additional Methods
367 def to_numpy(
368 self,
369 dtype: npt.DTypeLike | None = None,
370 copy: bool = False,
371 na_value: object = lib.no_default,
372 ) -> np.ndarray:
373 result = np.asarray(self._ndarray, dtype=dtype)
375 if (copy or na_value is not lib.no_default) and result is self._ndarray:
376 result = result.copy()
378 if na_value is not lib.no_default:
379 result[self.isna()] = na_value
381 return result
383 # ------------------------------------------------------------------------
384 # Ops
386 def __invert__(self) -> PandasArray:
387 return type(self)(~self._ndarray)
389 def __neg__(self) -> PandasArray:
390 return type(self)(-self._ndarray)
392 def __pos__(self) -> PandasArray:
393 return type(self)(+self._ndarray)
395 def __abs__(self) -> PandasArray:
396 return type(self)(abs(self._ndarray))
    def _cmp_method(self, other, op):
        # Shared implementation for comparison AND arithmetic ops — see the
        # `_arith_method = _cmp_method` alias below. Unwraps the operands,
        # dispatches to pandas' array op, then re-boxes ndarray results.
        if isinstance(other, PandasArray):
            other = other._ndarray

        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)
        # Suppress numpy floating-point warnings (div-by-zero, invalid, ...);
        # pandas ops define their own NA semantics for those cases.
        with np.errstate(all="ignore"):
            result = pd_op(self._ndarray, other)

        if op is divmod or op is ops.rdivmod:
            # divmod returns a 2-tuple; wrap each half separately.
            a, b = result
            if isinstance(a, np.ndarray):
                # for e.g. op vs TimedeltaArray, we may already
                # have an ExtensionArray, in which case we do not wrap
                return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
            return a, b

        if isinstance(result, np.ndarray):
            # for e.g. multiplication vs TimedeltaArray, we may already
            # have an ExtensionArray, in which case we do not wrap
            return self._wrap_ndarray_result(result)
        return result

    _arith_method = _cmp_method
424 def _wrap_ndarray_result(self, result: np.ndarray):
425 # If we have timedelta64[ns] result, return a TimedeltaArray instead
426 # of a PandasArray
427 if result.dtype == "timedelta64[ns]":
428 from pandas.core.arrays import TimedeltaArray
430 return TimedeltaArray._simple_new(result)
431 return type(self)(result)
    # ------------------------------------------------------------------------
    # String methods interface

    # NA sentinel consumed by the ObjectStringArrayMixin string methods.
    _str_na_value = np.nan