Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/nanops.py: 14%
630 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import functools
4import itertools
5import operator
6from typing import (
7 Any,
8 Callable,
9 cast,
10)
11import warnings
13import numpy as np
15from pandas._config import get_option
17from pandas._libs import (
18 NaT,
19 NaTType,
20 iNaT,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 Dtype,
26 DtypeObj,
27 F,
28 Scalar,
29 Shape,
30 npt,
31)
32from pandas.compat._optional import import_optional_dependency
34from pandas.core.dtypes.common import (
35 is_any_int_dtype,
36 is_bool_dtype,
37 is_complex,
38 is_datetime64_any_dtype,
39 is_float,
40 is_float_dtype,
41 is_integer,
42 is_integer_dtype,
43 is_numeric_dtype,
44 is_object_dtype,
45 is_scalar,
46 is_timedelta64_dtype,
47 needs_i8_conversion,
48 pandas_dtype,
49)
50from pandas.core.dtypes.dtypes import PeriodDtype
51from pandas.core.dtypes.missing import (
52 isna,
53 na_value_for_dtype,
54 notna,
55)
57from pandas.core.construction import extract_array
# Optional bottleneck acceleration: import it if available, emitting a
# warning (not raising) when it is missing.  _USE_BOTTLENECK starts False
# and is toggled by set_use_bottleneck() below, driven by the
# "compute.use_bottleneck" option.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
_USE_BOTTLENECK = False
def set_use_bottleneck(v: bool = True) -> None:
    """
    Enable or disable dispatching reductions to bottleneck.

    Has no effect when bottleneck is not installed.

    Parameters
    ----------
    v : bool, default True
        Whether nanops should use bottleneck where possible.
    """
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v
71set_use_bottleneck(get_option("compute.use_bottleneck"))
class disallow:
    """
    Decorator class that rejects reductions on the given dtypes.

    Any positional or keyword argument whose ``.dtype.type`` is a subclass
    of one of the disallowed dtypes triggers a TypeError before the wrapped
    function runs.  ValueErrors raised on object-dtype input are re-raised
    as TypeError, the conventional error for disallowed reductions.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        # True when obj carries a dtype matching one of the disallowed dtypes.
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            for obj in itertools.chain(args, kwargs.values()):
                if self.check(obj):
                    f_name = f.__name__.replace("nan", "")
                    raise TypeError(
                        f"reduction operation '{f_name}' not allowed for this dtype"
                    )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # Object arrays containing e.g. strings make numpy raise
                # ValueError; surface the more typical TypeError instead.
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)
class bottleneck_switch:
    """
    Decorator factory: dispatch a nanop to the same-named bottleneck
    function when bottleneck is enabled and the dtype is supported;
    otherwise fall back to the decorated (``alt``) implementation.

    Parameters
    ----------
    name : str, optional
        bottleneck function name; defaults to the decorated function's name.
    **kwargs
        Default keyword arguments injected into every call (e.g. ``ddof=1``).
    """

    def __init__(self, name=None, **kwargs) -> None:
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bn is None when bottleneck is not installed
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: int | None = None,
            skipna: bool = True,
            **kwds,
        ):
            # Inject this switch's default kwargs without overriding
            # caller-supplied values.
            if len(self.kwargs) > 0:
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    # a mask was supplied: bottleneck cannot honor it
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
162def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
163 # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
164 if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):
165 # GH 42878
166 # Bottleneck uses naive summation leading to O(n) loss of precision
167 # unlike numpy which implements pairwise summation, which has O(log(n)) loss
168 # crossref: https://github.com/pydata/bottleneck/issues/379
170 # GH 15507
171 # bottleneck does not properly upcast during the sum
172 # so can overflow
174 # GH 9422
175 # further we also want to preserve NaN when all elements
176 # are NaN, unlike bottleneck/numpy which consider this
177 # to be 0
178 return name not in ["nansum", "nanprod", "nanmean"]
179 return False
182def _has_infs(result) -> bool:
183 if isinstance(result, np.ndarray):
184 if result.dtype == "f8" or result.dtype == "f4":
185 # Note: outside of an nanops-specific test, we always have
186 # result.ndim == 1, so there is no risk of this ravel making a copy.
187 return lib.has_infs(result.ravel("K"))
188 try:
189 return np.isinf(result).any()
190 except (TypeError, NotImplementedError):
191 # if it doesn't support infs, then it can't have infs
192 return False
195def _get_fill_value(
196 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
197):
198 """return the correct fill value for the dtype of the values"""
199 if fill_value is not None:
200 return fill_value
201 if _na_ok_dtype(dtype):
202 if fill_value_typ is None:
203 return np.nan
204 else:
205 if fill_value_typ == "+inf":
206 return np.inf
207 else:
208 return -np.inf
209 else:
210 if fill_value_typ == "+inf":
211 # need the max int here
212 return lib.i8max
213 else:
214 return iNaT
217def _maybe_get_mask(
218 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
219) -> npt.NDArray[np.bool_] | None:
220 """
221 Compute a mask if and only if necessary.
223 This function will compute a mask iff it is necessary. Otherwise,
224 return the provided mask (potentially None) when a mask does not need to be
225 computed.
227 A mask is never necessary if the values array is of boolean or integer
228 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable
229 dtype that is interpretable as either boolean or integer data (eg,
230 timedelta64), a mask must be provided.
232 If the skipna parameter is False, a new mask will not be computed.
234 The mask is computed using isna() by default. Setting invert=True selects
235 notna() as the masking function.
237 Parameters
238 ----------
239 values : ndarray
240 input array to potentially compute mask for
241 skipna : bool
242 boolean for whether NaNs should be skipped
243 mask : Optional[ndarray]
244 nan-mask if known
246 Returns
247 -------
248 Optional[np.ndarray[bool]]
249 """
250 if mask is None:
251 if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
252 # Boolean data cannot contain nulls, so signal via mask being None
253 return None
255 if skipna or needs_i8_conversion(values.dtype):
256 mask = isna(values)
258 return mask
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    # compute the mask from the original values, before any i8 view below
    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # dtype can hold the fill value: fill a copy in place
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
350def _na_ok_dtype(dtype: DtypeObj) -> bool:
351 if needs_i8_conversion(dtype):
352 return False
353 return not issubclass(dtype.type, np.integer)
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    wrap our results if needed

    Cast an (int64-based) reduction result back to the datetimelike
    ``dtype`` it came from; non-datetimelike dtypes pass through unchanged.

    Parameters
    ----------
    result : scalar or ndarray
        raw reduction result, possibly in i8 space
    dtype : np.dtype
        dtype of the original values
    fill_value : optional
        sentinel that represents "missing" in ``result``

    Raises
    ------
    ValueError
        on timedelta results too large for timedelta64[ns]
    """
    if result is NaT:
        # already a proper missing value; nothing to do
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns")
            else:
                result = np.int64(result).view("datetime64[ns]")
            # retain original unit
            result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        orig_values = values

        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            # mask must come from the original datetimelike values
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the i8-based result back to the original dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

            if not skipna:
                assert mask is not None  # checked above
                # with skipna=False any masked value poisons the result
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)
431def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray:
432 """
433 Return the missing value for `values`.
435 Parameters
436 ----------
437 values : ndarray
438 axis : int or None
439 axis for the reduction, required if values.ndim > 1.
441 Returns
442 -------
443 result : scalar or ndarray
444 For 1-D values, returns a scalar of the correct missing type.
445 For 2-D values, returns a 1-D array where each element is missing.
446 """
447 # we either return np.nan or pd.NaT
448 if is_numeric_dtype(values):
449 values = values.astype("float64")
450 fill_value = na_value_for_dtype(values.dtype)
452 if values.ndim == 1:
453 return fill_value
454 elif axis is None:
455 return fill_value
456 else:
457 result_shape = values.shape[:axis] + values.shape[axis + 1 :]
459 return np.full(result_shape, fill_value, dtype=values.dtype)
def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs):
        # Only reroute wide, C-contiguous, non-object/bool 2-D arrays reduced
        # along axis=1; for the wideness threshold see
        # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
        wide_row_reduction = (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        )
        if not wide_row_reduction:
            return func(values, axis=axis, **kwargs)

        rows = list(values)
        if kwargs.get("mask") is not None:
            mask = kwargs.pop("mask")
            results = [
                func(row, mask=row_mask, **kwargs)
                for row, row_mask in zip(rows, mask)
            ]
        else:
            results = [func(row, **kwargs) for row in rows]
        return np.array(results)

    return cast(F, newfunc)
def nanany(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    # NaNs are replaced by False so they can never count as truthy.
    values = _get_values(values, skipna, fill_value=False, mask=mask)[0]

    if is_object_dtype(values):
        # For object type, any won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.any(axis)  # type: ignore[return-value]
def nanall(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    # NaNs are replaced by True so they can never break an all-True run.
    values = _get_values(values, skipna, fill_value=True, mask=mask)[0]

    if is_object_dtype(values):
        # For object type, all won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.all(axis)  # type: ignore[return-value]
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        minimum number of non-NA values required for a non-NA result
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )

    # Accumulate in the platform-independent dtype, except: keep float
    # precision as-is, and sum timedeltas (i8 view) in float64.
    if is_float_dtype(dtype):
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
    else:
        dtype_sum = dtype_max

    the_sum = values.sum(axis, dtype=dtype_sum)
    return _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
637def _mask_datetimelike_result(
638 result: np.ndarray | np.datetime64 | np.timedelta64,
639 axis: int | None,
640 mask: npt.NDArray[np.bool_],
641 orig_values: np.ndarray,
642) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
643 if isinstance(result, np.ndarray):
644 # we need to apply the mask
645 result = result.astype("i8").view(orig_values.dtype)
646 axis_mask = mask.any(axis=axis)
647 # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
648 # datetime64, timedelta64]")
649 result[axis_mask] = iNaT # type: ignore[index]
650 else:
651 if mask.any():
652 return np.int64(iNaT).view(orig_values.dtype)
653 return result
@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )

    # Choose accumulation dtypes for the sum and the count.
    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"] or is_integer_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
        dtype_count = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # keep the input's float precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype
    else:
        dtype_sum = dtype_max
        dtype_count = np.dtype(np.float64)

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        # array result: divide elementwise, forcing NaN where count == 0
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
        return the_mean

    # scalar result
    return the_sum / count if count > 0 else np.nan
@bottleneck_switch()
def nanmedian(values, *, axis=None, skipna=True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x):
        # median over the non-null entries of a 1-D slice; NaN when
        # skipna=False and any entry is null
        mask = notna(x)
        if not skipna and not mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings("ignore", "All-NaN slice encountered")
            res = np.nanmedian(x[mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:

        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings("ignore", "All-NaN slice encountered")
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            # NOTE: np.float64 (not the np.float_ alias, removed in NumPy 2.0)
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values) if notempty else np.nan

    return _wrap_results(res, dtype)
def get_empty_reduction_result(
    shape: tuple[int, ...],
    axis: int,
    dtype: np.dtype | type[np.floating],
    fill_value: Any,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray.

    Parameters
    ----------
    shape : Tuple[int]
        shape of the (empty) array being reduced
    axis : int
        the axis that would have been reduced away
    fill_value : Any
        value the result is filled with

    Returns
    -------
    np.ndarray
        ``shape`` with ``axis`` removed, filled with ``fill_value``.
    """
    # drop the reduced axis from the shape, then fill with the NA value
    result_shape = tuple(n for i, n in enumerate(shape) if i != axis)
    return np.full(result_shape, fill_value, dtype=dtype)
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: int | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # Degenerate counts (<= ddof) would produce inf/-inf downstream;
    # always return NaN, never inf.
    if is_scalar(count):
        if count <= ddof:
            count = np.nan
            d = np.nan
    else:
        # count is not narrowed by is_scalar check
        count = cast(np.ndarray, count)
        degenerate = count <= ddof
        if degenerate.any():
            np.putmask(d, degenerate, np.nan)
            np.putmask(count, degenerate, np.nan)
    return count, d
@bottleneck_switch(ddof=1)
def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    # std of datetimes is a timedelta: work in m8 space
    if values.dtype == "M8[ns]":
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)
    return _wrap_results(np.sqrt(var), orig_dtype)
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # ints cannot hold NaN: promote so the mask can be applied
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        # count in the input's float precision
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries on a copy so they don't affect the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries contributed avg**2 above; zero them back out
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof)

    # sem = sqrt(var) / sqrt(n)
    return np.sqrt(var) / np.sqrt(count)
def _nanminmax(meth, fill_value_typ):
    """
    Factory producing nanmin/nanmax: ``meth`` is "min" or "max" and
    ``fill_value_typ`` names the infinity used to neutralize NaNs.
    """

    @bottleneck_switch(name="nan" + meth)
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:

        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        if (axis is not None and values.shape[axis] == 0) or values.size == 0:
            # empty reduction: produce NaN of the right shape when possible,
            # falling back to a scalar NaN when the dtype/axis combination
            # cannot support it
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        result = _maybe_null_out(result, axis, mask, values.shape)
        return result

    return reduction


# NaNs are filled with +inf for min and -inf for max so they can never win.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
@disallow("O")
def nanargmax(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along an axis) of the maximum, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # NaNs are sunk to -inf so they can never be the argmax.
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmax(axis)  # type: ignore[var-annotated]
    return _maybe_arg_null_out(result, axis, mask, skipna)
@disallow("O")
def nanargmin(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along an axis) of the minimum, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of min value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmin(arr)
    0

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 0] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [nan,  7.,  8.],
           [nan, 10., 11.]])
    >>> nanops.nanargmin(arr, axis=1)
    array([0, 0, 1, 1])
    """
    # NaNs are lifted to +inf so they can never be the argmin.
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmin(axis)  # type: ignore[var-annotated]
    return _maybe_arg_null_out(result, axis, mask, skipna)
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanskew(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        # Non-float input: compute in float64; counts default to float64 too.
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        # Float input: keep the counts in the input's precision.
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NAs on a copy so they do not contribute to the sums below.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the subtraction below broadcasts along ``axis``.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # The masked slots became (0 - mean); reset them to zero again.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    # Second and third central moments (not yet normalized by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero variance means zero skew; fewer than 3 observations -> NaN.
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan

    return result
@disallow("M8", "m8")
@maybe_operate_rowwise
def nankurt(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        # Non-float input: compute in float64; counts default to float64 too.
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        # Float input: keep the counts in the input's precision.
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NAs on a copy so they do not contribute to the sums below.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the subtraction below broadcasts along ``axis``.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # The masked slots became (0 - mean); reset them to zero again.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted4 = adjusted2**2
    # Second and fourth central moments (not yet normalized by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numerator = count * (count + 1) * (count - 1) * m4
        denominator = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numerator = _zero_out_fperr(numerator)
    denominator = _zero_out_fperr(denominator)

    if not isinstance(denominator, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denominator == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numerator / denominator - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero denominator means zero kurtosis; <4 observations -> NaN.
        result = np.where(denominator == 0, 0, result)
        result[count < 4] = np.nan

    return result
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanprod(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Product of array values, with NAs contributing the multiplicative identity.

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count : int, default 0
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    Dtype
        The product of all elements on a given axis. ( NaNs are treated as 1)

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, 3, np.nan])
    >>> nanops.nanprod(s)
    6.0
    """
    mask = _maybe_get_mask(values, skipna, mask)

    if skipna and mask is not None:
        # Work on a copy so the caller's array is untouched; NAs become 1,
        # the identity for multiplication.
        values = values.copy()
        values[mask] = 1

    product = values.prod(axis)
    # error: Incompatible return value type (got "Union[ndarray, float]", expected
    # "float")
    return _maybe_null_out(  # type: ignore[return-value]
        product, axis, mask, values.shape, min_count=min_count
    )
1385def _maybe_arg_null_out(
1386 result: np.ndarray,
1387 axis: int | None,
1388 mask: npt.NDArray[np.bool_] | None,
1389 skipna: bool,
1390) -> np.ndarray | int:
1391 # helper function for nanargmin/nanargmax
1392 if mask is None:
1393 return result
1395 if axis is None or not getattr(result, "ndim", False):
1396 if skipna:
1397 if mask.all():
1398 return -1
1399 else:
1400 if mask.any():
1401 return -1
1402 else:
1403 if skipna:
1404 na_mask = mask.all(axis)
1405 else:
1406 na_mask = mask.any(axis)
1407 if na_mask.any():
1408 result[na_mask] = -1
1409 return result
1412def _get_counts(
1413 values_shape: Shape,
1414 mask: npt.NDArray[np.bool_] | None,
1415 axis: int | None,
1416 dtype: np.dtype = np.dtype(np.float64),
1417) -> float | np.ndarray:
1418 """
1419 Get the count of non-null values along an axis
1421 Parameters
1422 ----------
1423 values_shape : tuple of int
1424 shape tuple from values ndarray, used if mask is None
1425 mask : Optional[ndarray[bool]]
1426 locations in values that should be considered missing
1427 axis : Optional[int]
1428 axis to count along
1429 dtype : type, optional
1430 type to use for count
1432 Returns
1433 -------
1434 count : scalar or array
1435 """
1436 if axis is None:
1437 if mask is not None:
1438 n = mask.size - mask.sum()
1439 else:
1440 n = np.prod(values_shape)
1441 return dtype.type(n)
1443 if mask is not None:
1444 count = mask.shape[axis] - mask.sum(axis)
1445 else:
1446 count = values_shape[axis]
1448 if is_scalar(count):
1449 return dtype.type(count)
1450 return count.astype(dtype, copy=False)
1453def _maybe_null_out(
1454 result: np.ndarray | float | NaTType,
1455 axis: int | None,
1456 mask: npt.NDArray[np.bool_] | None,
1457 shape: tuple[int, ...],
1458 min_count: int = 1,
1459) -> np.ndarray | float | NaTType:
1460 """
1461 Returns
1462 -------
1463 Dtype
1464 The product of all elements on a given axis. ( NaNs are treated as 1)
1465 """
1466 if axis is not None and isinstance(result, np.ndarray):
1467 if mask is not None:
1468 null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
1469 else:
1470 # we have no nulls, kept mask=None in _maybe_get_mask
1471 below_count = shape[axis] - min_count < 0
1472 new_shape = shape[:axis] + shape[axis + 1 :]
1473 null_mask = np.broadcast_to(below_count, new_shape)
1475 if np.any(null_mask):
1476 if is_numeric_dtype(result):
1477 if np.iscomplexobj(result):
1478 result = result.astype("c16")
1479 elif not is_float_dtype(result):
1480 result = result.astype("f8", copy=False)
1481 result[null_mask] = np.nan
1482 else:
1483 # GH12941, use None to auto cast null
1484 result[null_mask] = None
1485 elif result is not NaT:
1486 if check_below_min_count(shape, mask, min_count):
1487 result = np.nan
1489 return result
def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # min_count of 0 (the default for most reductions) never trips.
        return False

    if mask is None:
        # no missing values, only check size
        non_nulls = np.prod(shape)
    else:
        non_nulls = mask.size - mask.sum()

    return bool(non_nulls < min_count)
1523def _zero_out_fperr(arg):
1524 # #18044 reference this behavior to fix rolling skew/kurt issue
1525 if isinstance(arg, np.ndarray):
1526 with np.errstate(invalid="ignore"):
1527 return np.where(np.abs(arg) < 1e-14, 0, arg)
1528 else:
1529 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
@disallow("M8", "m8")
def nancorr(
    a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None
) -> float:
    """
    a, b: ndarrays
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancorr must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only the positions observed in both inputs.
    valid = notna(a) & notna(b)
    if not valid.all():
        a = a[valid]
        b = b[valid]

    # Not enough paired observations to report a correlation.
    if len(a) < min_periods:
        return np.nan

    return get_corr_func(method)(a, b)
def get_corr_func(method) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Map a correlation ``method`` name (or a callable) to a function of
    two ndarrays returning the correlation coefficient.
    """
    if method == "kendall":
        # Imported lazily: scipy is an optional dependency.
        from scipy.stats import kendalltau

        return lambda a, b: kendalltau(a, b)[0]

    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]

    if method == "pearson":
        return lambda a, b: np.corrcoef(a, b)[0, 1]

    if callable(method):
        # User-supplied correlation function is used as-is.
        return method

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
@disallow("M8", "m8")
def nancov(
    a: np.ndarray,
    b: np.ndarray,
    *,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Pairwise covariance of ``a`` and ``b``, dropping positions where
    either input is NA.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancov must have same size")

    if min_periods is None:
        min_periods = 1

    # Keep only the positions observed in both inputs.
    both = notna(a) & notna(b)
    if not both.all():
        a = a[both]
        b = b[both]

    # Not enough paired observations to report a covariance.
    if len(a) < min_periods:
        return np.nan

    return np.cov(a, b, ddof=ddof)[0, 1]
1612def _ensure_numeric(x):
1613 if isinstance(x, np.ndarray):
1614 if is_integer_dtype(x) or is_bool_dtype(x):
1615 x = x.astype(np.float64)
1616 elif is_object_dtype(x):
1617 try:
1618 x = x.astype(np.complex128)
1619 except (TypeError, ValueError):
1620 try:
1621 x = x.astype(np.float64)
1622 except ValueError as err:
1623 # GH#29941 we get here with object arrays containing strs
1624 raise TypeError(f"Could not convert {x} to numeric") from err
1625 else:
1626 if not np.any(np.imag(x)):
1627 x = x.real
1628 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1629 try:
1630 x = float(x)
1631 except (TypeError, ValueError):
1632 # e.g. "1+1j" or "foo"
1633 try:
1634 x = complex(x)
1635 except ValueError as err:
1636 # e.g. "foo"
1637 raise TypeError(f"Could not convert {x} to numeric") from err
1638 return x
1641# NA-friendly array comparisons
def make_nancomp(op):
    """
    Wrap comparison ``op`` so that positions which are NA in either
    operand come out as NaN instead of a bool.
    """

    def f(x, y):
        # A position missing in either input must not yield a real result.
        mask = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if mask.any():
            # A boolean array cannot hold NaN; widen to object first.
            if is_bool_dtype(result):
                result = result.astype("O")
            np.putmask(result, mask, np.nan)

        return result

    return f
# NA-aware comparison functions built from the standard ``operator``
# module: each returns ``op(x, y)`` with positions that are NA in either
# operand replaced by NaN (see ``make_nancomp``).
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # mask_a: identity value substituted for NAs during accumulation;
    # mask_b: value used to re-mask those positions afterwards.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        # Operate on the integer representation of the datetimelike values.
        y = values.view("i8")
        # Note: the accum_func comparison fails as an "is" comparison
        changed = accum_func == np.minimum.accumulate

        try:
            if changed:
                # For cummin, set NaT slots to the largest i8 so they can
                # never become the running minimum.
                y[mask] = lib.i8max

            result = accum_func(y, axis=0)
        finally:
            if changed:
                # restore NaT elements
                y[mask] = iNaT

        if skipna:
            # Re-mask: original NA positions stay NaT in the output.
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if isinstance(values.dtype, np.dtype):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray/TimedeltaArray
            # TODO: have this case go through a DTA method?
            # For DatetimeTZDtype, view result as M8[ns]
            npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]"
            # Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]"
            # has no attribute "_simple_new"
            result = type(values)._simple_new(  # type: ignore[union-attr]
                result.view(npdtype), dtype=orig_dtype
            )
    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # NA-capable dtype: fill NAs with the identity on a copy,
        # accumulate, then restore NaN at the original NA positions.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        # Integer/bool arrays cannot hold NAs; accumulate directly.
        result = accum_func(values, axis=0)

    return result