Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/quantile.py: 14%
46 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import numpy as np
5from pandas._typing import (
6 ArrayLike,
7 Scalar,
8 npt,
9)
10from pandas.compat.numpy import np_percentile_argname
12from pandas.core.dtypes.missing import (
13 isna,
14 na_value_for_dtype,
15)
def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Dispatch a quantile computation according to the container type of `values`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to compute quantiles over.
    qs : np.ndarray[float64]
        Quantiles to evaluate; forwarded unchanged.
    interpolation : str
        Interpolation method; forwarded unchanged.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        # Anything that is not an ndarray is expected to bring its own
        # ``_quantile`` implementation.
        return values._quantile(qs, interpolation)

    # ndarray path: derive the missing-value mask and the NA value that
    # matches the dtype, then hand off to the masked implementation.
    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation)
def quantile_with_mask(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    fill_value,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> np.ndarray:
    """
    Evaluate every quantile in `qs` over `values`, skipping masked entries.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values); must have the same shape as `values`.
        For ExtensionArray, this is computed before calling
        _values_for_factorize
    fill_value : Scalar
        Value used to fill the result where there is no data.
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : np.ndarray[float64]
        Quantiles; they are multiplied by 100 before being passed on as
        percentiles.
    interpolation : str
        Type of interpolation

    Returns
    -------
    np.ndarray

    Notes
    -----
    1D input is promoted to a single-row 2D array, processed, and the
    added axis is dropped again before returning.

    Quantile is computed along axis=1.
    """
    assert values.shape == mask.shape

    if values.ndim == 1:
        # Solve the equivalent single-row 2D problem and strip the extra axis.
        as_rows = quantile_with_mask(
            np.atleast_2d(values),
            np.atleast_2d(mask),
            fill_value,
            qs,
            interpolation,
        )
        return as_rows[0]

    assert values.ndim == 2

    if values.shape[1] == 0:
        # Nothing to aggregate: build a len(values) x len(qs) array that
        # holds only fill_value.
        n_rows, n_qs = len(values), len(qs)
        per_q = np.array([fill_value] * n_qs)
        return np.repeat(per_q, n_rows).reshape(n_rows, n_qs)

    percentiles = _nanpercentile(
        values,
        qs * 100.0,
        na_value=fill_value,
        mask=mask,
        interpolation=interpolation,
    )
    # Transpose so the layout matches the len(values) x len(qs) array
    # produced by the empty case above.
    return np.array(percentiles, copy=False).T
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation: str,
) -> Scalar | np.ndarray:
    """
    Percentiles of a single 1-dimensional array, ignoring masked entries.

    Thin wrapper around np.percentile.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        True at the locations in `values` that should be treated as missing
    qs : np.ndarray[float64] of percentiles to find
    na_value : scalar
        value to return when nothing is left after dropping masked entries
    interpolation : str
        passed to np.percentile under the keyword named by
        np_percentile_argname

    Returns
    -------
    quantiles : scalar or array
    """
    # mask is Union[ExtensionArray, ndarray]
    observed = values[~mask]

    if len(observed):
        return np.percentile(
            observed,
            qs,
            # error: No overload variant of "percentile" matches argument
            # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
            # , "Dict[str, str]"  [call-overload]
            **{np_percentile_argname: interpolation},  # type: ignore[call-overload]
        )

    # Every entry was masked out (or there were none to begin with).
    # Can't pass dtype=values.dtype here bc we might have na_value=np.nan
    # with values.dtype=int64 see test_quantile_empty
    # equiv: 'np.array([na_value] * len(qs))' but much faster
    return np.full(len(qs), na_value)
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of percentiles to find
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing; must have
        the same shape as `values`
    interpolation : str
        passed to np.percentile under the keyword named by
        np_percentile_argname

    Returns
    -------
    quantiles : np.ndarray
        Percentiles computed along axis=1 of `values`.
    """
    if values.dtype.kind in ("m", "M"):
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not mask.any():
        # Nothing is missing, so a single vectorised call handles all rows.
        return np.percentile(
            values,
            qs,
            axis=1,
            # error: No overload variant of "percentile" matches argument types
            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
            # "int", "Dict[str, str]"  [call-overload]
            **{np_percentile_argname: interpolation},  # type: ignore[call-overload]
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape

    # Rows can have different numbers of valid entries, so each row is
    # reduced on its own. Iterating a 2D array yields its rows directly.
    per_row = [
        _nanpercentile_1d(row, row_mask, qs, na_value, interpolation=interpolation)
        for row, row_mask in zip(values, mask)
    ]

    # np.asarray rather than np.array(..., copy=False): building an array
    # from a list always needs a copy, and NumPy 2 raises ValueError when
    # copy=False cannot be honoured.
    if values.dtype.kind == "f":
        # preserve itemsize
        return np.asarray(per_row, dtype=values.dtype).T

    result = np.asarray(per_row).T
    if (
        result.dtype != values.dtype
        and (result == result.astype(values.dtype, copy=False)).all()
    ):
        # e.g. values is integer dtype and result is floating dtype,
        #  only cast back to integer dtype if result values are all-integer.
        result = result.astype(values.dtype, copy=False)
    return result