Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/quantile.py: 14%

46 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import numpy as np 

4 

5from pandas._typing import ( 

6 ArrayLike, 

7 Scalar, 

8 npt, 

9) 

10from pandas.compat.numpy import np_percentile_argname 

11 

12from pandas.core.dtypes.missing import ( 

13 isna, 

14 na_value_for_dtype, 

15) 

16 

17 

def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Compute the quantiles of ``values`` at every point listed in ``qs``.

    Anything that is not a NumPy array is asked to compute its own
    quantiles through its ``_quantile`` method.  NumPy arrays are routed to
    ``quantile_with_mask`` together with their NA mask and the NA fill value
    matching their dtype.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to compute quantiles over.
    qs : np.ndarray[float64]
        Quantiles to compute.
    interpolation : str
        Interpolation method, forwarded unchanged.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        # Non-ndarray input implements its own quantile logic.
        return values._quantile(qs, interpolation)

    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation)

40 

41 

def quantile_with_mask(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    fill_value,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> np.ndarray:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values)
        For ExtensionArray, this is computed before calling
        _values_for_factorize
    fill_value : Scalar
        The value used to fill NA entries in the result.
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : np.ndarray[float64]
        Quantiles expressed as fractions; they are multiplied by 100 before
        being handed to the percentile routine.
    interpolation : str
        Type of interpolation

    Returns
    -------
    np.ndarray
        For 2D input, an array with one row per row of ``values`` and one
        column per entry of ``qs``.  For 1D input, a 1D array with one entry
        per entry of ``qs``.

    Notes
    -----
    1D input is temporarily promoted to 2D and the single result row is
    returned.  For ExtensionArray, 2D input means np.atleast_2d has been
    called on _values_for_factorize()[0].

    Quantile is computed along axis=1.
    """
    assert values.shape == mask.shape
    if values.ndim == 1:
        # unsqueeze, operate, re-squeeze
        values = np.atleast_2d(values)
        mask = np.atleast_2d(mask)
        res_values = quantile_with_mask(values, mask, fill_value, qs, interpolation)
        return res_values[0]

    assert values.ndim == 2

    n_rows = len(values)
    n_qs = len(qs)

    if values.shape[1] == 0:
        # Nothing to compute quantiles over: build the
        # (n_rows, n_qs) array of NA values directly.
        flat = np.array([fill_value] * n_qs)
        return np.repeat(flat, n_rows).reshape(n_rows, n_qs)

    result = _nanpercentile(
        values,
        qs * 100.0,
        na_value=fill_value,
        mask=mask,
        interpolation=interpolation,
    )

    # np.asarray returns an existing ndarray untouched and copies only when it
    # has to.  np.array(..., copy=False) instead raises under NumPy 2 whenever
    # a copy would be required.
    # The percentile result has one row per quantile; transpose so that rows
    # line up with the rows of ``values``.
    return np.asarray(result).T

107 

108 

def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation: str,
) -> Scalar | np.ndarray:
    """
    One-dimensional wrapper around np.percentile that ignores masked entries.

    Parameters
    ----------
    values : np.ndarray
        Array over which to find quantiles.
    mask : ndarray[bool]
        Locations in ``values`` that should be considered missing.
    qs : np.ndarray[float64]
        Quantile indices to find.
    na_value : scalar
        Value to return for empty or all-null values.
    interpolation : str
        Interpolation method handed to np.percentile.

    Returns
    -------
    quantiles : scalar or array
    """
    valid = values[~mask]

    if len(valid) > 0:
        # The keyword np.percentile expects for the interpolation method is
        # held in ``np_percentile_argname``, so it is passed by name.
        percentile_kwargs = {np_percentile_argname: interpolation}
        return np.percentile(
            valid,
            qs,
            **percentile_kwargs,  # type: ignore[call-overload]
        )

    # Nothing left after masking.  dtype=values.dtype is deliberately not
    # passed because na_value may be np.nan while values.dtype is int64
    # (see test_quantile_empty).
    # equiv: 'np.array([na_value] * len(qs))' but much faster
    return np.full(len(qs), na_value)

151 

152 

def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str

    Returns
    -------
    quantiles : scalar or array
        Computed along axis=1 of ``values``.
    """

    if values.dtype.kind in ("m", "M"):
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not mask.any():
        # Nothing is missing: a single vectorized call handles every row.
        return np.percentile(
            values,
            qs,
            axis=1,
            # error: No overload variant of "percentile" matches argument types
            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
            # "int", "Dict[str, str]"  [call-overload]
            **{np_percentile_argname: interpolation},  # type: ignore[call-overload]
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape

    # Each row may have a different number of valid entries, so rows are
    # processed one at a time.  Iterating a 2D array already yields its rows.
    per_row = [
        _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
        for val, m in zip(values, mask)
    ]

    # np.asarray is used rather than np.array(..., copy=False): building an
    # array from a list always copies, and NumPy 2 raises for copy=False
    # whenever a copy is required.
    if values.dtype.kind == "f":
        # preserve itemsize
        return np.asarray(per_row, dtype=values.dtype).T

    result = np.asarray(per_row).T
    if (
        result.dtype != values.dtype
        and (result == result.astype(values.dtype, copy=False)).all()
    ):
        # e.g. values is integer dtype and result is floating dtype,
        # only cast back to integer dtype if result values are all-integer.
        result = result.astype(values.dtype, copy=False)
    return result