Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/numpy_.py: 23%

172 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import numpy as np 

4 

5from pandas._libs import lib 

6from pandas._typing import ( 

7 Dtype, 

8 NpDtype, 

9 Scalar, 

10 npt, 

11) 

12from pandas.compat.numpy import function as nv 

13 

14from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

15from pandas.core.dtypes.dtypes import PandasDtype 

16from pandas.core.dtypes.missing import isna 

17 

18from pandas.core import ( 

19 arraylike, 

20 nanops, 

21 ops, 

22) 

23from pandas.core.arraylike import OpsMixin 

24from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

25from pandas.core.construction import ensure_wrapped_if_datetimelike 

26from pandas.core.strings.object_array import ObjectStringArrayMixin 

27 

28 

29class PandasArray( 

30 OpsMixin, 

31 NDArrayBackedExtensionArray, 

32 ObjectStringArrayMixin, 

33): 

34 """ 

35 A pandas ExtensionArray for NumPy data. 

36 

37 This is mostly for internal compatibility, and is not especially 

38 useful on its own. 

39 

40 Parameters 

41 ---------- 

42 values : ndarray 

43 The NumPy ndarray to wrap. Must be 1-dimensional. 

44 copy : bool, default False 

45 Whether to copy `values`. 

46 

47 Attributes 

48 ---------- 

49 None 

50 

51 Methods 

52 ------- 

53 None 

54 """ 

55 

56 # If you're wondering why pd.Series(cls) doesn't put the array in an 

57 # ExtensionBlock, search for `ABCPandasArray`. We check for 

58 # that _typ to ensure that users don't unnecessarily use EAs inside 

59 # pandas internals, which turns off things like block consolidation. 

60 _typ = "npy_extension" 

61 __array_priority__ = 1000 

62 _ndarray: np.ndarray 

63 _dtype: PandasDtype 

64 _internal_fill_value = np.nan 

65 

66 # ------------------------------------------------------------------------ 

67 # Constructors 

68 

69 def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None: 

70 if isinstance(values, type(self)): 

71 values = values._ndarray 

72 if not isinstance(values, np.ndarray): 

73 raise ValueError( 

74 f"'values' must be a NumPy array, not {type(values).__name__}" 

75 ) 

76 

77 if values.ndim == 0: 

78 # Technically we support 2, but do not advertise that fact. 

79 raise ValueError("PandasArray must be 1-dimensional.") 

80 

81 if copy: 

82 values = values.copy() 

83 

84 dtype = PandasDtype(values.dtype) 

85 super().__init__(values, dtype) 

86 

87 @classmethod 

88 def _from_sequence( 

89 cls, scalars, *, dtype: Dtype | None = None, copy: bool = False 

90 ) -> PandasArray: 

91 if isinstance(dtype, PandasDtype): 

92 dtype = dtype._dtype 

93 

94 # error: Argument "dtype" to "asarray" has incompatible type 

95 # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object], 

96 # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, 

97 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], 

98 # _DTypeDict, Tuple[Any, Any]]]" 

99 result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type] 

100 if ( 

101 result.ndim > 1 

102 and not hasattr(scalars, "dtype") 

103 and (dtype is None or dtype == object) 

104 ): 

105 # e.g. list-of-tuples 

106 result = construct_1d_object_array_from_listlike(scalars) 

107 

108 if copy and result is scalars: 

109 result = result.copy() 

110 return cls(result) 

111 

112 def _from_backing_data(self, arr: np.ndarray) -> PandasArray: 

113 return type(self)(arr) 

114 

115 # ------------------------------------------------------------------------ 

116 # Data 

117 

118 @property 

119 def dtype(self) -> PandasDtype: 

120 return self._dtype 

121 

122 # ------------------------------------------------------------------------ 

123 # NumPy Array Interface 

124 

125 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: 

126 return np.asarray(self._ndarray, dtype=dtype) 

127 

128 def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

129 # Lightly modified version of 

130 # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html 

131 # The primary modification is not boxing scalar return values 

132 # in PandasArray, since pandas' ExtensionArrays are 1-d. 

133 out = kwargs.get("out", ()) 

134 

135 result = ops.maybe_dispatch_ufunc_to_dunder_op( 

136 self, ufunc, method, *inputs, **kwargs 

137 ) 

138 if result is not NotImplemented: 

139 return result 

140 

141 if "out" in kwargs: 

142 # e.g. test_ufunc_unary 

143 return arraylike.dispatch_ufunc_with_out( 

144 self, ufunc, method, *inputs, **kwargs 

145 ) 

146 

147 if method == "reduce": 

148 result = arraylike.dispatch_reduction_ufunc( 

149 self, ufunc, method, *inputs, **kwargs 

150 ) 

151 if result is not NotImplemented: 

152 # e.g. tests.series.test_ufunc.TestNumpyReductions 

153 return result 

154 

155 # Defer to the implementation of the ufunc on unwrapped values. 

156 inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs) 

157 if out: 

158 kwargs["out"] = tuple( 

159 x._ndarray if isinstance(x, PandasArray) else x for x in out 

160 ) 

161 result = getattr(ufunc, method)(*inputs, **kwargs) 

162 

163 if ufunc.nout > 1: 

164 # multiple return values; re-box array-like results 

165 return tuple(type(self)(x) for x in result) 

166 elif method == "at": 

167 # no return value 

168 return None 

169 elif method == "reduce": 

170 if isinstance(result, np.ndarray): 

171 # e.g. test_np_reduce_2d 

172 return type(self)(result) 

173 

174 # e.g. test_np_max_nested_tuples 

175 return result 

176 else: 

177 # one return value; re-box array-like results 

178 return type(self)(result) 

179 

180 # ------------------------------------------------------------------------ 

181 # Pandas ExtensionArray Interface 

182 

183 def isna(self) -> np.ndarray: 

184 return isna(self._ndarray) 

185 

186 def _validate_scalar(self, fill_value): 

187 if fill_value is None: 

188 # Primarily for subclasses 

189 fill_value = self.dtype.na_value 

190 return fill_value 

191 

192 def _values_for_factorize(self) -> tuple[np.ndarray, float | None]: 

193 if self.dtype.kind in ["i", "u", "b"]: 

194 fv = None 

195 else: 

196 fv = np.nan 

197 return self._ndarray, fv 

198 

199 # ------------------------------------------------------------------------ 

200 # Reductions 

201 

202 def any( 

203 self, 

204 *, 

205 axis: int | None = None, 

206 out=None, 

207 keepdims: bool = False, 

208 skipna: bool = True, 

209 ): 

210 nv.validate_any((), {"out": out, "keepdims": keepdims}) 

211 result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) 

212 return self._wrap_reduction_result(axis, result) 

213 

214 def all( 

215 self, 

216 *, 

217 axis: int | None = None, 

218 out=None, 

219 keepdims: bool = False, 

220 skipna: bool = True, 

221 ): 

222 nv.validate_all((), {"out": out, "keepdims": keepdims}) 

223 result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) 

224 return self._wrap_reduction_result(axis, result) 

225 

226 def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: 

227 nv.validate_min((), kwargs) 

228 result = nanops.nanmin( 

229 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna 

230 ) 

231 return self._wrap_reduction_result(axis, result) 

232 

233 def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: 

234 nv.validate_max((), kwargs) 

235 result = nanops.nanmax( 

236 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna 

237 ) 

238 return self._wrap_reduction_result(axis, result) 

239 

240 def sum( 

241 self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs 

242 ) -> Scalar: 

243 nv.validate_sum((), kwargs) 

244 result = nanops.nansum( 

245 self._ndarray, axis=axis, skipna=skipna, min_count=min_count 

246 ) 

247 return self._wrap_reduction_result(axis, result) 

248 

249 def prod( 

250 self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs 

251 ) -> Scalar: 

252 nv.validate_prod((), kwargs) 

253 result = nanops.nanprod( 

254 self._ndarray, axis=axis, skipna=skipna, min_count=min_count 

255 ) 

256 return self._wrap_reduction_result(axis, result) 

257 

258 def mean( 

259 self, 

260 *, 

261 axis: int | None = None, 

262 dtype: NpDtype | None = None, 

263 out=None, 

264 keepdims: bool = False, 

265 skipna: bool = True, 

266 ): 

267 nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) 

268 result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) 

269 return self._wrap_reduction_result(axis, result) 

270 

271 def median( 

272 self, 

273 *, 

274 axis: int | None = None, 

275 out=None, 

276 overwrite_input: bool = False, 

277 keepdims: bool = False, 

278 skipna: bool = True, 

279 ): 

280 nv.validate_median( 

281 (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} 

282 ) 

283 result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) 

284 return self._wrap_reduction_result(axis, result) 

285 

286 def std( 

287 self, 

288 *, 

289 axis: int | None = None, 

290 dtype: NpDtype | None = None, 

291 out=None, 

292 ddof=1, 

293 keepdims: bool = False, 

294 skipna: bool = True, 

295 ): 

296 nv.validate_stat_ddof_func( 

297 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" 

298 ) 

299 result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

300 return self._wrap_reduction_result(axis, result) 

301 

302 def var( 

303 self, 

304 *, 

305 axis: int | None = None, 

306 dtype: NpDtype | None = None, 

307 out=None, 

308 ddof=1, 

309 keepdims: bool = False, 

310 skipna: bool = True, 

311 ): 

312 nv.validate_stat_ddof_func( 

313 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" 

314 ) 

315 result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

316 return self._wrap_reduction_result(axis, result) 

317 

318 def sem( 

319 self, 

320 *, 

321 axis: int | None = None, 

322 dtype: NpDtype | None = None, 

323 out=None, 

324 ddof=1, 

325 keepdims: bool = False, 

326 skipna: bool = True, 

327 ): 

328 nv.validate_stat_ddof_func( 

329 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" 

330 ) 

331 result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

332 return self._wrap_reduction_result(axis, result) 

333 

334 def kurt( 

335 self, 

336 *, 

337 axis: int | None = None, 

338 dtype: NpDtype | None = None, 

339 out=None, 

340 keepdims: bool = False, 

341 skipna: bool = True, 

342 ): 

343 nv.validate_stat_ddof_func( 

344 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" 

345 ) 

346 result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) 

347 return self._wrap_reduction_result(axis, result) 

348 

349 def skew( 

350 self, 

351 *, 

352 axis: int | None = None, 

353 dtype: NpDtype | None = None, 

354 out=None, 

355 keepdims: bool = False, 

356 skipna: bool = True, 

357 ): 

358 nv.validate_stat_ddof_func( 

359 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" 

360 ) 

361 result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) 

362 return self._wrap_reduction_result(axis, result) 

363 

364 # ------------------------------------------------------------------------ 

365 # Additional Methods 

366 

367 def to_numpy( 

368 self, 

369 dtype: npt.DTypeLike | None = None, 

370 copy: bool = False, 

371 na_value: object = lib.no_default, 

372 ) -> np.ndarray: 

373 result = np.asarray(self._ndarray, dtype=dtype) 

374 

375 if (copy or na_value is not lib.no_default) and result is self._ndarray: 

376 result = result.copy() 

377 

378 if na_value is not lib.no_default: 

379 result[self.isna()] = na_value 

380 

381 return result 

382 

383 # ------------------------------------------------------------------------ 

384 # Ops 

385 

386 def __invert__(self) -> PandasArray: 

387 return type(self)(~self._ndarray) 

388 

389 def __neg__(self) -> PandasArray: 

390 return type(self)(-self._ndarray) 

391 

392 def __pos__(self) -> PandasArray: 

393 return type(self)(+self._ndarray) 

394 

395 def __abs__(self) -> PandasArray: 

396 return type(self)(abs(self._ndarray)) 

397 

398 def _cmp_method(self, other, op): 

399 if isinstance(other, PandasArray): 

400 other = other._ndarray 

401 

402 other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) 

403 pd_op = ops.get_array_op(op) 

404 other = ensure_wrapped_if_datetimelike(other) 

405 with np.errstate(all="ignore"): 

406 result = pd_op(self._ndarray, other) 

407 

408 if op is divmod or op is ops.rdivmod: 

409 a, b = result 

410 if isinstance(a, np.ndarray): 

411 # for e.g. op vs TimedeltaArray, we may already 

412 # have an ExtensionArray, in which case we do not wrap 

413 return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b) 

414 return a, b 

415 

416 if isinstance(result, np.ndarray): 

417 # for e.g. multiplication vs TimedeltaArray, we may already 

418 # have an ExtensionArray, in which case we do not wrap 

419 return self._wrap_ndarray_result(result) 

420 return result 

421 

422 _arith_method = _cmp_method 

423 

424 def _wrap_ndarray_result(self, result: np.ndarray): 

425 # If we have timedelta64[ns] result, return a TimedeltaArray instead 

426 # of a PandasArray 

427 if result.dtype == "timedelta64[ns]": 

428 from pandas.core.arrays import TimedeltaArray 

429 

430 return TimedeltaArray._simple_new(result) 

431 return type(self)(result) 

432 

433 # ------------------------------------------------------------------------ 

434 # String methods interface 

435 _str_na_value = np.nan