Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arraylike.py: 23%

251 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Methods that can be shared by many array-like classes or subclasses: 

3 Series 

4 Index 

5 ExtensionArray 

6""" 

7from __future__ import annotations 

8 

9import operator 

10from typing import Any 

11import warnings 

12 

13import numpy as np 

14 

15from pandas._libs import lib 

16from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op 

17from pandas.util._exceptions import find_stack_level 

18 

19from pandas.core.dtypes.generic import ABCNDFrame 

20 

21from pandas.core import roperator 

22from pandas.core.construction import extract_array 

23from pandas.core.ops.common import unpack_zerodim_and_defer 

24 

25REDUCTION_ALIASES = { 

26 "maximum": "max", 

27 "minimum": "min", 

28 "add": "sum", 

29 "multiply": "prod", 

30} 

31 

32 

33class OpsMixin: 

34 # ------------------------------------------------------------- 

35 # Comparisons 

36 

37 def _cmp_method(self, other, op): 

38 return NotImplemented 

39 

40 @unpack_zerodim_and_defer("__eq__") 

41 def __eq__(self, other): 

42 return self._cmp_method(other, operator.eq) 

43 

44 @unpack_zerodim_and_defer("__ne__") 

45 def __ne__(self, other): 

46 return self._cmp_method(other, operator.ne) 

47 

48 @unpack_zerodim_and_defer("__lt__") 

49 def __lt__(self, other): 

50 return self._cmp_method(other, operator.lt) 

51 

52 @unpack_zerodim_and_defer("__le__") 

53 def __le__(self, other): 

54 return self._cmp_method(other, operator.le) 

55 

56 @unpack_zerodim_and_defer("__gt__") 

57 def __gt__(self, other): 

58 return self._cmp_method(other, operator.gt) 

59 

60 @unpack_zerodim_and_defer("__ge__") 

61 def __ge__(self, other): 

62 return self._cmp_method(other, operator.ge) 

63 

64 # ------------------------------------------------------------- 

65 # Logical Methods 

66 

67 def _logical_method(self, other, op): 

68 return NotImplemented 

69 

70 @unpack_zerodim_and_defer("__and__") 

71 def __and__(self, other): 

72 return self._logical_method(other, operator.and_) 

73 

74 @unpack_zerodim_and_defer("__rand__") 

75 def __rand__(self, other): 

76 return self._logical_method(other, roperator.rand_) 

77 

78 @unpack_zerodim_and_defer("__or__") 

79 def __or__(self, other): 

80 return self._logical_method(other, operator.or_) 

81 

82 @unpack_zerodim_and_defer("__ror__") 

83 def __ror__(self, other): 

84 return self._logical_method(other, roperator.ror_) 

85 

86 @unpack_zerodim_and_defer("__xor__") 

87 def __xor__(self, other): 

88 return self._logical_method(other, operator.xor) 

89 

90 @unpack_zerodim_and_defer("__rxor__") 

91 def __rxor__(self, other): 

92 return self._logical_method(other, roperator.rxor) 

93 

94 # ------------------------------------------------------------- 

95 # Arithmetic Methods 

96 

97 def _arith_method(self, other, op): 

98 return NotImplemented 

99 

100 @unpack_zerodim_and_defer("__add__") 

101 def __add__(self, other): 

102 return self._arith_method(other, operator.add) 

103 

104 @unpack_zerodim_and_defer("__radd__") 

105 def __radd__(self, other): 

106 return self._arith_method(other, roperator.radd) 

107 

108 @unpack_zerodim_and_defer("__sub__") 

109 def __sub__(self, other): 

110 return self._arith_method(other, operator.sub) 

111 

112 @unpack_zerodim_and_defer("__rsub__") 

113 def __rsub__(self, other): 

114 return self._arith_method(other, roperator.rsub) 

115 

116 @unpack_zerodim_and_defer("__mul__") 

117 def __mul__(self, other): 

118 return self._arith_method(other, operator.mul) 

119 

120 @unpack_zerodim_and_defer("__rmul__") 

121 def __rmul__(self, other): 

122 return self._arith_method(other, roperator.rmul) 

123 

124 @unpack_zerodim_and_defer("__truediv__") 

125 def __truediv__(self, other): 

126 return self._arith_method(other, operator.truediv) 

127 

128 @unpack_zerodim_and_defer("__rtruediv__") 

129 def __rtruediv__(self, other): 

130 return self._arith_method(other, roperator.rtruediv) 

131 

132 @unpack_zerodim_and_defer("__floordiv__") 

133 def __floordiv__(self, other): 

134 return self._arith_method(other, operator.floordiv) 

135 

136 @unpack_zerodim_and_defer("__rfloordiv__")

137 def __rfloordiv__(self, other): 

138 return self._arith_method(other, roperator.rfloordiv) 

139 

140 @unpack_zerodim_and_defer("__mod__") 

141 def __mod__(self, other): 

142 return self._arith_method(other, operator.mod) 

143 

144 @unpack_zerodim_and_defer("__rmod__") 

145 def __rmod__(self, other): 

146 return self._arith_method(other, roperator.rmod) 

147 

148 @unpack_zerodim_and_defer("__divmod__") 

149 def __divmod__(self, other): 

150 return self._arith_method(other, divmod) 

151 

152 @unpack_zerodim_and_defer("__rdivmod__") 

153 def __rdivmod__(self, other): 

154 return self._arith_method(other, roperator.rdivmod) 

155 

156 @unpack_zerodim_and_defer("__pow__") 

157 def __pow__(self, other): 

158 return self._arith_method(other, operator.pow) 

159 

160 @unpack_zerodim_and_defer("__rpow__") 

161 def __rpow__(self, other): 

162 return self._arith_method(other, roperator.rpow) 

163 
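Every dunder defined by OpsMixin funnels into one of three hooks (_cmp_method, _logical_method, _arith_method). Below is a minimal sketch of that wiring, using a hypothetical Scalar wrapper that is not part of pandas and assuming the private pandas.core.arraylike import path used by this module.

from pandas.core.arraylike import OpsMixin

class Scalar(OpsMixin):
    # Toy wrapper: the mixin supplies __add__, __lt__, ... and routes them here.
    def __init__(self, value):
        self.value = value

    def _cmp_method(self, other, op):
        return op(self.value, other)

    def _arith_method(self, other, op):
        return Scalar(op(self.value, other))

s = Scalar(3)
assert (s + 2).value == 5   # __add__  -> _arith_method(2, operator.add)
assert (2 + s).value == 5   # __radd__ -> _arith_method(2, roperator.radd)
assert s < 10               # __lt__   -> _cmp_method(10, operator.lt)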

164 

165# ----------------------------------------------------------------------------- 

166# Helpers to implement __array_ufunc__ 

167 

168 

169def _is_aligned(frame, other): 

170 """ 

171 Helper to check if a DataFrame is aligned with another DataFrame or Series. 

172 """ 

173 from pandas import DataFrame 

174 

175 if isinstance(other, DataFrame): 

176 return frame._indexed_same(other) 

177 else: 

178 # Series -> match index 

179 return frame.columns.equals(other.index) 

180 
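A quick sketch of what _is_aligned treats as aligned. The frames below are hypothetical, and the underscore-prefixed helper is private, so this is illustration only.

import pandas as pd
from pandas.core.arraylike import _is_aligned

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# DataFrame vs DataFrame: index and columns must both match
assert _is_aligned(df, df.copy())
assert not _is_aligned(df, df.rename(columns={"b": "c"}))

# DataFrame vs Series: the Series index must match the frame's columns
assert _is_aligned(df, pd.Series([10, 20], index=["a", "b"]))
assert not _is_aligned(df, pd.Series([10, 20], index=["a", "x"]))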

181 

182def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): 

183 """ 

184 In the future, DataFrame inputs to ufuncs will be aligned before applying 

185 the ufunc; for now we ignore the index but raise a warning if the behaviour 

186 would change in the future. 

187 This helper detects the case where a warning is needed and then falls back 

188 to applying the ufunc on arrays to avoid alignment. 

189 

190 See https://github.com/pandas-dev/pandas/pull/39239 

191 """ 

192 from pandas import DataFrame 

193 from pandas.core.generic import NDFrame 

194 

195 n_alignable = sum(isinstance(x, NDFrame) for x in inputs) 

196 n_frames = sum(isinstance(x, DataFrame) for x in inputs) 

197 

198 if n_alignable >= 2 and n_frames >= 1: 

199 # if there are 2 alignable inputs (Series or DataFrame), of which at least 1 

200 # is a DataFrame -> we would have had no alignment before -> warn that this 

201 # will align in the future 

202 

203 # the first frame is what determines the output index/columns in pandas < 1.2 

204 first_frame = next(x for x in inputs if isinstance(x, DataFrame)) 

205 

206 # check if the objects are aligned or not 

207 non_aligned = sum( 

208 not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) 

209 ) 

210 

211 # if at least one is not aligned -> warn and fallback to array behaviour 

212 if non_aligned: 

213 warnings.warn( 

214 "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " 

215 "combination). Currently, the indices are ignored and the result " 

216 "takes the index/columns of the first DataFrame. In the future , " 

217 "the DataFrames/Series will be aligned before applying the ufunc.\n" 

218 "Convert one of the arguments to a NumPy array " 

219 "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " 

220 "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " 

221 "the ufunc to obtain the future behaviour and silence this warning.", 

222 FutureWarning, 

223 stacklevel=find_stack_level(), 

224 ) 

225 

226 # keep the first dataframe of the inputs, other DataFrame/Series is 

227 # converted to array for fallback behaviour 

228 new_inputs = [] 

229 for x in inputs: 

230 if x is first_frame: 

231 new_inputs.append(x) 

232 elif isinstance(x, NDFrame): 

233 new_inputs.append(np.asarray(x)) 

234 else: 

235 new_inputs.append(x) 

236 

237 # call the ufunc on those transformed inputs 

238 return getattr(ufunc, method)(*new_inputs, **kwargs) 

239 

240 # signal that we didn't fallback / execute the ufunc yet 

241 return NotImplemented 

242 
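The two workarounds named in the warning above, sketched with hypothetical df1/df2 that share no labels.

import numpy as np
import pandas as pd

df1 = pd.DataFrame({"a": [1, 2]}, index=[0, 1])
df2 = pd.DataFrame({"b": [3, 4]}, index=[2, 3])

# Keep the current (non-aligning) behaviour without the FutureWarning:
# pass one operand as a plain array.
res_now = np.add(df1, np.asarray(df2))

# Opt in to the future behaviour: align explicitly before calling the ufunc.
a1, a2 = df1.align(df2)
res_future = np.add(a1, a2)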

243 

244def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): 

245 """ 

246 Compatibility with numpy ufuncs. 

247 

248 See also 

249 -------- 

250 numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ 

251 """ 

252 from pandas.core.generic import NDFrame 

253 from pandas.core.internals import BlockManager 

254 

255 cls = type(self) 

256 

257 kwargs = _standardize_out_kwarg(**kwargs) 

258 

259 # for backwards compatibility check and potentially fallback for non-aligned frames 

260 result = _maybe_fallback(ufunc, method, *inputs, **kwargs) 

261 if result is not NotImplemented: 

262 return result 

263 

264 # for binary ops, use our custom dunder methods 

265 result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) 

266 if result is not NotImplemented: 

267 return result 

268 

269 # Determine if we should defer. 

270 no_defer = ( 

271 np.ndarray.__array_ufunc__, 

272 cls.__array_ufunc__, 

273 ) 

274 

275 for item in inputs: 

276 higher_priority = ( 

277 hasattr(item, "__array_priority__") 

278 and item.__array_priority__ > self.__array_priority__ 

279 ) 

280 has_array_ufunc = ( 

281 hasattr(item, "__array_ufunc__") 

282 and type(item).__array_ufunc__ not in no_defer 

283 and not isinstance(item, self._HANDLED_TYPES) 

284 ) 

285 if higher_priority or has_array_ufunc: 

286 return NotImplemented 

287 

288 # align all the inputs. 

289 types = tuple(type(x) for x in inputs) 

290 alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)] 

291 

292 if len(alignable) > 1: 

293 # This triggers alignment. 

294 # At the moment, there aren't any ufuncs with more than two inputs 

295 # so this ends up just being x1.index | x2.index, but we write 

296 # it to handle *args. 

297 

298 if len(set(types)) > 1: 

299 # We currently don't handle ufunc(DataFrame, Series) 

300 # well. Previously this raised an internal ValueError. We might 

301 # support it someday, so raise a NotImplementedError. 

302 raise NotImplementedError( 

303 "Cannot apply ufunc {} to mixed DataFrame and Series " 

304 "inputs.".format(ufunc) 

305 ) 

306 axes = self.axes 

307 for obj in alignable[1:]: 

308 # this relies on the fact that we aren't handling mixed 

309 # series / frame ufuncs. 

310 for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)): 

311 axes[i] = ax1.union(ax2) 

312 

313 reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes)) 

314 inputs = tuple( 

315 x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x 

316 for x, t in zip(inputs, types) 

317 ) 

318 else: 

319 reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes)) 

320 

321 if self.ndim == 1: 

322 names = [getattr(x, "name") for x in inputs if hasattr(x, "name")] 

323 name = names[0] if len(set(names)) == 1 else None 

324 reconstruct_kwargs = {"name": name} 

325 else: 

326 reconstruct_kwargs = {} 

327 

328 def reconstruct(result): 

329 if ufunc.nout > 1: 

330 # np.modf, np.frexp, np.divmod 

331 return tuple(_reconstruct(x) for x in result) 

332 

333 return _reconstruct(result) 

334 

335 def _reconstruct(result): 

336 if lib.is_scalar(result): 

337 return result 

338 

339 if result.ndim != self.ndim: 

340 if method == "outer": 

341 if self.ndim == 2: 

342 # we already deprecated for Series 

343 msg = ( 

344 "outer method for ufunc {} is not implemented on " 

345 "pandas objects. Returning an ndarray, but in the " 

346 "future this will raise a 'NotImplementedError'. " 

347 "Consider explicitly converting the DataFrame " 

348 "to an array with '.to_numpy()' first." 

349 ) 

350 warnings.warn( 

351 msg.format(ufunc), FutureWarning, stacklevel=find_stack_level() 

352 ) 

353 return result 

354 raise NotImplementedError 

355 return result 

356 if isinstance(result, BlockManager): 

357 # we went through BlockManager.apply e.g. np.sqrt 

358 result = self._constructor(result, **reconstruct_kwargs, copy=False) 

359 else: 

360 # we converted an array, lost our axes 

361 result = self._constructor( 

362 result, **reconstruct_axes, **reconstruct_kwargs, copy=False 

363 ) 

364 # TODO: When we support multiple values in __finalize__, this 

365 # should pass alignable to `__finalize__` instead of self. 

366 # Then `np.add(a, b)` would consider attrs from both a and b 

367 # when a and b are NDFrames. 

368 if len(alignable) == 1: 

369 result = result.__finalize__(self) 

370 return result 

371 

372 if "out" in kwargs: 

373 # e.g. test_multiindex_get_loc 

374 result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) 

375 return reconstruct(result) 

376 

377 if method == "reduce": 

378 # e.g. test.series.test_ufunc.test_reduce 

379 result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) 

380 if result is not NotImplemented: 

381 return result 

382 

383 # We still get here with kwargs `axis` for e.g. np.maximum.accumulate 

384 # and `dtype` and `keepdims` for np.ptp 

385 

386 if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1): 

387 # Just give up on preserving types in the complex case. 

388 # In theory we could preserve them: 

389 # * nout>1 is doable if BlockManager.apply took nout and 

390 # returned a Tuple[BlockManager]. 

391 # * len(inputs) > 1 is doable when we know that we have 

392 # aligned blocks / dtypes. 

393 

394 # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add 

395 inputs = tuple(np.asarray(x) for x in inputs) 

396 # Note: we can't use default_array_ufunc here bc reindexing means 

397 # that `self` may not be among `inputs` 

398 result = getattr(ufunc, method)(*inputs, **kwargs) 

399 elif self.ndim == 1: 

400 # ufunc(series, ...) 

401 inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) 

402 result = getattr(ufunc, method)(*inputs, **kwargs) 

403 else: 

404 # ufunc(dataframe) 

405 if method == "__call__" and not kwargs: 

406 # for np.<ufunc>(..) calls 

407 # kwargs cannot necessarily be handled block-by-block, so only 

408 # take this path if there are no kwargs 

409 mgr = inputs[0]._mgr 

410 result = mgr.apply(getattr(ufunc, method)) 

411 else: 

412 # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..)) 

413 # Those can have an axis keyword and thus can't be called block-by-block 

414 result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) 

415 # e.g. np.negative (only one reached), with "where" and "out" in kwargs 

416 

417 result = reconstruct(result) 

418 return result 

419 
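A rough sketch of what this dispatch preserves in the common unary case: the ufunc is applied to the underlying values and the result is rewrapped with the original index and name. The example objects are hypothetical.

import numpy as np
import pandas as pd

s = pd.Series([1.0, 4.0, 9.0], index=["x", "y", "z"], name="vals")

out = np.sqrt(s)                      # routed through array_ufunc
assert isinstance(out, pd.Series)
assert out.name == "vals"
assert list(out.index) == ["x", "y", "z"]

df = pd.DataFrame({"a": [1.0, 4.0]})
assert np.allclose(np.sqrt(df).to_numpy(), (df ** 0.5).to_numpy())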

420 

421def _standardize_out_kwarg(**kwargs) -> dict: 

422 """ 

423 If kwargs contain "out1" and "out2", replace that with a tuple "out" 

424 

425 np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or 

426 `out1=out1, out2=out2` 

427 """ 

428 if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs: 

429 out1 = kwargs.pop("out1") 

430 out2 = kwargs.pop("out2") 

431 out = (out1, out2) 

432 kwargs["out"] = out 

433 return kwargs 

434 
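Illustration of the normalisation, calling the private helper directly for demonstration only; o1/o2 are hypothetical output buffers.

import numpy as np
from pandas.core.arraylike import _standardize_out_kwarg

o1, o2 = np.empty(3), np.empty(3)

# out1=/out2= is folded into a single out=(out1, out2) tuple
kwargs = _standardize_out_kwarg(out1=o1, out2=o2)
assert kwargs["out"][0] is o1 and kwargs["out"][1] is o2
assert "out1" not in kwargs and "out2" not in kwargs

# an already-standard out= is left untouched
assert _standardize_out_kwarg(out=(o1, o2))["out"][0] is o1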

435 

436def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

437 """ 

438 If we have an `out` keyword, then call the ufunc without `out` and then 

439 set the result into the given `out`. 

440 """ 

441 

442 # Note: we assume _standardize_out_kwarg has already been called. 

443 out = kwargs.pop("out") 

444 where = kwargs.pop("where", None) 

445 

446 result = getattr(ufunc, method)(*inputs, **kwargs) 

447 

448 if result is NotImplemented: 

449 return NotImplemented 

450 

451 if isinstance(result, tuple): 

452 # i.e. np.divmod, np.modf, np.frexp 

453 if not isinstance(out, tuple) or len(out) != len(result): 

454 raise NotImplementedError 

455 

456 for arr, res in zip(out, result): 

457 _assign_where(arr, res, where) 

458 

459 return out 

460 

461 if isinstance(out, tuple): 

462 if len(out) == 1: 

463 out = out[0] 

464 else: 

465 raise NotImplementedError 

466 

467 _assign_where(out, result, where) 

468 return out 

469 
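A sketch of the out= handling described above, assuming the ufunc call routes through this module as in current pandas; buf is a hypothetical preallocated array.

import numpy as np
import pandas as pd

s = pd.Series([1.7, 2.2, 3.9])
buf = np.empty(3)

# The ufunc is evaluated without `out`, then the values are written into buf.
np.floor(s, out=buf)
assert np.array_equal(buf, [1.0, 2.0, 3.0])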

470 

471def _assign_where(out, result, where) -> None: 

472 """ 

473 Set a ufunc result into 'out', masking with a 'where' argument if necessary. 

474 """ 

475 if where is None: 

476 # no 'where' arg passed to ufunc 

477 out[:] = result 

478 else: 

479 np.putmask(out, where, result) 

480 
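Direct illustration of the two branches, calling the private helper with hypothetical arrays.

import numpy as np
from pandas.core.arraylike import _assign_where

out = np.zeros(4)
res = np.array([1.0, 2.0, 3.0, 4.0])

_assign_where(out, res, where=None)                              # out[:] = res
_assign_where(out, res * 10, where=np.array([True, False, True, False]))
assert np.array_equal(out, [10.0, 2.0, 30.0, 4.0])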

481 

482def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

483 """ 

484 Fallback to the behavior we would get if we did not define __array_ufunc__. 

485 

486 Notes 

487 ----- 

488 We are assuming that `self` is among `inputs`. 

489 """ 

490 if not any(x is self for x in inputs): 

491 raise NotImplementedError 

492 

493 new_inputs = [x if x is not self else np.asarray(x) for x in inputs] 

494 

495 return getattr(ufunc, method)(*new_inputs, **kwargs) 

496 
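Sketch of the fallback: self is replaced by its plain ndarray, so the ufunc returns a raw numpy result instead of a rewrapped pandas object. The example Series is hypothetical.

import numpy as np
import pandas as pd
from pandas.core.arraylike import default_array_ufunc

s = pd.Series([1.0, 2.0, 3.0], name="x")

res = default_array_ufunc(s, np.negative, "__call__", s)
assert isinstance(res, np.ndarray)            # not rewrapped into a Series
assert np.array_equal(res, [-1.0, -2.0, -3.0])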

497 

498def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

499 """ 

500 Dispatch ufunc reductions to self's reduction methods. 

501 """ 

502 assert method == "reduce" 

503 

504 if len(inputs) != 1 or inputs[0] is not self: 

505 return NotImplemented 

506 

507 if ufunc.__name__ not in REDUCTION_ALIASES: 

508 return NotImplemented 

509 

510 method_name = REDUCTION_ALIASES[ufunc.__name__] 

511 

512 # NB: we are assuming that min/max represent minimum/maximum methods, 

513 # which would not be accurate for e.g. Timestamp.min 

514 if not hasattr(self, method_name): 

515 return NotImplemented 

516 

517 if self.ndim > 1: 

518 if isinstance(self, ABCNDFrame): 

519 # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA 

520 kwargs["numeric_only"] = False 

521 

522 if "axis" not in kwargs: 

523 # For DataFrame reductions we don't want the default axis=0 

524 # Note: np.min is not a ufunc, but uses array_function_dispatch, 

525 # so calls DataFrame.min (without ever getting here) with the np.min 

526 # default of axis=None, which DataFrame.min catches and changes to axis=0. 

527 # np.minimum.reduce(df) gets here bc axis is not in kwargs, 

528 # so we set axis=0 to match the behavior of np.minimum.reduce(df.values) 

529 kwargs["axis"] = 0 

530 

531 # By default, numpy's reductions do not skip NaNs, so we have to 

532 # pass skipna=False 

533 return getattr(self, method_name)(skipna=False, **kwargs)
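A hedged sketch of the net effect of this reduction dispatch, assuming the aliased path above is taken: the ufunc reduction is rerouted to the matching pandas method with skipna=False (and axis=0 for DataFrames). The example objects are hypothetical.

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, np.nan])
# np.add.reduce -> Series.sum(skipna=False), so the NaN propagates
assert np.isnan(np.add.reduce(s))
assert s.sum() == 3.0                 # default pandas reduction skips NaN

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# np.maximum.reduce -> DataFrame.max(axis=0, skipna=False)
assert np.maximum.reduce(df).equals(df.max(axis=0))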