Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/ops/array_ops.py: 15%

181 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Functions for arithmetic and comparison operations on NumPy arrays and 

3ExtensionArrays. 

4""" 

5from __future__ import annotations 

6 

7import datetime 

8from functools import partial 

9import operator 

10from typing import Any 

11 

12import numpy as np 

13 

14from pandas._libs import ( 

15 NaT, 

16 Timedelta, 

17 Timestamp, 

18 lib, 

19 ops as libops, 

20) 

21from pandas._libs.tslibs import BaseOffset 

22from pandas._typing import ( 

23 ArrayLike, 

24 Shape, 

25) 

26 

27from pandas.core.dtypes.cast import ( 

28 construct_1d_object_array_from_listlike, 

29 find_common_type, 

30) 

31from pandas.core.dtypes.common import ( 

32 ensure_object, 

33 is_bool_dtype, 

34 is_integer_dtype, 

35 is_list_like, 

36 is_numeric_v_string_like, 

37 is_object_dtype, 

38 is_scalar, 

39) 

40from pandas.core.dtypes.generic import ( 

41 ABCExtensionArray, 

42 ABCIndex, 

43 ABCSeries, 

44) 

45from pandas.core.dtypes.missing import ( 

46 isna, 

47 notna, 

48) 

49 

50import pandas.core.computation.expressions as expressions 

51from pandas.core.construction import ensure_wrapped_if_datetimelike 

52from pandas.core.ops import ( 

53 missing, 

54 roperator, 

55) 

56from pandas.core.ops.dispatch import should_extension_dispatch 

57from pandas.core.ops.invalid import invalid_comparison 

58 

59 

def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare the array ``x`` element-wise against a scalar or array-like ``y``.

    Parameters
    ----------
    op : comparison operator
        Forwarded unchanged to the ``libops`` comparison routines.
    x : array with ``ravel``/``shape``
        Left operand; the result is reshaped to ``x.shape``.
    y : scalar, list, np.ndarray, Series or Index
        Right operand.  A list is converted to a 1D object array; an
        ndarray/Series/Index is cast to object dtype when it is not already.

    Returns
    -------
    The comparison result, reshaped to ``x.shape``.

    Raises
    ------
    ValueError
        If ``y`` is array-like and its shape differs from ``x.shape``.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # Everything that is not an ndarray/Series/Index goes through the
        # scalar comparison routine.
        return libops.scalar_compare(x.ravel(), y, op).reshape(x.shape)

    other = y if is_object_dtype(y.dtype) else y.astype(np.object_)

    if isinstance(other, (ABCSeries, ABCIndex)):
        # Unwrap to the underlying values before comparing.
        other = other._values

    if x.shape != other.shape:
        raise ValueError("Shapes must match", x.shape, other.shape)

    compared = libops.vec_compare(x.ravel(), other.ravel(), op)
    return compared.reshape(x.shape)

77 

78 

def _masked_arith_op(x: np.ndarray, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Positions that are masked out (null in ``x`` and/or ``y``) are filled
    with ``np.nan`` in the result.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Array with the same shape as ``x``.

    Raises
    ------
    ValueError
        If ``y`` is an ndarray and ``len(x) != len(y)``.
    TypeError
        If ``y`` is neither an ndarray nor a scalar.
    """
    # Validate before touching any ndarray attribute so a wrong type is
    # reported by the assertion rather than by an unrelated AttributeError.
    assert isinstance(x, np.ndarray), type(x)

    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()

    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        if len(x) != len(y):
            raise ValueError(x.shape, y.shape)
        ymask = notna(y)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        #  we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        mask = notna(xrav) & ymask.ravel()

        # See GH#5284, GH#5035, GH#19448 for historical reference
        if mask.any():
            result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(
                f"Cannot broadcast np.ndarray with operand of type { type(y) }"
            )

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        # The comparison must use the raveled `xrav`: `mask` is 1D, so
        # comparing the un-raveled `x` fails to broadcast for 2D input.
        # NOTE(review): `pow` here is the builtin, not `operator.pow` -
        # confirm which callers are expected to hit this branch.
        if op is pow:
            mask = np.where(xrav == 1, False, mask)
        elif op is roperator.rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            result[mask] = op(xrav[mask], y)

    np.putmask(result, ~mask, np.nan)
    result = result.reshape(x.shape)  # 2D compat
    return result

134 

135 

def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False):
    """
    Evaluate ``op(left, right)`` and return the result.

    If the first attempt raises TypeError and an object dtype is involved,
    retry on the non-missing values only (arithmetic ops only).

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
        Excludes DataFrame, Series, Index, ExtensionArray.
    op : binary operator
    is_cmp : bool, default False
        Whether this is a comparison operation.

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    # A str operand can never go through numexpr, so call `op` directly.
    if isinstance(right, str):
        evaluate = op
    else:
        evaluate = partial(expressions.evaluate, op)

    try:
        result = evaluate(left, right)
    except TypeError:
        # Comparisons never take the masked fallback, as that would handle
        # complex numbers incorrectly, see GH#32047.  Without an object
        # dtype on either side there is nothing to retry either.
        if is_cmp or not (is_object_dtype(left.dtype) or is_object_dtype(right)):
            raise
        # Object dtype: operate on the non-missing values only.
        result = _masked_arith_op(left, right, op)

    numpy_gave_up = is_cmp and (is_scalar(result) or result is NotImplemented)
    if numpy_gave_up:
        # numpy returned a scalar instead of operating element-wise
        # e.g. numeric array vs str
        # TODO: can remove this after dropping some future numpy version?
        return invalid_comparison(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, result)

183 

184 

def arithmetic_op(left: ArrayLike, right: Any, op):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    #  have already been called on `left` and `right`,
    #  and `maybe_prepare_scalar_for_op` has already been called on `right`
    # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)

    # Timedelta/Timestamp and other custom scalars are included in the check
    # because numexpr will fail on it, see GH#31457
    call_op_directly = (
        should_extension_dispatch(left, right)
        or isinstance(right, (Timedelta, BaseOffset, Timestamp))
        or right is NaT
    )
    if call_op_directly:
        return op(left, right)

    # TODO we should handle EAs consistently and move this check before the
    #  dispatch above (https://github.com/pandas-dev/pandas/issues/41165)
    _bool_arith_check(op, left, right)

    # error: Argument 1 to "_na_arithmetic_op" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    return _na_arithmetic_op(left, right, op)  # type: ignore[arg-type]

229 

230 

def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is an ndarray/ExtensionArray (or a list, which is
        converted to an ndarray) whose length differs from ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError(
                "Lengths must match to compare", lvalues.shape, rvalues.shape
            )

    if should_extension_dispatch(lvalues, rvalues) or (
        (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
        and not is_object_dtype(lvalues.dtype)
    ):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None; only `!=` is all-True
        make_filled = np.ones if op is operator.ne else np.zeros
        return make_filled(lvalues.shape, dtype=bool)

    if is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    if is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
        return comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    return _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)

293 

294 

def na_logical_op(x: np.ndarray, y, op):
    """
    Apply the logical operator ``op`` to ``x`` and ``y``.

    ``op(x, y)`` is tried first.  If that raises TypeError, fall back to the
    object-based ``libops`` routines (array or scalar variant, depending on
    ``y``).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    The result reshaped to ``x.shape``.

    Raises
    ------
    TypeError
        If the scalar fallback fails as well.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or bool)
        return op(x, y).reshape(x.shape)
    except TypeError:
        pass

    if isinstance(y, np.ndarray):
        # bool-bool dtype operations should be OK, should not get here
        assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
        x = ensure_object(x)
        y = ensure_object(y)
        result = libops.vec_binop(x.ravel(), y.ravel(), op)
        return result.reshape(x.shape)

    # let null fall thru
    assert lib.is_scalar(y)
    if not isna(y):
        y = bool(y)

    try:
        result = libops.scalar_binop(x, y, op)
    except (
        TypeError,
        ValueError,
        AttributeError,
        OverflowError,
        NotImplementedError,
    ) as err:
        typ = type(y).__name__
        raise TypeError(
            f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
            f"and scalar of type [{typ}]"
        ) from err

    return result.reshape(x.shape)

334 

335 

def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def fill_bool(values, reference=None):
        # Replace missing entries with False and, unless `reference` is
        # specifically not-boolean, cast to bool.
        if values.dtype.kind in ("c", "f", "O"):
            # dtypes that can hold NA
            na_mask = isna(values)
            if na_mask.any():
                values = values.astype(object)
                values[na_mask] = False

        if reference is None or is_bool_dtype(reference.dtype):
            values = values.astype(bool)
        return values

    left_is_int = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if isinstance(rvalues, np.ndarray):
        right_is_int = is_integer_dtype(rvalues.dtype)
        if not right_is_int:
            rvalues = fill_bool(rvalues, lvalues)
    else:
        # i.e. scalar
        right_is_int = lib.is_integer(rvalues)

    res_values = na_logical_op(lvalues, rvalues, op)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #  integer dtypes.  Otherwise these are boolean ops
    if left_is_int and right_is_int:
        return res_values
    return fill_bool(res_values)

400 

401 

def get_array_op(op):
    """
    Return a binary array operation corresponding to the given operator op.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.

    Returns
    -------
    functools.partial
        ``op`` itself is returned when it is already a partial or when its
        normalized name is ``"arith_op"``.

    Raises
    ------
    NotImplementedError
        If the normalized operator name is not recognized.
    """
    if isinstance(op, partial):
        # We get here via dispatch_to_series in DataFrame case
        #  e.g. test_rolling_consistency_var_debiasing_factors
        return op

    # Normalize e.g. "__radd__" -> "add": drop surrounding underscores,
    # then any leading "r" characters.
    op_name = op.__name__.strip("_").lstrip("r")

    if op_name == "arith_op":
        # Reached via DataFrame._combine_frame i.e. flex methods
        #  e.g. test_df_add_flex_filled_mixed_dtypes
        return op

    comparison_names = ("eq", "ne", "lt", "le", "gt", "ge")
    logical_names = ("and", "or", "xor", "rand", "ror", "rxor")
    arithmetic_names = (
        "add",
        "sub",
        "mul",
        "truediv",
        "floordiv",
        "mod",
        "divmod",
        "pow",
    )

    if op_name in comparison_names:
        return partial(comparison_op, op=op)
    if op_name in logical_names:
        return partial(logical_op, op=op)
    if op_name in arithmetic_names:
        return partial(arithmetic_op, op=op)

    raise NotImplementedError(op_name)

443 

444 

def maybe_prepare_scalar_for_op(obj, shape: Shape):
    """
    Cast non-pandas objects to pandas types to unify behavior of arithmetic
    and comparison operations.

    Parameters
    ----------
    obj: object
    shape : tuple[int]
        Shape to broadcast to when a null datetime64/timedelta64 scalar
        has to be expanded into an array.

    Returns
    -------
    out : object
        ``obj`` itself when no conversion applies.

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    exact_type = type(obj)

    if exact_type is datetime.timedelta:
        # GH#22390  cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return Timedelta(obj)

    if exact_type is datetime.datetime:
        # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above
        return Timestamp(obj)

    if isinstance(obj, np.datetime64):
        # GH#28080 numpy casts integer-dtype to datetime64 when doing
        #  array[int] + datetime64, which we do not allow
        if not isna(obj):
            return Timestamp(obj)

        from pandas.core.arrays import DatetimeArray

        # Avoid possible ambiguities with pd.NaT
        nat_ns = obj.astype("datetime64[ns]")
        return DatetimeArray(np.broadcast_to(nat_ns, shape))

    if isinstance(obj, np.timedelta64):
        if not isna(obj):
            # In particular non-nanosecond timedelta64 needs to be cast to
            #  nanoseconds, or else we get undesired behavior like
            #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
            return Timedelta(obj)

        from pandas.core.arrays import TimedeltaArray

        # wrapping timedelta64("NaT") in Timedelta returns NaT,
        #  which would incorrectly be treated as a datetime-NaT, so
        #  we broadcast and wrap in a TimedeltaArray
        nat_ns = obj.astype("timedelta64[ns]")
        return TimedeltaArray(np.broadcast_to(nat_ns, shape))

    return obj

502 

503 

# Operators that pandas refuses to evaluate between two boolean operands,
# each listed together with its reversed variant.
_BOOL_OP_NOT_ALLOWED = {
    operator.truediv, roperator.rtruediv,
    operator.floordiv, roperator.rfloordiv,
    operator.pow, roperator.rpow,
}


def _bool_arith_check(op, a, b):
    """
    In contrast to numpy, pandas raises an error for certain operations
    with booleans.

    Parameters
    ----------
    op : binary operator
    a : object with a ``dtype`` attribute
    b : object
        Array-like/dtype checked with ``is_bool_dtype``, or a bool scalar.

    Raises
    ------
    NotImplementedError
        If ``op`` is in ``_BOOL_OP_NOT_ALLOWED`` and both operands are boolean.
    """
    if op not in _BOOL_OP_NOT_ALLOWED:
        return

    if not is_bool_dtype(a.dtype):
        return

    right_is_bool = is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
    if right_is_bool:
        # Report the forward operator name, e.g. "rtruediv" -> "truediv".
        op_name = op.__name__.strip("_").lstrip("r")
        raise NotImplementedError(
            f"operator '{op_name}' not implemented for bool dtypes"
        )