Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/ops/array_ops.py: 15%
181 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Functions for arithmetic and comparison operations on NumPy arrays and
3ExtensionArrays.
4"""
5from __future__ import annotations
7import datetime
8from functools import partial
9import operator
10from typing import Any
12import numpy as np
14from pandas._libs import (
15 NaT,
16 Timedelta,
17 Timestamp,
18 lib,
19 ops as libops,
20)
21from pandas._libs.tslibs import BaseOffset
22from pandas._typing import (
23 ArrayLike,
24 Shape,
25)
27from pandas.core.dtypes.cast import (
28 construct_1d_object_array_from_listlike,
29 find_common_type,
30)
31from pandas.core.dtypes.common import (
32 ensure_object,
33 is_bool_dtype,
34 is_integer_dtype,
35 is_list_like,
36 is_numeric_v_string_like,
37 is_object_dtype,
38 is_scalar,
39)
40from pandas.core.dtypes.generic import (
41 ABCExtensionArray,
42 ABCIndex,
43 ABCSeries,
44)
45from pandas.core.dtypes.missing import (
46 isna,
47 notna,
48)
50import pandas.core.computation.expressions as expressions
51from pandas.core.construction import ensure_wrapped_if_datetimelike
52from pandas.core.ops import (
53 missing,
54 roperator,
55)
56from pandas.core.ops.dispatch import should_extension_dispatch
57from pandas.core.ops.invalid import invalid_comparison
def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Apply the comparison ``op`` between array ``x`` and ``y``.

    Parameters
    ----------
    op : comparison operator
        Forwarded to ``libops.vec_compare`` / ``libops.scalar_compare``.
    x : np.ndarray
        Left operand; it is raveled for the comparison and its shape is
        restored on the result.
    y : list, np.ndarray, Series, Index or scalar
        Right operand. Lists are converted to a 1D object array; array-likes
        are cast to object dtype and unwrapped to their underlying values.

    Returns
    -------
    np.ndarray
        Comparison result reshaped to ``x.shape``.

    Raises
    ------
    ValueError
        If ``y`` is array-like and its shape differs from ``x.shape``.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # Anything that is not array-like is compared as a single value.
        flat = libops.scalar_compare(x.ravel(), y, op)
        return flat.reshape(x.shape)

    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    if isinstance(y, (ABCSeries, ABCIndex)):
        # Unwrap pandas containers to the array they hold.
        y = y._values

    if x.shape != y.shape:
        raise ValueError("Shapes must match", x.shape, y.shape)

    flat = libops.vec_compare(x.ravel(), y.ravel(), op)
    return flat.reshape(x.shape)
79def _masked_arith_op(x: np.ndarray, y, op):
80 """
81 If the given arithmetic operation fails, attempt it again on
82 only the non-null elements of the input array(s).
84 Parameters
85 ----------
86 x : np.ndarray
87 y : np.ndarray, Series, Index
88 op : binary operator
89 """
90 # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
91 # the logic valid for both Series and DataFrame ops.
92 xrav = x.ravel()
93 assert isinstance(x, np.ndarray), type(x)
94 if isinstance(y, np.ndarray):
95 dtype = find_common_type([x.dtype, y.dtype])
96 result = np.empty(x.size, dtype=dtype)
98 if len(x) != len(y):
99 raise ValueError(x.shape, y.shape)
100 else:
101 ymask = notna(y)
103 # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
104 # we would get int64 dtype, see GH#19956
105 yrav = y.ravel()
106 mask = notna(xrav) & ymask.ravel()
108 # See GH#5284, GH#5035, GH#19448 for historical reference
109 if mask.any():
110 result[mask] = op(xrav[mask], yrav[mask])
112 else:
113 if not is_scalar(y):
114 raise TypeError(
115 f"Cannot broadcast np.ndarray with operand of type { type(y) }"
116 )
118 # mask is only meaningful for x
119 result = np.empty(x.size, dtype=x.dtype)
120 mask = notna(xrav)
122 # 1 ** np.nan is 1. So we have to unmask those.
123 if op is pow:
124 mask = np.where(x == 1, False, mask)
125 elif op is roperator.rpow:
126 mask = np.where(y == 1, False, mask)
128 if mask.any():
129 result[mask] = op(xrav[mask], y)
131 np.putmask(result, ~mask, np.nan)
132 result = result.reshape(x.shape) # 2D compat
133 return result
136def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False):
137 """
138 Return the result of evaluating op on the passed in values.
140 If native types are not compatible, try coercion to object dtype.
142 Parameters
143 ----------
144 left : np.ndarray
145 right : np.ndarray or scalar
146 Excludes DataFrame, Series, Index, ExtensionArray.
147 is_cmp : bool, default False
148 If this a comparison operation.
150 Returns
151 -------
152 array-like
154 Raises
155 ------
156 TypeError : invalid operation
157 """
158 if isinstance(right, str):
159 # can never use numexpr
160 func = op
161 else:
162 func = partial(expressions.evaluate, op)
164 try:
165 result = func(left, right)
166 except TypeError:
167 if not is_cmp and (is_object_dtype(left.dtype) or is_object_dtype(right)):
168 # For object dtype, fallback to a masked operation (only operating
169 # on the non-missing values)
170 # Don't do this for comparisons, as that will handle complex numbers
171 # incorrectly, see GH#32047
172 result = _masked_arith_op(left, right, op)
173 else:
174 raise
176 if is_cmp and (is_scalar(result) or result is NotImplemented):
177 # numpy returned a scalar instead of operating element-wise
178 # e.g. numeric array vs str
179 # TODO: can remove this after dropping some future numpy version?
180 return invalid_comparison(left, right, op)
182 return missing.dispatch_fill_zeros(op, left, right, result)
def arithmetic_op(left: ArrayLike, right: Any, op):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    #  have already been called on `left` and `right`,
    #  and `maybe_prepare_scalar_for_op` has already been called on `right`
    # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)

    # Timedelta/Timestamp and other custom scalars are included in the check
    # because numexpr will fail on it, see GH#31457
    call_op_directly = (
        should_extension_dispatch(left, right)
        or isinstance(right, (Timedelta, BaseOffset, Timestamp))
        or right is NaT
    )
    if call_op_directly:
        return op(left, right)

    # TODO we should handle EAs consistently and move this check before the
    # dispatch above (https://github.com/pandas-dev/pandas/issues/41165)
    _bool_arith_check(op, left, right)

    # error: Argument 1 to "_na_arithmetic_op" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
    return _na_arithmetic_op(left, right, op)  # type: ignore[arg-type]
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is array-like and its length differs from ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    # TODO: make this treatment consistent across ops and classes.
    #  We are not catching all listlikes here (e.g. frozenset, tuple)
    #  The ambiguous case is object-dtype.  See GH#27803
    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)) and len(lvalues) != len(
        rvalues
    ):
        raise ValueError(
            "Lengths must match to compare", lvalues.shape, rvalues.shape
        )

    if should_extension_dispatch(lvalues, rvalues) or (
        (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
        and not is_object_dtype(lvalues.dtype)
    ):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None: only `!=` is all-True,
        # every other comparison is all-False.
        make_filled = np.ones if op is operator.ne else np.zeros
        return make_filled(lvalues.shape, dtype=bool)

    if is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    if is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
        return comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    return _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)
def na_logical_op(x: np.ndarray, y, op):
    """
    Apply the logical operator ``op`` to ``x`` and ``y``.

    ``op(x, y)`` is tried first. If it raises TypeError, the operation is
    retried through ``libops.vec_binop`` (ndarray ``y``, both operands cast
    to object) or ``libops.scalar_binop`` (scalar ``y``).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Result reshaped to ``x.shape``.

    Raises
    ------
    TypeError
        If the scalar fallback fails as well.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or bool)
        result = op(x, y)
    except TypeError:
        if not isinstance(y, np.ndarray):
            # let null fall thru
            assert lib.is_scalar(y)
            if not isna(y):
                y = bool(y)
            try:
                result = libops.scalar_binop(x, y, op)
            except (
                TypeError,
                ValueError,
                AttributeError,
                OverflowError,
                NotImplementedError,
            ) as err:
                scalar_type_name = type(y).__name__
                raise TypeError(
                    f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                    f"and scalar of type [{scalar_type_name}]"
                ) from err
        else:
            # bool-bool dtype operations should be OK, should not get here
            assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
            x = ensure_object(x)
            y = ensure_object(y)
            result = libops.vec_binop(x.ravel(), y.ravel(), op)

    return result.reshape(x.shape)
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def fill_int(x):
        # int vs int results are returned untouched
        return x

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in ["c", "f", "O"]:
            # dtypes that can hold NA
            na_mask = isna(x)
            if na_mask.any():
                x = x.astype(object)
                x[na_mask] = False

        if left is None or is_bool_dtype(left.dtype):
            x = x.astype(bool)
        return x

    is_self_int_dtype = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if isinstance(rvalues, np.ndarray):
        is_other_int_dtype = is_integer_dtype(rvalues.dtype)
        if not is_other_int_dtype:
            rvalues = fill_bool(rvalues, lvalues)
    else:
        # i.e. scalar
        is_other_int_dtype = lib.is_integer(rvalues)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #   integer dtypes.  Otherwise these are boolean ops
    filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool

    res_values = na_logical_op(lvalues, rvalues, op)
    # error: Cannot call function of unknown type
    return filler(res_values)  # type: ignore[operator]
def get_array_op(op):
    """
    Return a binary array operation corresponding to the given operator op.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.

    Returns
    -------
    functools.partial
        ``op`` itself is returned unchanged when it already is a partial or
        when its normalized name is "arith_op".

    Raises
    ------
    NotImplementedError
        If the normalized operator name is not recognized.
    """
    if isinstance(op, partial):
        # We get here via dispatch_to_series in DataFrame case
        #  e.g. test_rolling_consistency_var_debiasing_factors
        return op

    # Normalize e.g. "__radd__" / "radd" / "and_" to "add" / "add" / "and".
    op_name = op.__name__.strip("_").lstrip("r")
    if op_name == "arith_op":
        # Reached via DataFrame._combine_frame i.e. flex methods
        #  e.g. test_df_add_flex_filled_mixed_dtypes
        return op

    comparison_names = {"eq", "ne", "lt", "le", "gt", "ge"}
    logical_names = {"and", "or", "xor", "rand", "ror", "rxor"}
    arithmetic_names = {
        "add",
        "sub",
        "mul",
        "truediv",
        "floordiv",
        "mod",
        "divmod",
        "pow",
    }

    if op_name in comparison_names:
        array_func = comparison_op
    elif op_name in logical_names:
        array_func = logical_op
    elif op_name in arithmetic_names:
        array_func = arithmetic_op
    else:
        raise NotImplementedError(op_name)

    return partial(array_func, op=op)
def maybe_prepare_scalar_for_op(obj, shape: Shape):
    """
    Cast non-pandas objects to pandas types to unify behavior of arithmetic
    and comparison operations.

    Parameters
    ----------
    obj: object
    shape : tuple[int]
        Shape the NaT values are broadcast to when ``obj`` is a missing
        ``np.datetime64`` / ``np.timedelta64``.

    Returns
    -------
    out : object
        ``obj`` itself when none of the special cases apply.

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    obj_type = type(obj)

    if obj_type is datetime.timedelta:
        # GH#22390  cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return Timedelta(obj)

    if obj_type is datetime.datetime:
        # cast up to Timestamp to rely on Timestamp implementation,
        # for the same reason as the Timedelta cast
        return Timestamp(obj)

    if isinstance(obj, np.datetime64):
        # GH#28080 numpy casts integer-dtype to datetime64 when doing
        #  array[int] + datetime64, which we do not allow
        if not isna(obj):
            return Timestamp(obj)

        from pandas.core.arrays import DatetimeArray

        # Avoid possible ambiguities with pd.NaT
        nat_ns = obj.astype("datetime64[ns]")
        return DatetimeArray(np.broadcast_to(nat_ns, shape))

    if isinstance(obj, np.timedelta64):
        if not isna(obj):
            # In particular non-nanosecond timedelta64 needs to be cast to
            #  nanoseconds, or else we get undesired behavior like
            #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
            return Timedelta(obj)

        from pandas.core.arrays import TimedeltaArray

        # wrapping timedelta64("NaT") in Timedelta returns NaT,
        #  which would incorrectly be treated as a datetime-NaT, so
        #  we broadcast and wrap in a TimedeltaArray
        nat_ns = obj.astype("timedelta64[ns]")
        return TimedeltaArray(np.broadcast_to(nat_ns, shape))

    return obj
504_BOOL_OP_NOT_ALLOWED = {
505 operator.truediv,
506 roperator.rtruediv,
507 operator.floordiv,
508 roperator.rfloordiv,
509 operator.pow,
510 roperator.rpow,
511}
514def _bool_arith_check(op, a, b):
515 """
516 In contrast to numpy, pandas raises an error for certain operations
517 with booleans.
518 """
519 if op in _BOOL_OP_NOT_ALLOWED:
520 if is_bool_dtype(a.dtype) and (
521 is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
522 ):
523 op_name = op.__name__.strip("_").lstrip("r")
524 raise NotImplementedError(
525 f"operator '{op_name}' not implemented for bool dtypes"
526 )