Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/ops.py: 35%
293 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Operator classes for eval.
3"""
5from __future__ import annotations
7from datetime import datetime
8from functools import partial
9import operator
10from typing import (
11 Callable,
12 Iterable,
13 Literal,
14)
16import numpy as np
18from pandas._libs.tslibs import Timestamp
20from pandas.core.dtypes.common import (
21 is_list_like,
22 is_scalar,
23)
25import pandas.core.common as com
26from pandas.core.computation.common import (
27 ensure_decoded,
28 result_type_many,
29)
30from pandas.core.computation.scope import DEFAULT_GLOBALS
32from pandas.io.formats.printing import (
33 pprint_thing,
34 pprint_thing_encoded,
35)
# Names of the reductions known to this module.
REDUCTIONS = ("sum", "prod")

# Names of single-argument math functions; FuncNode looks each name up on numpy.
_unary_math_ops = (
    "sin",
    "cos",
    "exp",
    "log",
    "expm1",
    "log1p",
    "sqrt",
    "sinh",
    "cosh",
    "tanh",
    "arcsin",
    "arccos",
    "arctan",
    "arccosh",
    "arcsinh",
    "arctanh",
    "abs",
    "log10",
    "floor",
    "ceil",
)
# Names of two-argument math functions.
_binary_math_ops = ("arctan2",)

# Every function name FuncNode accepts.
MATHOPS = _unary_math_ops + _binary_math_ops


# Prefix marking a name as local; Term.local_name strips it before lookup.
LOCAL_TAG = "__pd_eval_local_"
class Term:
    """
    Leaf node of an expression: a name whose value is resolved through ``env``.

    Parameters
    ----------
    name : str or object
        The name to resolve. A non-string ``name`` produces a ``Constant``
        instead of a ``Term`` (see ``__new__``).
    env : object
        Must provide ``scope``, ``resolve`` and ``swapkey``; these are the
        only members this class uses.
    side : object, optional
        Stored on the instance; not used by this class.
    encoding : object, optional
        Stored on the instance; not used by this class.
    """

    def __new__(cls, name, env, side=None, encoding=None):
        # anything that is not a string is a literal value, not a name
        klass = Constant if not isinstance(name, str) else cls
        # error: Argument 2 for "super" not an instance of argument 1
        supr_new = super(Term, klass).__new__  # type: ignore[misc]
        return supr_new(klass)

    is_local: bool

    def __init__(self, name, env, side=None, encoding=None) -> None:
        # name is a str for Term, but may be something else for subclasses
        self._name = name
        self.env = env
        self.side = side
        tname = str(name)
        self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS
        # resolved eagerly, so lookup errors surface at construction time
        self._value = self._resolve_name()
        self.encoding = encoding

    @property
    def local_name(self) -> str:
        """The name with every occurrence of ``LOCAL_TAG`` removed."""
        return self.name.replace(LOCAL_TAG, "")

    def __repr__(self) -> str:
        return pprint_thing(self.name)

    def __call__(self, *args, **kwargs):
        """Return the current value; all arguments are ignored."""
        return self.value

    def evaluate(self, *args, **kwargs) -> Term:
        """Return ``self``; a term needs no further evaluation."""
        return self

    def _resolve_name(self):
        """
        Look the name up in ``env`` and return the resolved object.

        Raises
        ------
        NotImplementedError
            If the resolved object has an ``ndim`` greater than 2.
        """
        local_name = str(self.local_name)
        is_local = self.is_local
        # a name bound to a type in the scope is never resolved as a local
        if local_name in self.env.scope and isinstance(
            self.env.scope[local_name], type
        ):
            is_local = False

        res = self.env.resolve(local_name, is_local=is_local)
        self.update(res)

        if hasattr(res, "ndim") and res.ndim > 2:
            raise NotImplementedError(
                "N-dimensional objects, where N > 2, are not supported with eval"
            )
        return res

    def update(self, value) -> None:
        """
        search order for local (i.e., @variable) variables:

        scope, key_variable
        [('locals', 'local_name'),
         ('globals', 'local_name'),
         ('locals', 'key'),
         ('globals', 'key')]
        """
        key = self.name

        # if it's a variable name (otherwise a constant)
        if isinstance(key, str):
            self.env.swapkey(self.local_name, key, new_value=value)

        self.value = value

    @property
    def is_scalar(self) -> bool:
        return is_scalar(self._value)

    @property
    def type(self):
        """The dtype of the value, or its Python type when it has no dtype."""
        try:
            # potentially very slow for large, mixed dtype frames
            return self._value.values.dtype
        except AttributeError:
            try:
                # ndarray
                return self._value.dtype
            except AttributeError:
                # scalar
                return type(self._value)

    return_type = type

    @property
    def raw(self) -> str:
        return f"{type(self).__name__}(name={self.name!r}, type={self.type})"

    @property
    def is_datetime(self) -> bool:
        try:
            # numpy dtypes expose their scalar type as ``.type``
            t = self.type.type
        except AttributeError:
            t = self.type

        return issubclass(t, (datetime, np.datetime64))

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value

    @property
    def name(self):
        return self._name

    @property
    def ndim(self) -> int:
        return self._value.ndim
class Constant(Term):
    """
    A term holding a literal value: the value it was built with is both its
    name and its value, so nothing is looked up in ``env``.
    """

    def __init__(self, value, env, side=None, encoding=None) -> None:
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        # the "name" passed to the constructor is the value itself
        return self._name

    @property
    def name(self):
        return self.value

    def __repr__(self) -> str:
        # in python 2 str() of float
        # can truncate shorter than repr()
        return repr(self.name)
# Op.__init__ rewrites these keyword operators to their symbolic form.
_bool_op_map = {"not": "~", "and": "&", "or": "|"}
class Op:
    """
    Hold an operator of arbitrary arity.

    Parameters
    ----------
    op : str
        Operator token; ``"not"``, ``"and"`` and ``"or"`` are stored as
        ``"~"``, ``"&"`` and ``"|"``.
    operands : iterable of Term or Op
        Stored as given and iterated by several properties.
    encoding : object, optional
        Stored on the instance.
    """

    op: str

    def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None) -> None:
        self.op = _bool_op_map.get(op, op)
        self.operands = operands
        self.encoding = encoding

    def __iter__(self):
        return iter(self.operands)

    def __repr__(self) -> str:
        """
        Print a generic n-ary operator and its operands using infix notation.
        """
        # recurse over the operands
        parened = (f"({pprint_thing(opr)})" for opr in self.operands)
        return pprint_thing(f" {self.op} ".join(parened))

    @property
    def return_type(self):
        # clobber types to bool if the op is a boolean operator
        if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
            return np.bool_
        return result_type_many(*(term.type for term in com.flatten(self)))

    @property
    def has_invalid_return_type(self) -> bool:
        """
        Whether the result type is object while some operand is not object.
        """
        types = self.operand_types
        obj_dtype_set = frozenset([np.dtype("object")])
        # bool() so the value matches the annotation instead of leaking the
        # frozenset produced by the set difference
        return bool(self.return_type == object and types - obj_dtype_set)

    @property
    def operand_types(self):
        """The set of types of all leaf terms under this node."""
        return frozenset(term.type for term in com.flatten(self))

    @property
    def is_scalar(self) -> bool:
        return all(operand.is_scalar for operand in self.operands)

    @property
    def is_datetime(self) -> bool:
        try:
            # numpy dtypes expose their scalar type as ``.type``
            t = self.return_type.type
        except AttributeError:
            t = self.return_type

        return issubclass(t, (datetime, np.datetime64))
def _in(x, y):
    """
    Compute the vectorized membership of ``x in y`` if possible, otherwise
    use Python.

    ``x.isin(y)`` is tried first. If ``x`` has no ``isin`` but is list-like,
    ``y.isin(x)`` is tried. The plain ``in`` operator is the final fallback.
    """
    try:
        return x.isin(y)
    except AttributeError:
        if is_list_like(x):
            try:
                return y.isin(x)
            except AttributeError:
                # neither side is vectorized; fall through to ``in``
                pass
        return x in y
def _not_in(x, y):
    """
    Compute the vectorized membership of ``x not in y`` if possible,
    otherwise use Python.

    ``~x.isin(y)`` is tried first. If ``x`` has no ``isin`` but is list-like,
    ``~y.isin(x)`` is tried. The plain ``not in`` operator is the final
    fallback.
    """
    try:
        return ~x.isin(y)
    except AttributeError:
        if is_list_like(x):
            try:
                return ~y.isin(x)
            except AttributeError:
                # neither side is vectorized; fall through to ``not in``
                pass
        return x not in y
# Each *_SYMS tuple is zipped positionally with the matching *_funcs tuple,
# so the two must stay in the same order.
CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
_cmp_ops_funcs = (
    operator.gt,
    operator.lt,
    operator.ge,
    operator.le,
    operator.eq,
    operator.ne,
    _in,
    _not_in,
)
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))

# "and"/"or" map to the same bitwise functions as "&"/"|".
BOOL_OPS_SYMS = ("&", "|", "and", "or")
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))

ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
_arith_ops_funcs = (
    operator.add,
    operator.sub,
    operator.mul,
    operator.truediv,
    operator.pow,
    operator.floordiv,
    operator.mod,
)
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))

SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%")
_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod)
_special_case_arith_ops_dict = dict(
    zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs)
)

# Union of the comparison, boolean and arithmetic tables; BinOp looks its
# function up here.
_binary_ops_dict = {}

for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
    _binary_ops_dict.update(d)
def _cast_inplace(terms, acceptable_dtypes, dtype):
    """
    Cast an expression inplace.

    Parameters
    ----------
    terms : Op
        The expression that should cast.
    acceptable_dtypes : list of acceptable numpy.dtype
        Will not cast if term's dtype in this list.
    dtype : str or numpy.dtype
        The dtype to cast to.
    """
    dt = np.dtype(dtype)
    for term in terms:
        if term.type in acceptable_dtypes:
            continue

        try:
            new_value = term.value.astype(dt)
        except AttributeError:
            # no ``astype`` on the value: convert through the dtype's scalar type
            new_value = dt.type(term.value)
        term.update(new_value)
def is_term(obj) -> bool:
    """Return True if ``obj`` is a ``Term`` instance (subclasses included)."""
    return isinstance(obj, Term)
class BinOp(Op):
    """
    Hold a binary operator and its operands.

    Parameters
    ----------
    op : str
    lhs : Term or Op
    rhs : Term or Op

    Raises
    ------
    NotImplementedError
        For a boolean operator with a scalar operand unless both sides are
        of boolean type.
    ValueError
        If ``op`` is not a known binary operator.
    """

    def __init__(self, op: str, lhs, rhs) -> None:
        super().__init__(op, (lhs, rhs))
        self.lhs = lhs
        self.rhs = rhs

        self._disallow_scalar_only_bool_ops()

        self.convert_values()

        try:
            self.func = _binary_ops_dict[op]
        except KeyError as err:
            # has to be made a list for python3
            keys = list(_binary_ops_dict)
            raise ValueError(
                f"Invalid binary operator {op!r}, valid operators are {keys}"
            ) from err

    def __call__(self, env):
        """
        Recursively evaluate an expression in Python space.

        Parameters
        ----------
        env : Scope

        Returns
        -------
        object
            The result of an evaluated expression.
        """
        # recurse over the left/right nodes
        left = self.lhs(env)
        right = self.rhs(env)

        return self.func(left, right)

    def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
        """
        Evaluate a binary operation *before* being passed to the engine.

        Parameters
        ----------
        env : Scope
        engine : str
        parser : str
        term_type : type
        eval_in_python : list

        Returns
        -------
        term_type
            The "pre-evaluated" expression as an instance of ``term_type``
        """
        if engine == "python":
            res = self(env)
        else:
            # recurse over the left/right nodes

            left = self.lhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )

            right = self.rhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )

            # base cases
            if self.op in eval_in_python:
                res = self.func(left.value, right.value)
            else:
                # imported here rather than at module level
                from pandas.core.computation.eval import eval

                res = eval(self, local_dict=env, engine=engine, parser=parser)

        # store the result under a temporary name and hand back a term for it
        name = env.add_tmp(res)
        return term_type(name, env=env)

    def convert_values(self) -> None:
        """
        Convert datetimes to a comparable value in an expression.

        When one side is a datetime term and the other a scalar term, the
        scalar is replaced by a ``Timestamp``; timezone-aware results are
        converted to UTC.
        """

        def stringify(value):
            encoder: Callable
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        def to_timestamp(term) -> None:
            # replace the term's scalar value with a Timestamp, in place
            v = term.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = Timestamp(ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert("UTC")
            term.update(v)

        lhs, rhs = self.lhs, self.rhs

        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
            to_timestamp(rhs)

        # checked after the first conversion, so it sees the updated rhs
        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
            to_timestamp(lhs)

    def _disallow_scalar_only_bool_ops(self) -> None:
        """
        Reject boolean operators applied to a scalar of non-boolean type.

        Raises
        ------
        NotImplementedError
            If the operator is boolean, at least one side is scalar, and the
            two sides are not both of boolean type.
        """
        rhs = self.rhs
        lhs = self.lhs

        # GH#24883 unwrap dtype if necessary to ensure we have a type object
        rhs_rt = rhs.return_type
        rhs_rt = getattr(rhs_rt, "type", rhs_rt)
        lhs_rt = lhs.return_type
        lhs_rt = getattr(lhs_rt, "type", lhs_rt)
        if (
            (lhs.is_scalar or rhs.is_scalar)
            and self.op in _bool_ops_dict
            and (
                not (
                    issubclass(rhs_rt, (bool, np.bool_))
                    and issubclass(lhs_rt, (bool, np.bool_))
                )
            )
        ):
            raise NotImplementedError("cannot evaluate scalar only bool ops")
def isnumeric(dtype) -> bool:
    """
    Return True if ``dtype``, coerced through ``np.dtype``, has a scalar
    type that subclasses ``np.number``.
    """
    return issubclass(np.dtype(dtype).type, np.number)
class Div(BinOp):
    """
    Div operator to special case casting.

    Parameters
    ----------
    lhs, rhs : Term or Op
        The Terms or Ops in the ``/`` expression.

    Raises
    ------
    TypeError
        If either operand's return type is not numeric.
    """

    def __init__(self, lhs, rhs) -> None:
        super().__init__("/", lhs, rhs)

        if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
            raise TypeError(
                f"unsupported operand type(s) for {self.op}: "
                f"'{lhs.return_type}' and '{rhs.return_type}'"
            )

        # do not upcast float32s to float64 un-necessarily
        # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed
        acceptable_dtypes = [np.float32, np.float64]
        _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
# Zipped positionally; both "~" and "not" map to operator.invert.
UNARY_OPS_SYMS = ("+", "-", "~", "not")
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
class UnaryOp(Op):
    """
    Hold a unary operator and its operands.

    Parameters
    ----------
    op : str
        The token used to represent the operator.
    operand : Term or Op
        The Term or Op operand to the operator.

    Raises
    ------
    ValueError
        * If no function associated with the passed operator token is found.
    """

    def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None:
        super().__init__(op, (operand,))
        self.operand = operand

        try:
            self.func = _unary_ops_dict[op]
        except KeyError as err:
            raise ValueError(
                f"Invalid unary operator {op!r}, "
                f"valid operators are {UNARY_OPS_SYMS}"
            ) from err

    def __call__(self, env):
        """Evaluate the operand with ``env`` and apply the operator to it."""
        operand = self.operand(env)
        # error: Cannot call function of unknown type
        return self.func(operand)  # type: ignore[operator]

    def __repr__(self) -> str:
        return pprint_thing(f"{self.op}({self.operand})")

    @property
    def return_type(self) -> np.dtype:
        """
        ``bool`` dtype for a boolean operand or a comparison/boolean ``Op``
        operand, ``int`` dtype otherwise.
        """
        operand = self.operand
        if operand.return_type == np.dtype("bool"):
            return np.dtype("bool")
        if isinstance(operand, Op) and (
            operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict
        ):
            return np.dtype("bool")
        return np.dtype("int")
class MathCall(Op):
    """
    A call of a math function on its operands.

    Parameters
    ----------
    func : object
        Must expose ``name`` (used as the operator token) and ``func`` (the
        callable applied to the evaluated operands).
    args : iterable of Term or Op
        The operands of the call.
    """

    def __init__(self, func, args) -> None:
        super().__init__(func.name, args)
        self.func = func

    def __call__(self, env):
        # error: "Op" not callable
        operands = [op(env) for op in self.operands]  # type: ignore[operator]
        # silence numpy floating-point warnings while the function runs
        with np.errstate(all="ignore"):
            return self.func.func(*operands)

    def __repr__(self) -> str:
        operands = map(str, self.operands)
        return pprint_thing(f"{self.op}({','.join(operands)})")
class FuncNode:
    """
    A math function, looked up by name on numpy.

    Parameters
    ----------
    name : str
        Must be one of ``MATHOPS``.

    Raises
    ------
    ValueError
        If ``name`` is not in ``MATHOPS``.
    """

    def __init__(self, name: str) -> None:
        if name not in MATHOPS:
            raise ValueError(f'"{name}" is not a supported function')
        self.name = name
        self.func = getattr(np, name)

    def __call__(self, *args):
        """Build a ``MathCall`` applying this function to ``args``."""
        return MathCall(self, args)