Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/ops.py: 35%

293 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Operator classes for eval. 

3""" 

4 

5from __future__ import annotations 

6 

7from datetime import datetime 

8from functools import partial 

9import operator 

10from typing import ( 

11 Callable, 

12 Iterable, 

13 Literal, 

14) 

15 

16import numpy as np 

17 

18from pandas._libs.tslibs import Timestamp 

19 

20from pandas.core.dtypes.common import ( 

21 is_list_like, 

22 is_scalar, 

23) 

24 

25import pandas.core.common as com 

26from pandas.core.computation.common import ( 

27 ensure_decoded, 

28 result_type_many, 

29) 

30from pandas.core.computation.scope import DEFAULT_GLOBALS 

31 

32from pandas.io.formats.printing import ( 

33 pprint_thing, 

34 pprint_thing_encoded, 

35) 

36 

# Names of the reductions listed for eval expressions.
REDUCTIONS = ("sum", "prod")

# Names of single-argument math functions; each is looked up on numpy by
# this exact name when a FuncNode is built.
_unary_math_ops = (
    # exponentials, logarithms and roots
    "sin",
    "cos",
    "exp",
    "log",
    "expm1",
    "log1p",
    "sqrt",
    # hyperbolic functions
    "sinh",
    "cosh",
    "tanh",
    # inverse trigonometric / hyperbolic functions
    "arcsin",
    "arccos",
    "arctan",
    "arccosh",
    "arcsinh",
    "arctanh",
    # miscellaneous
    "abs",
    "log10",
    "floor",
    "ceil",
)

# Names of two-argument math functions.
_binary_math_ops = ("arctan2",)

# Every supported math function name: unary names first, then binary ones.
MATHOPS = _unary_math_ops + _binary_math_ops


# Prefix marking a name as a local (``@variable``) reference.
LOCAL_TAG = "__pd_eval_local_"

67 

68 

class Term:
    """
    A named value inside an expression.

    Parameters
    ----------
    name : str or object
        Variable name. Anything that is not a ``str`` makes the constructor
        return a ``Constant`` instead.
    env : Scope
        Object used to resolve ``name``; it must provide ``scope``,
        ``resolve`` and ``swapkey``.
    side : optional
        Stored unchanged on the instance.
    encoding : optional
        Stored unchanged on the instance.
    """

    is_local: bool

    def __new__(cls, name, env, side=None, encoding=None):
        # Non-string names are literal values and are built as Constant.
        klass = cls if isinstance(name, str) else Constant
        # error: Argument 2 for "super" not an instance of argument 1
        return super(Term, klass).__new__(klass)  # type: ignore[misc]

    def __init__(self, name, env, side=None, encoding=None) -> None:
        # name is a str for Term, but may be something else for subclasses
        self._name = name
        self.env = env
        self.side = side
        as_text = str(name)
        self.is_local = as_text.startswith(LOCAL_TAG) or as_text in DEFAULT_GLOBALS
        # Resolution happens before ``encoding`` is stored.
        self._value = self._resolve_name()
        self.encoding = encoding

    @property
    def local_name(self) -> str:
        """Name with every occurrence of the local tag removed."""
        return self.name.replace(LOCAL_TAG, "")

    def __repr__(self) -> str:
        return pprint_thing(self.name)

    def __call__(self, *args, **kwargs):
        """Return the current value; all arguments are ignored."""
        return self.value

    def evaluate(self, *args, **kwargs) -> Term:
        """Return the term itself; all arguments are ignored."""
        return self

    def _resolve_name(self):
        """
        Look the name up in ``env``, store the result and return it.

        Raises
        ------
        NotImplementedError
            If the resolved object has an ``ndim`` greater than 2.
        """
        local_name = str(self.local_name)
        # A name bound to a class in the scope is never resolved as a local.
        shadowed_by_type = local_name in self.env.scope and isinstance(
            self.env.scope[local_name], type
        )
        is_local = False if shadowed_by_type else self.is_local

        resolved = self.env.resolve(local_name, is_local=is_local)
        self.update(resolved)

        if hasattr(resolved, "ndim") and resolved.ndim > 2:
            raise NotImplementedError(
                "N-dimensional objects, where N > 2, are not supported with eval"
            )
        return resolved

    def update(self, value) -> None:
        """
        search order for local (i.e., @variable) variables:

        scope, key_variable
        [('locals', 'local_name'),
         ('globals', 'local_name'),
         ('locals', 'key'),
         ('globals', 'key')]
        """
        key = self.name

        # only variable names (strings) are swapped in the environment;
        # anything else is a constant
        if isinstance(key, str):
            self.env.swapkey(self.local_name, key, new_value=value)

        self.value = value

    @property
    def is_scalar(self) -> bool:
        return is_scalar(self._value)

    @property
    def type(self):
        """dtype of the held value, or its Python type when it has no dtype."""
        held = self._value
        try:
            # potentially very slow for large, mixed dtype frames
            return held.values.dtype
        except AttributeError:
            pass
        try:
            # ndarray
            return held.dtype
        except AttributeError:
            # scalar
            return type(held)

    return_type = type

    @property
    def raw(self) -> str:
        return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})"

    @property
    def is_datetime(self) -> bool:
        kind = self.type
        # unwrap a dtype to its scalar type; plain types pass through
        kind = getattr(kind, "type", kind)
        return issubclass(kind, (datetime, np.datetime64))

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value

    @property
    def name(self):
        return self._name

    @property
    def ndim(self) -> int:
        return self._value.ndim

183 

184 

class Constant(Term):
    """
    A literal value inside an expression.

    The value passed in takes the place of a name: it is returned as-is by
    name resolution and is also what ``name`` reports.
    """

    def __init__(self, value, env, side=None, encoding=None) -> None:
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        # Nothing to look up: the stored "name" is the value itself.
        return self._name

    @property
    def name(self):
        return self.value

    def __repr__(self) -> str:
        # repr() rather than str() so the literal is shown unambiguously
        return f"{self.name!r}"

200 

201 

202_bool_op_map = {"not": "~", "and": "&", "or": "|"} 

203 

204 

class Op:
    """
    Hold an operator of arbitrary arity.

    Parameters
    ----------
    op : str
        Operator token. The word forms ``not``/``and``/``or`` are stored as
        ``~``/``&``/``|``.
    operands : iterable of Term or Op
        Operands, kept in the given order.
    encoding : optional
        Stored unchanged on the instance.
    """

    op: str

    def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None) -> None:
        self.op = _bool_op_map.get(op, op)
        self.operands = operands
        self.encoding = encoding

    def __iter__(self):
        return iter(self.operands)

    def __repr__(self) -> str:
        """
        Print a generic n-ary operator and its operands using infix notation.
        """
        # recurse over the operands
        parened = (f"({pprint_thing(opr)})" for opr in self.operands)
        return pprint_thing(f" {self.op} ".join(parened))

    @property
    def return_type(self):
        # clobber types to bool if the op is a boolean operator
        if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
            return np.bool_
        return result_type_many(*(term.type for term in com.flatten(self)))

    @property
    def has_invalid_return_type(self) -> bool:
        """
        Whether the result is object-typed although some operand is not.

        Returns
        -------
        bool
            True when ``return_type`` equals ``object`` and at least one
            operand type is not the object dtype.
        """
        types = self.operand_types
        obj_dtype_set = frozenset([np.dtype("object")])
        # bool() keeps the annotated return type; the bare ``and`` expression
        # would hand back the frozenset of non-object types instead.
        return bool(self.return_type == object and types - obj_dtype_set)

    @property
    def operand_types(self):
        """frozenset of the ``type`` of every flattened term."""
        return frozenset(term.type for term in com.flatten(self))

    @property
    def is_scalar(self) -> bool:
        return all(operand.is_scalar for operand in self.operands)

    @property
    def is_datetime(self) -> bool:
        try:
            # unwrap a dtype to its scalar type
            t = self.return_type.type
        except AttributeError:
            t = self.return_type

        return issubclass(t, (datetime, np.datetime64))

257 

258 

259def _in(x, y): 

260 """ 

261 Compute the vectorized membership of ``x in y`` if possible, otherwise 

262 use Python. 

263 """ 

264 try: 

265 return x.isin(y) 

266 except AttributeError: 

267 if is_list_like(x): 

268 try: 

269 return y.isin(x) 

270 except AttributeError: 

271 pass 

272 return x in y 

273 

274 

275def _not_in(x, y): 

276 """ 

277 Compute the vectorized membership of ``x not in y`` if possible, 

278 otherwise use Python. 

279 """ 

280 try: 

281 return ~x.isin(y) 

282 except AttributeError: 

283 if is_list_like(x): 

284 try: 

285 return ~y.isin(x) 

286 except AttributeError: 

287 pass 

288 return x not in y 

289 

290 

# Comparison operators, including the membership tests. Each symbol is paired
# by position with the callable in the matching ``*_funcs`` tuple.
CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
_cmp_ops_funcs = (
    operator.gt,
    operator.lt,
    operator.ge,
    operator.le,
    operator.eq,
    operator.ne,
    _in,
    _not_in,
)
_cmp_ops_dict = {sym: func for sym, func in zip(CMP_OPS_SYMS, _cmp_ops_funcs)}

# Boolean operators; the word and symbol forms share the same callable.
BOOL_OPS_SYMS = ("&", "|", "and", "or")
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
_bool_ops_dict = {sym: func for sym, func in zip(BOOL_OPS_SYMS, _bool_ops_funcs)}

# Arithmetic operators.
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
_arith_ops_funcs = (
    operator.add,
    operator.sub,
    operator.mul,
    operator.truediv,
    operator.pow,
    operator.floordiv,
    operator.mod,
)
_arith_ops_dict = {sym: func for sym, func in zip(ARITH_OPS_SYMS, _arith_ops_funcs)}

# Subset of the arithmetic operators kept in a table of their own.
SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%")
_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod)
_special_case_arith_ops_dict = {
    sym: func
    for sym, func in zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs)
}

# Every binary operator symbol mapped to its callable: comparison, boolean,
# then arithmetic.
_binary_ops_dict = {}

for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
    _binary_ops_dict.update(d)

330 

331 

332def _cast_inplace(terms, acceptable_dtypes, dtype): 

333 """ 

334 Cast an expression inplace. 

335 

336 Parameters 

337 ---------- 

338 terms : Op 

339 The expression that should cast. 

340 acceptable_dtypes : list of acceptable numpy.dtype 

341 Will not cast if term's dtype in this list. 

342 dtype : str or numpy.dtype 

343 The dtype to cast to. 

344 """ 

345 dt = np.dtype(dtype) 

346 for term in terms: 

347 if term.type in acceptable_dtypes: 

348 continue 

349 

350 try: 

351 new_value = term.value.astype(dt) 

352 except AttributeError: 

353 new_value = dt.type(term.value) 

354 term.update(new_value) 

355 

356 

def is_term(obj) -> bool:
    """Return True if ``obj`` is an instance of ``Term`` or of a subclass."""
    matches = isinstance(obj, Term)
    return matches

359 

360 

class BinOp(Op):
    """
    Hold a binary operator and its operands.

    Parameters
    ----------
    op : str
    lhs : Term or Op
    rhs : Term or Op

    Raises
    ------
    NotImplementedError
        If a boolean operator is applied with a scalar operand and the two
        return types are not both boolean.
    ValueError
        If ``op`` is not a known binary operator.
    """

    def __init__(self, op: str, lhs, rhs) -> None:
        super().__init__(op, (lhs, rhs))
        self.lhs = lhs
        self.rhs = rhs

        # Both checks run before the operator itself is looked up.
        self._disallow_scalar_only_bool_ops()
        self.convert_values()

        try:
            self.func = _binary_ops_dict[op]
        except KeyError as err:
            valid = list(_binary_ops_dict)
            raise ValueError(
                f"Invalid binary operator {op!r}, valid operators are {valid}"
            ) from err

    def __call__(self, env):
        """
        Recursively evaluate an expression in Python space.

        Parameters
        ----------
        env : Scope

        Returns
        -------
        object
            The result of an evaluated expression.
        """
        # recurse over the left/right nodes, left first
        return self.func(self.lhs(env), self.rhs(env))

    def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
        """
        Evaluate a binary operation *before* being passed to the engine.

        Parameters
        ----------
        env : Scope
        engine : str
        parser : str
        term_type : type
        eval_in_python : list

        Returns
        -------
        term_type
            The "pre-evaluated" expression as an instance of ``term_type``
        """
        if engine == "python":
            res = self(env)
        else:
            # recurse over the left/right nodes with identical settings
            shared = {
                "engine": engine,
                "parser": parser,
                "term_type": term_type,
                "eval_in_python": eval_in_python,
            }
            left = self.lhs.evaluate(env, **shared)
            right = self.rhs.evaluate(env, **shared)

            # base cases
            if self.op in eval_in_python:
                res = self.func(left.value, right.value)
            else:
                from pandas.core.computation.eval import eval as pd_eval

                res = pd_eval(self, local_dict=env, engine=engine, parser=parser)

        # register the result under a temporary name and wrap it in a term
        tmp_name = env.add_tmp(res)
        return term_type(tmp_name, env=env)

    def convert_values(self) -> None:
        """
        Convert datetimes to a comparable value in an expression.
        """

        def stringify(value):
            if self.encoding is None:
                return pprint_thing(value)
            return pprint_thing_encoded(value, encoding=self.encoding)

        # The same coercion is applied in both directions: first with lhs as
        # the datetime side and rhs as the scalar, then the other way round.
        for dt_side, scalar_side in ((self.lhs, self.rhs), (self.rhs, self.lhs)):
            if not (
                is_term(dt_side)
                and dt_side.is_datetime
                and is_term(scalar_side)
                and scalar_side.is_scalar
            ):
                continue

            stamp = scalar_side.value
            if isinstance(stamp, (int, float)):
                stamp = stringify(stamp)
            stamp = Timestamp(ensure_decoded(stamp))
            if stamp.tz is not None:
                stamp = stamp.tz_convert("UTC")
            scalar_side.update(stamp)

    def _disallow_scalar_only_bool_ops(self):
        """
        Reject boolean operators applied to non-boolean scalar operands.

        Raises
        ------
        NotImplementedError
            If either operand is scalar, the operator is a boolean one and
            the two return types are not both boolean.
        """
        rhs = self.rhs
        lhs = self.lhs

        # GH#24883 unwrap dtype if necessary to ensure we have a type object
        rhs_rt = rhs.return_type
        rhs_rt = getattr(rhs_rt, "type", rhs_rt)
        lhs_rt = lhs.return_type
        lhs_rt = getattr(lhs_rt, "type", lhs_rt)

        if not (lhs.is_scalar or rhs.is_scalar):
            return
        if self.op not in _bool_ops_dict:
            return

        bool_types = (bool, np.bool_)
        if issubclass(rhs_rt, bool_types) and issubclass(lhs_rt, bool_types):
            return

        raise NotImplementedError("cannot evaluate scalar only bool ops")

511 

512 

def isnumeric(dtype) -> bool:
    """Return True if ``dtype`` converts to a numpy dtype whose scalar type is a ``np.number``."""
    scalar_type = np.dtype(dtype).type
    return issubclass(scalar_type, np.number)

515 

516 

class Div(BinOp):
    """
    Div operator to special case casting.

    Parameters
    ----------
    lhs, rhs : Term or Op
        The Terms or Ops in the ``/`` expression.

    Raises
    ------
    TypeError
        If either operand's return type is not numeric.
    """

    def __init__(self, lhs, rhs) -> None:
        super().__init__("/", lhs, rhs)

        if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
            raise TypeError(
                f"unsupported operand type(s) for {self.op}: "
                f"'{lhs.return_type}' and '{rhs.return_type}'"
            )

        # do not upcast float32s to float64 un-necessarily
        # np.float64 is spelled out because the np.float_ alias was removed
        # in NumPy 2.0; both names refer to the same type on older versions.
        acceptable_dtypes = [np.float32, np.float64]
        _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)

539 

540 

# Unary operator symbols, paired by position with their callables; ``~`` and
# ``not`` share ``operator.invert``.
UNARY_OPS_SYMS = ("+", "-", "~", "not")
_unary_ops_funcs = (
    operator.pos,
    operator.neg,
    operator.invert,
    operator.invert,
)
_unary_ops_dict = {sym: func for sym, func in zip(UNARY_OPS_SYMS, _unary_ops_funcs)}

544 

545 

class UnaryOp(Op):
    """
    Hold a unary operator and its operands.

    Parameters
    ----------
    op : str
        The token used to represent the operator.
    operand : Term or Op
        The Term or Op operand to the operator.

    Raises
    ------
    ValueError
        * If no function associated with the passed operator token is found.
    """

    def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None:
        super().__init__(op, (operand,))
        self.operand = operand

        try:
            self.func = _unary_ops_dict[op]
        except KeyError as err:
            raise ValueError(
                f"Invalid unary operator {repr(op)}, "
                f"valid operators are {UNARY_OPS_SYMS}"
            ) from err

    def __call__(self, env):
        """
        Evaluate the operand in ``env`` and apply the operator to it.

        Returns
        -------
        object
            Whatever the operator function returns for the evaluated
            operand. The previous ``MathCall`` annotation was incorrect and
            has been dropped.
        """
        operand = self.operand(env)
        # error: Cannot call function of unknown type
        return self.func(operand)  # type: ignore[operator]

    def __repr__(self) -> str:
        return pprint_thing(f"{self.op}({self.operand})")

    @property
    def return_type(self) -> np.dtype:
        """
        bool dtype if the operand is boolean-valued, otherwise int dtype.

        The operand counts as boolean-valued when its own return type is the
        bool dtype, or when it is an ``Op`` whose operator is a comparison or
        boolean operator.
        """
        operand = self.operand
        if operand.return_type == np.dtype("bool"):
            return np.dtype("bool")
        if isinstance(operand, Op) and (
            operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict
        ):
            return np.dtype("bool")
        return np.dtype("int")

593 

594 

class MathCall(Op):
    """
    A call of a math function on one or more operands.

    Parameters
    ----------
    func : FuncNode
        Provides the function ``name`` (used as the operator token) and the
        callable ``func``.
    args : iterable of Term or Op
        Arguments of the call.
    """

    def __init__(self, func, args) -> None:
        super().__init__(func.name, args)
        self.func = func

    def __call__(self, env):
        """Evaluate every operand in ``env`` and apply the function to them."""
        # error: "Op" not callable
        evaluated = [node(env) for node in self.operands]  # type: ignore[operator]
        # floating-point warnings are ignored for the duration of the call
        with np.errstate(all="ignore"):
            return self.func.func(*evaluated)

    def __repr__(self) -> str:
        rendered = ",".join(str(node) for node in self.operands)
        return pprint_thing(f"{self.op}({rendered})")

609 

610 

class FuncNode:
    """
    A supported math function, looked up on numpy by name.

    Parameters
    ----------
    name : str
        Function name; must be one of ``MATHOPS``.

    Raises
    ------
    ValueError
        If ``name`` is not in ``MATHOPS``.
    """

    def __init__(self, name: str) -> None:
        supported = name in MATHOPS
        if not supported:
            raise ValueError(f'"{name}" is not a supported function')
        self.name = name
        self.func = getattr(np, name)

    def __call__(self, *args):
        """Build the ``MathCall`` applying this function to ``args``."""
        call = MathCall(self, args)
        return call