Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/eval.py: 13%

110 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Top level ``eval`` module. 

3""" 

4from __future__ import annotations 

5 

6import tokenize 

7from typing import TYPE_CHECKING 

8import warnings 

9 

10from pandas._libs.lib import no_default 

11from pandas.util._exceptions import find_stack_level 

12from pandas.util._validators import validate_bool_kwarg 

13 

14from pandas.core.computation.engines import ENGINES 

15from pandas.core.computation.expr import ( 

16 PARSERS, 

17 Expr, 

18) 

19from pandas.core.computation.parsing import tokenize_string 

20from pandas.core.computation.scope import ensure_scope 

21from pandas.core.generic import NDFrame 

22 

23from pandas.io.formats.printing import pprint_thing 

24 

25if TYPE_CHECKING: 25 ↛ 26line 25 didn't jump to line 26, because the condition on line 25 was never true

26 from pandas.core.computation.ops import BinOp 

27 

28 

29def _check_engine(engine: str | None) -> str: 

30 """ 

31 Make sure a valid engine is passed. 

32 

33 Parameters 

34 ---------- 

35 engine : str 

36 String to validate. 

37 

38 Raises 

39 ------ 

40 KeyError 

41 * If an invalid engine is passed. 

42 ImportError 

43 * If numexpr was requested but doesn't exist. 

44 

45 Returns 

46 ------- 

47 str 

48 Engine name. 

49 """ 

50 from pandas.core.computation.check import NUMEXPR_INSTALLED 

51 from pandas.core.computation.expressions import USE_NUMEXPR 

52 

53 if engine is None: 

54 engine = "numexpr" if USE_NUMEXPR else "python" 

55 

56 if engine not in ENGINES: 

57 valid_engines = list(ENGINES.keys()) 

58 raise KeyError( 

59 f"Invalid engine '{engine}' passed, valid engines are {valid_engines}" 

60 ) 

61 

62 # TODO: validate this in a more general way (thinking of future engines 

63 # that won't necessarily be import-able) 

64 # Could potentially be done on engine instantiation 

65 if engine == "numexpr" and not NUMEXPR_INSTALLED: 

66 raise ImportError( 

67 "'numexpr' is not installed or an unsupported version. Cannot use " 

68 "engine='numexpr' for query/eval if 'numexpr' is not installed" 

69 ) 

70 

71 return engine 

72 

73 

74def _check_parser(parser: str): 

75 """ 

76 Make sure a valid parser is passed. 

77 

78 Parameters 

79 ---------- 

80 parser : str 

81 

82 Raises 

83 ------ 

84 KeyError 

85 * If an invalid parser is passed 

86 """ 

87 if parser not in PARSERS: 

88 raise KeyError( 

89 f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}" 

90 ) 

91 

92 

93def _check_resolvers(resolvers): 

94 if resolvers is not None: 

95 for resolver in resolvers: 

96 if not hasattr(resolver, "__getitem__"): 

97 name = type(resolver).__name__ 

98 raise TypeError( 

99 f"Resolver of type '{name}' does not " 

100 "implement the __getitem__ method" 

101 ) 

102 

103 

104def _check_expression(expr): 

105 """ 

106 Make sure an expression is not an empty string 

107 

108 Parameters 

109 ---------- 

110 expr : object 

111 An object that can be converted to a string 

112 

113 Raises 

114 ------ 

115 ValueError 

116 * If expr is an empty string 

117 """ 

118 if not expr: 

119 raise ValueError("expr cannot be an empty string") 

120 

121 

def _convert_expression(expr) -> str:
    """
    Render an object as an expression string and reject empty results.

    The object is passed through ``pprint_thing`` and the resulting text is
    validated with ``_check_expression``. This is used to convert operators
    to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of ``expr``.

    Raises
    ------
    ValueError
        * If the converted expression is empty.
    """
    text = pprint_thing(expr)
    _check_expression(text)
    return text

149 

150 

151def _check_for_locals(expr: str, stack_level: int, parser: str): 

152 

153 at_top_of_stack = stack_level == 0 

154 not_pandas_parser = parser != "pandas" 

155 

156 if not_pandas_parser: 

157 msg = "The '@' prefix is only supported by the pandas parser" 

158 elif at_top_of_stack: 

159 msg = ( 

160 "The '@' prefix is not allowed in top-level eval calls.\n" 

161 "please refer to your variables by name without the '@' prefix." 

162 ) 

163 

164 if at_top_of_stack or not_pandas_parser: 

165 for toknum, tokval in tokenize_string(expr): 

166 if toknum == tokenize.OP and tokval == "@": 

167 raise SyntaxError(msg) 

168 

169 

def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    truediv=no_default,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level=0,
    target=None,
    inplace=False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
        A string containing several non-blank lines is evaluated line by
        line; blank lines are dropped and each line is stripped.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'} or None, default None

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : Evaluates pandas objects using numexpr for large
          speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.

    truediv : bool, optional
        Whether to use true division, like in Python >= 3.

        .. deprecated:: 1.0.0
            Passing any value emits a ``FutureWarning``.

    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all of them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
            of a frame.
    DataFrame.eval : Evaluate a string describing operations on
            DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    # ``truediv`` is deprecated: any explicitly passed value only triggers the
    # warning below and is otherwise unused in this function.
    if truediv is not no_default:
        warnings.warn(
            (
                "The `truediv` parameter in pd.eval is deprecated and "
                "will be removed in a future version."
            ),
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        # one sub-expression per non-blank line, surrounding whitespace removed
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    # ``first_expr`` stays True until the first assignment has been applied,
    # so that the target is copied at most once.
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        # NOTE(review): ``level + 1`` presumably accounts for this function's
        # own stack frame -- confirm against ``ensure_scope``. Keep this call
        # directly inside ``eval`` unless the frame arithmetic is re-verified.
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        # Note: the expression has already been evaluated by the time a
        # missing assignment is rejected here.
        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            elif inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    if inplace and isinstance(target, NDFrame):
                        target.loc[:, assigner] = ret
                    else:
                        target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            # Expose the freshly assigned value to the expressions on the
            # following lines through the resolvers.
            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # NOTE(review): if the caller passed a list rather than a
                    # tuple, ``+=`` extends the caller's list in place.
                    resolvers += ({assigner: ret},)

            # The value was stored on the target; the return below hands back
            # the target (``target_modified`` is True) rather than ``ret``.
            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret