Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/pytables.py: 23%
353 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1""" manage PyTables query interface via Expressions """
2from __future__ import annotations
4import ast
5from functools import partial
6from typing import (
7 TYPE_CHECKING,
8 Any,
9)
11import numpy as np
13from pandas._libs.tslibs import (
14 Timedelta,
15 Timestamp,
16)
17from pandas._typing import npt
18from pandas.errors import UndefinedVariableError
20from pandas.core.dtypes.common import is_list_like
22import pandas.core.common as com
23from pandas.core.computation import (
24 expr,
25 ops,
26 scope as _scope,
27)
28from pandas.core.computation.common import ensure_decoded
29from pandas.core.computation.expr import BaseExprVisitor
30from pandas.core.computation.ops import is_term
31from pandas.core.construction import extract_array
32from pandas.core.indexes.base import Index
34from pandas.io.formats.printing import (
35 pprint_thing,
36 pprint_thing_encoded,
37)
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from pandas.compat.chainmap import DeepChainMap
class PyTablesScope(_scope.Scope):
    """
    Scope that additionally stores the ``queryables`` mapping.

    Parameters
    ----------
    level : int
        Frame level; forwarded to the parent as ``level + 1``.
    global_dict, local_dict : optional
        Forwarded unchanged to the parent scope.
    queryables : dict or None
        Mapping of queryable names.  A falsy value (``None`` or an empty
        mapping) is replaced by a new empty dict.
    """

    __slots__ = ("queryables",)

    queryables: dict[str, Any]

    def __init__(
        self,
        level: int,
        global_dict=None,
        local_dict=None,
        queryables: dict[str, Any] | None = None,
    ) -> None:
        # NOTE(review): the +1 presumably accounts for this constructor's own
        # stack frame -- confirm against _scope.Scope.
        parent_level = level + 1
        super().__init__(parent_level, global_dict=global_dict, local_dict=local_dict)
        self.queryables = queryables if queryables else {}
class Term(ops.Term):
    """
    A named term of a PyTables query.

    Instantiating with a non-string ``name`` yields a ``Constant`` instead
    (see ``__new__``).
    """

    env: PyTablesScope

    def __new__(cls, name, env, side=None, encoding=None):
        # Only string names stay instances of ``cls``; anything else is
        # created as a Constant.
        target_cls = cls if isinstance(name, str) else Constant
        return object.__new__(target_cls)

    def __init__(self, name, env: PyTablesScope, side=None, encoding=None) -> None:
        super().__init__(name, env, side=side, encoding=encoding)

    def _resolve_name(self):
        """
        Resolve the term's name.

        On the left side the name must be a key of ``env.queryables`` and is
        returned as-is; otherwise ``NameError`` is raised.  On any other side
        the name is looked up in the scope, falling back to the raw name when
        the scope does not define it.
        """
        if self.side != "left":
            # resolve the rhs (and allow it to be None)
            try:
                return self.env.resolve(self.name, is_local=False)
            except UndefinedVariableError:
                return self.name

        # must be a queryables
        # Note: The behavior of __new__ ensures that self.name is a str here
        if self.name not in self.env.queryables:
            raise NameError(f"name {repr(self.name)} is not defined")
        return self.name

    # read-only property overwriting read/write property
    @property  # type: ignore[misc]
    def value(self):
        """The resolved value stored in ``_value``."""
        return self._value
class Constant(Term):
    """A Term whose name is a literal value; it resolves to that value."""

    def __init__(self, value, env: PyTablesScope, side=None, encoding=None) -> None:
        # Constants are only meaningful inside a PyTablesScope.
        assert isinstance(env, PyTablesScope), type(env)
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        """Return the stored name unchanged (no scope lookup)."""
        literal = self._name
        return literal
class BinOp(ops.BinOp):
    """
    Binary operation between a field name (``lhs``) and value(s) (``rhs``).

    Besides the operator and operands, an instance carries the ``queryables``
    mapping used to validate and describe ``lhs``, the ``encoding`` used when
    rendering values, and a ``condition`` string that stays ``None`` until a
    subclass fills it in.
    """

    # Threshold compared against the number of rhs values by the evaluate()
    # methods of the subclasses in this module.
    _max_selectors = 31

    op: str
    queryables: dict[str, Any]
    condition: str | None

    def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding) -> None:
        super().__init__(op, lhs, rhs)
        self.queryables = queryables
        self.encoding = encoding
        self.condition = None

    def _disallow_scalar_only_bool_ops(self):
        # Deliberate no-op override.
        # NOTE(review): presumably a hook called by ops.BinOp -- confirm.
        pass

    def prune(self, klass):
        """
        Reduce this (possibly nested) operation to an evaluated op.

        Term operands contribute their ``value``; non-term operands are
        pruned recursively first.  The two results are then combined by
        ``pr``.

        Parameters
        ----------
        klass : type
            The op class to specialize to (e.g. ConditionBinOp/FilterBinOp).

        Returns
        -------
        The combined, evaluated operation, or ``None`` if nothing remains.
        """

        def pr(left, right):
            """create and return a new specialized BinOp from myself"""
            # A missing side simply yields the other one.
            if left is None:
                return right
            elif right is None:
                return left

            k = klass
            if isinstance(left, ConditionBinOp):
                if isinstance(right, ConditionBinOp):
                    k = JointConditionBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            elif isinstance(left, FilterBinOp):
                if isinstance(right, FilterBinOp):
                    k = JointFilterBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            return k(
                self.op, left, right, queryables=self.queryables, encoding=self.encoding
            ).evaluate()

        left, right = self.lhs, self.rhs

        if is_term(left) and is_term(right):
            res = pr(left.value, right.value)
        elif not is_term(left) and is_term(right):
            res = pr(left.prune(klass), right.value)
        elif is_term(left) and not is_term(right):
            res = pr(left.value, right.prune(klass))
        else:
            # neither side is a term
            res = pr(left.prune(klass), right.prune(klass))

        return res

    def conform(self, rhs):
        """
        Return ``rhs`` as a list-like.

        Scalars are wrapped in a one-element list and numpy arrays are
        flattened with ``ravel``; other list-likes are returned unchanged.
        """
        if not is_list_like(rhs):
            rhs = [rhs]
        if isinstance(rhs, np.ndarray):
            rhs = rhs.ravel()
        return rhs

    @property
    def is_valid(self) -> bool:
        """return True if this is a valid field (``lhs`` is in queryables)"""
        return self.lhs in self.queryables

    @property
    def is_in_table(self) -> bool:
        """
        return True if this is a valid column name for generation (e.g. an
        actual column in the table), i.e. its queryables entry is not None
        """
        return self.queryables.get(self.lhs) is not None

    @property
    def kind(self):
        """the kind of my field (``None`` if the entry has no ``kind``)"""
        return getattr(self.queryables.get(self.lhs), "kind", None)

    @property
    def meta(self):
        """the meta of my field (``None`` if the entry has no ``meta``)"""
        return getattr(self.queryables.get(self.lhs), "meta", None)

    @property
    def metadata(self):
        """the metadata of my field (``None`` if the entry has none)"""
        return getattr(self.queryables.get(self.lhs), "metadata", None)

    def generate(self, v) -> str:
        """create and return the op string for this TermValue"""
        val = v.tostring(self.encoding)
        return f"({self.lhs} {self.op} {val})"

    @staticmethod
    def _coerce_integer(v) -> int:
        """
        Convert ``v`` to ``int`` without a lossy round-trip through float.

        ``int(v)`` is tried first so that Python/numpy integers and integer
        strings keep every digit (a float only represents integers exactly up
        to 2**53).  Inputs ``int`` rejects, such as ``"1.7"`` or ``"1e3"``,
        fall back to ``int(float(v))`` and therefore still truncate toward
        zero as before.
        """
        try:
            return int(v)
        except (TypeError, ValueError):
            return int(float(v))

    def convert_value(self, v) -> TermValue:
        """
        convert the expression that is in the term to something that is
        accepted by pytables

        The conversion is chosen from the field's ``kind`` (or a ``meta`` of
        ``"category"``); strings compared against any other kind are passed
        through as quoted/encoded strings.

        Raises
        ------
        TypeError
            If ``v`` is not a string and the kind is not one handled here.
        """

        def stringify(value):
            if self.encoding is not None:
                return pprint_thing_encoded(value, encoding=self.encoding)
            return pprint_thing(value)

        kind = ensure_decoded(self.kind)
        meta = ensure_decoded(self.meta)
        if kind in ("datetime64", "datetime"):
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind in ("timedelta64", "timedelta"):
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                # non-string input is interpreted as a number of seconds
                v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = extract_array(self.metadata, extract_numpy=True)
            result: npt.NDArray[np.intp] | np.intp | int
            if v not in metadata:
                # value absent from the categories
                result = -1
            else:
                result = metadata.searchsorted(v, side="left")
            return TermValue(result, result, "integer")
        elif kind == "integer":
            v = self._coerce_integer(v)
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                # any string other than these spellings counts as True
                v = v.strip().lower() not in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")

    def convert_values(self):
        # Deliberate no-op override; values are converted one at a time via
        # convert_value instead.
        pass
class FilterBinOp(BinOp):
    """
    Binary op that is applied as a filter rather than as a condition string.

    ``filter`` is ``None`` until ``evaluate`` sets it to a 3-tuple of
    ``(lhs, filter function, Index of values)``.
    """

    filter: tuple[Any, Any, Index] | None = None

    def __repr__(self) -> str:
        if self.filter is None:
            return "Filter: Not Initialized"
        # Brackets are balanced, matching ConditionBinOp.__repr__.
        return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]]")

    def invert(self):
        """invert the filter"""
        if self.filter is not None:
            # Only the filter function is swapped; lhs and values are kept.
            self.filter = (
                self.filter[0],
                self.generate_filter_op(invert=True),
                self.filter[2],
            )
        return self

    def format(self):
        """return the actual filter format"""
        return [self.filter]

    def evaluate(self):
        """
        Build ``self.filter`` for this term where a filter is required.

        Returns
        -------
        self, or ``None`` when the field is in the table and no filter is
        needed (not an equality op, or few enough values).

        Raises
        ------
        ValueError
            If ``lhs`` is not a queryable.
        TypeError
            If the field is not in the table and the op is not ``==``/``!=``.
        """
        if not self.is_valid:
            raise ValueError(f"query term is not valid [{self}]")

        rhs = self.conform(self.rhs)
        values = list(rhs)
        is_equality = self.op in ["==", "!="]

        if self.is_in_table:
            # if too many values to create the expression, use a filter instead
            if is_equality and len(values) > self._max_selectors:
                filter_op = self.generate_filter_op()
                self.filter = (self.lhs, filter_op, Index(values))
                return self
            return None

        # equality conditions
        if not is_equality:
            raise TypeError(
                f"passing a filterable condition to a non-table indexer [{self}]"
            )

        filter_op = self.generate_filter_op()
        self.filter = (self.lhs, filter_op, Index(values))
        return self

    def generate_filter_op(self, invert: bool = False):
        """
        Return a callable ``(axis, vals)`` implementing the membership test.

        The test is negated for ``!=`` (or for ``==`` when ``invert`` is
        True); in every other case it is a plain ``axis.isin(vals)``.
        """
        if (self.op == "!=" and not invert) or (self.op == "==" and invert):
            return lambda axis, vals: ~axis.isin(vals)
        else:
            return lambda axis, vals: axis.isin(vals)
class JointFilterBinOp(FilterBinOp):
    """Combination of two filter ops; it cannot be formatted as one filter."""

    def format(self):
        """Always raises: joint filters have no single filter format."""
        raise NotImplementedError("unable to collapse Joint Filters")

    def evaluate(self):
        """Nothing to compute; the joint op is returned as-is."""
        return self
class ConditionBinOp(BinOp):
    """Binary op rendered as a condition string stored in ``condition``."""

    def __repr__(self) -> str:
        return pprint_thing(f"[Condition : [{self.condition}]]")

    def invert(self):
        """invert the condition"""
        # Inverting a condition string is not supported; always raises.
        raise NotImplementedError(
            "cannot use an invert condition when passing to numexpr"
        )

    def format(self):
        """return the actual ne format"""
        return self.condition

    def evaluate(self):
        """
        Build ``self.condition`` for this term.

        Returns
        -------
        self once the condition string is set, or ``None`` when the field is
        not in the table or an equality op has more values than
        ``_max_selectors`` (a filter is used after reading in that case).

        Raises
        ------
        ValueError
            If ``lhs`` is not a queryable.
        """
        if not self.is_valid:
            raise ValueError(f"query term is not valid [{self}]")

        # convert values if we are in the table
        if not self.is_in_table:
            return None

        converted = [self.convert_value(item) for item in self.conform(self.rhs)]

        if self.op not in ["==", "!="]:
            # non-equality ops use the first value only
            self.condition = self.generate(converted[0])
            return self

        # too many values to create the expression? use a filter after reading
        if len(converted) > self._max_selectors:
            return None

        pieces = [self.generate(term_value) for term_value in converted]
        self.condition = "(" + " | ".join(pieces) + ")"
        return self
class JointConditionBinOp(ConditionBinOp):
    """Condition formed by joining the conditions of both operands."""

    def evaluate(self):
        """Set ``condition`` to ``(<lhs condition> <op> <rhs condition>)``."""
        left_condition = self.lhs.condition
        right_condition = self.rhs.condition
        self.condition = f"({left_condition} {self.op} {right_condition})"
        return self
class UnaryOp(ops.UnaryOp):
    """Unary operation; only inversion (``~``) can be pruned."""

    def prune(self, klass):
        """
        Prune the operand to ``klass`` and invert the result when possible.

        Returns the inverted operand if it carries a condition (for
        ConditionBinOp classes) or a filter (for FilterBinOp classes);
        otherwise ``None``.

        Raises
        ------
        NotImplementedError
            If the operator is anything other than ``~``.
        """
        if self.op != "~":
            raise NotImplementedError("UnaryOp only support invert type ops")

        pruned = self.operand.prune(klass)
        if pruned is None:
            return None

        # Decide which attribute must be populated for inversion to apply.
        if issubclass(klass, ConditionBinOp):
            invertible = pruned.condition is not None
        elif issubclass(klass, FilterBinOp):
            invertible = pruned.filter is not None
        else:
            invertible = False

        return pruned.invert() if invertible else None
class PyTablesExprVisitor(BaseExprVisitor):
    """AST visitor that builds PyTables ``Term``/``Constant``/``BinOp`` nodes."""

    const_type = Constant
    term_type = Term

    def __init__(self, env, engine, parser, **kwargs) -> None:
        super().__init__(env, engine, parser)
        # Install one visit_<node> attribute per binary operator; each returns
        # a BinOp factory pre-bound to that operator and the extra kwargs.
        for operator in self.binary_ops:
            node_name = self.binary_op_nodes_map[operator]
            setattr(
                self,
                f"visit_{node_name}",
                lambda node, bin_op=operator: partial(BinOp, bin_op, **kwargs),
            )

    def visit_UnaryOp(self, node, **kwargs):
        """
        Translate a unary AST node.

        ``not``/``~`` become an inverting UnaryOp, unary minus becomes a
        negated constant, and unary plus raises NotImplementedError.  Any
        other operator yields ``None``.
        """
        unary = node.op
        if isinstance(unary, (ast.Not, ast.Invert)):
            return UnaryOp("~", self.visit(node.operand))
        if isinstance(unary, ast.USub):
            return self.const_type(-self.visit(node.operand).value, self.env)
        if isinstance(unary, ast.UAdd):
            raise NotImplementedError("Unary addition not supported")
        return None

    def visit_Index(self, node, **kwargs):
        visited = self.visit(node.value)
        return visited.value

    def visit_Assign(self, node, **kwargs):
        # ``a = b`` is treated as the comparison ``a == b``.
        comparison = ast.Compare(
            ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]
        )
        return self.visit(comparison)

    def visit_Subscript(self, node, **kwargs):
        """
        Evaluate a simple subscript and wrap the result as a constant.

        Raises
        ------
        ValueError
            If subscripting (or building the constant) raises TypeError.
        """
        # only allow simple subscripts
        container = self.visit(node.value)
        slobj = self.visit(node.slice)
        # unwrap to the underlying value when there is one
        container = getattr(container, "value", container)

        if isinstance(slobj, Term):
            # In py39 np.ndarray lookups with Term containing int raise
            slobj = slobj.value

        try:
            return self.const_type(container[slobj], self.env)
        except TypeError as err:
            raise ValueError(
                f"cannot subscript {repr(container)} with {repr(slobj)}"
            ) from err

    def visit_Attribute(self, node, **kwargs):
        """
        Resolve ``value.attr`` in a load context and wrap it as a term.

        Raises
        ------
        ValueError
            If the context is not a load, or the attribute cannot be
            resolved and the fallback below does not apply.
        """
        attr = node.attr
        owner = node.value

        ctx = type(node.ctx)
        if ctx == ast.Load:
            # resolve the value; unwrap it if we are another expression
            visited = self.visit(owner)
            resolved = getattr(visited, "value", visited)

            try:
                return self.term_type(getattr(resolved, attr), self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(owner, ast.Name) and owner.id == attr:
                    return resolved

        raise ValueError(f"Invalid Attribute context {ctx.__name__}")

    def translate_In(self, op):
        # ``in`` is handled as equality; every other operator passes through.
        if isinstance(op, ast.In):
            return ast.Eq()
        return op

    def _rewrite_membership_op(self, node, left, right):
        operator_node = node.op
        return self.visit(operator_node), operator_node, left, right
def _validate_where(w):
    """
    Check that a where clause has an acceptable type.

    Accepted types are a string, a PyTablesExpr, or any list-like.

    Parameters
    ----------
    w : String term expression, Expr, or list-like of Exprs.

    Returns
    -------
    where : The original where clause if the check was successful.

    Raises
    ------
    TypeError : An invalid data type was passed in for w (e.g. dict).
    """
    acceptable = isinstance(w, (PyTablesExpr, str)) or is_list_like(w)
    if acceptable:
        return w

    raise TypeError(
        "where must be passed as a string, PyTablesExpr, "
        "or list-like of PyTablesExpr"
    )
class PyTablesExpr(expr.Expr):
    """
    Hold a pytables-like expression, comprised of possibly multiple 'terms'.

    Parameters
    ----------
    where : string term expression, PyTablesExpr, or list-like of PyTablesExprs
    queryables : a "kinds" map (dict of column name -> kind), or None if column
        is non-indexable
    encoding : an encoding that will encode the query terms
    scope_level : int, default 0
        Passed on (plus one) to the PyTablesScope created for this expression.

    Returns
    -------
    a PyTablesExpr object

    Examples
    --------
    'index>=date'
    "columns=['A', 'D']"
    'columns=A'
    'columns==A'
    "~(columns=['A','B'])"
    'index>df.index[3] & string="bar"'
    '(index>df.index[3] & index<=df.index[6]) | string="bar"'
    "ts>=Timestamp('2012-02-01')"
    "major_axis>=20130101"
    """

    _visitor: PyTablesExprVisitor | None
    env: PyTablesScope
    expr: str

    def __init__(
        self,
        where,
        queryables: dict[str, Any] | None = None,
        encoding=None,
        scope_level: int = 0,
    ) -> None:

        where = _validate_where(where)

        self.encoding = encoding
        self.condition = None
        self.filter = None
        self.terms = None
        self._visitor = None

        # capture the environment if needed
        local_dict: DeepChainMap[Any, Any] | None = None

        if isinstance(where, PyTablesExpr):
            # reuse both the scope and the expression text of the given expr
            local_dict = where.env.scope
            expr_text = where.expr
        elif is_list_like(where):
            where = list(where)
            for position, item in enumerate(where):
                if isinstance(item, PyTablesExpr):
                    local_dict = item.env.scope
                else:
                    where[position] = _validate_where(item)
            # every flattened entry is parenthesised and AND-ed together
            wrapped = [f"({item})" for item in com.flatten(where)]
            expr_text = " & ".join(wrapped)
        else:
            # _validate_where ensures we otherwise have a string
            expr_text = where

        self.expr = expr_text
        self.env = PyTablesScope(scope_level + 1, local_dict=local_dict)

        # parsing only happens when queryables were supplied
        if queryables is not None and isinstance(self.expr, str):
            self.env.queryables.update(queryables)
            self._visitor = PyTablesExprVisitor(
                self.env,
                queryables=queryables,
                parser="pytables",
                engine="pytables",
                encoding=encoding,
            )
            self.terms = self.parse()

    def __repr__(self) -> str:
        # Prefer the parsed terms; fall back to the raw expression text.
        shown = self.terms if self.terms is not None else self.expr
        return pprint_thing(shown)

    def evaluate(self):
        """
        create and return the numexpr condition and filter

        Raises
        ------
        ValueError
            If pruning the terms raises AttributeError for either the
            condition or the filter pass.
        """
        try:
            condition = self.terms.prune(ConditionBinOp)
        except AttributeError as err:
            raise ValueError(
                f"cannot process expression [{self.expr}], [{self}] "
                "is not a valid condition"
            ) from err
        self.condition = condition

        try:
            filter_ = self.terms.prune(FilterBinOp)
        except AttributeError as err:
            raise ValueError(
                f"cannot process expression [{self.expr}], [{self}] "
                "is not a valid filter"
            ) from err
        self.filter = filter_

        return self.condition, self.filter
class TermValue:
    """hold a term value that we use to construct a condition/filter"""

    def __init__(self, value, converted, kind: str) -> None:
        assert isinstance(kind, str), kind
        self.value, self.converted, self.kind = value, converted, kind

    def tostring(self, encoding) -> str:
        """
        Render the converted value for use in a query string.

        Floats use ``repr``; strings are wrapped in double quotes unless an
        encoding is given; everything else goes through ``str``.
        """
        converted = self.converted
        if self.kind == "float":
            # repr() is used rather than str() so the float round-trips
            return repr(converted)
        if self.kind == "string" and encoding is None:
            return f'"{converted}"'
        return str(converted)
def maybe_expression(s) -> bool:
    """
    loose checking if s is a pytables-acceptable expression

    Non-strings are never expressions; a string qualifies as soon as it
    contains any binary or unary operator of the visitor, or ``=``.
    """
    if not isinstance(s, str):
        return False

    operators = (
        PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",)
    )

    # make sure we have an op at least
    for operator in operators:
        if operator in s:
            return True
    return False