Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/pytables.py: 23%

353 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" manage PyTables query interface via Expressions """ 

2from __future__ import annotations 

3 

4import ast 

5from functools import partial 

6from typing import ( 

7 TYPE_CHECKING, 

8 Any, 

9) 

10 

11import numpy as np 

12 

13from pandas._libs.tslibs import ( 

14 Timedelta, 

15 Timestamp, 

16) 

17from pandas._typing import npt 

18from pandas.errors import UndefinedVariableError 

19 

20from pandas.core.dtypes.common import is_list_like 

21 

22import pandas.core.common as com 

23from pandas.core.computation import ( 

24 expr, 

25 ops, 

26 scope as _scope, 

27) 

28from pandas.core.computation.common import ensure_decoded 

29from pandas.core.computation.expr import BaseExprVisitor 

30from pandas.core.computation.ops import is_term 

31from pandas.core.construction import extract_array 

32from pandas.core.indexes.base import Index 

33 

34from pandas.io.formats.printing import ( 

35 pprint_thing, 

36 pprint_thing_encoded, 

37) 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from pandas.compat.chainmap import DeepChainMap 

41 

42 

class PyTablesScope(_scope.Scope):
    """
    Scope used for PyTables queries.

    Extends the base ``Scope`` with a ``queryables`` mapping that is stored
    on the instance next to the regular scope data.
    """

    __slots__ = ("queryables",)

    queryables: dict[str, Any]

    def __init__(
        self,
        level: int,
        global_dict=None,
        local_dict=None,
        queryables: dict[str, Any] | None = None,
    ) -> None:
        # The level handed to the base class is one deeper than the one we
        # were given (presumably to account for this constructor's own
        # frame -- confirm against ``Scope``).
        super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict)
        # Any falsy value (None or an empty mapping) becomes a fresh dict.
        self.queryables = queryables if queryables else {}

57 

58 

class Term(ops.Term):
    """
    A named term of a PyTables query.

    Instantiating ``Term`` with a ``name`` that is not a string produces a
    ``Constant`` instead (see ``__new__``).
    """

    env: PyTablesScope

    def __new__(cls, name, env, side=None, encoding=None):
        # String names stay as ``cls``; everything else becomes a Constant.
        target = cls if isinstance(name, str) else Constant
        return object.__new__(target)

    def __init__(self, name, env: PyTablesScope, side=None, encoding=None) -> None:
        super().__init__(name, env, side=side, encoding=encoding)

    def _resolve_name(self):
        """
        Resolve this term's name.

        A left-hand term must be one of the scope's queryables and resolves
        to its own name. Any other term is looked up in the scope; a name
        the scope does not know is returned unchanged.

        Raises
        ------
        NameError
            If a left-hand name is not among the queryables.
        """
        if self.side != "left":
            # resolve the rhs (and allow it to be None)
            try:
                return self.env.resolve(self.name, is_local=False)
            except UndefinedVariableError:
                return self.name

        # must be a queryables
        # Note: The behavior of __new__ ensures that self.name is a str here
        if self.name not in self.env.queryables:
            raise NameError(f"name {repr(self.name)} is not defined")
        return self.name

    # read-only property overwriting read/write property
    @property  # type: ignore[misc]
    def value(self):
        return self._value

90 

91 

class Constant(Term):
    """A term that wraps a literal value rather than a name to look up."""

    def __init__(self, value, env: PyTablesScope, side=None, encoding=None) -> None:
        # Constants are only meaningful inside a PyTables scope.
        assert isinstance(env, PyTablesScope), type(env)
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        """Return the wrapped value as-is; no scope lookup is performed."""
        return self._name

99 

100 

class BinOp(ops.BinOp):
    """
    Binary operation of a PyTables query: ``<lhs> <op> <rhs>``.

    The left-hand side is looked up in ``queryables``; the right-hand side
    holds the value(s) compared against it. ``prune`` specializes a tree of
    these nodes into ``ConditionBinOp`` / ``FilterBinOp`` style nodes.
    """

    # Maximum number of right-hand values that ConditionBinOp expands into a
    # single ``==`` / ``!=`` condition string; FilterBinOp handles longer lists.
    _max_selectors = 31

    op: str
    queryables: dict[str, Any]
    condition: str | None

    def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding) -> None:
        super().__init__(op, lhs, rhs)
        self.queryables = queryables
        self.encoding = encoding
        self.condition = None

    def _disallow_scalar_only_bool_ops(self):
        # Deliberately a no-op for PyTables expressions.
        pass

    def prune(self, klass):
        """
        Collapse this node (and, recursively, its operands) into ``klass`` form.

        Parameters
        ----------
        klass : type
            The specialized node class to build, e.g. ``ConditionBinOp`` or
            ``FilterBinOp``.

        Returns
        -------
        The evaluated specialized node, one of the operands when the other
        one pruned away, or None when nothing is left.
        """

        def pr(left, right):
            """create and return a new specialized BinOp from myself"""
            # A side that pruned to nothing leaves the other side as result.
            if left is None:
                return right
            elif right is None:
                return left

            k = klass
            if isinstance(left, ConditionBinOp):
                if isinstance(right, ConditionBinOp):
                    k = JointConditionBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            elif isinstance(left, FilterBinOp):
                if isinstance(right, FilterBinOp):
                    k = JointFilterBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            return k(
                self.op, left, right, queryables=self.queryables, encoding=self.encoding
            ).evaluate()

        left, right = self.lhs, self.rhs

        # Terms contribute their value; nested operations are pruned first.
        # The left operand is always processed before the right one.
        pruned_left = left.value if is_term(left) else left.prune(klass)
        pruned_right = right.value if is_term(right) else right.prune(klass)
        return pr(pruned_left, pruned_right)

    def conform(self, rhs):
        """
        Return ``rhs`` in list-like form.

        A scalar is wrapped in a one-element list and an ndarray is flattened
        with ``ravel``; any other list-like is returned unchanged.
        """
        if not is_list_like(rhs):
            rhs = [rhs]
        if isinstance(rhs, np.ndarray):
            rhs = rhs.ravel()
        return rhs

    @property
    def is_valid(self) -> bool:
        """return True if this is a valid field"""
        return self.lhs in self.queryables

    @property
    def is_in_table(self) -> bool:
        """
        return True if this is a valid column name for generation (e.g. an
        actual column in the table)
        """
        return self.queryables.get(self.lhs) is not None

    @property
    def kind(self):
        """the kind of my field"""
        return getattr(self.queryables.get(self.lhs), "kind", None)

    @property
    def meta(self):
        """the meta of my field"""
        return getattr(self.queryables.get(self.lhs), "meta", None)

    @property
    def metadata(self):
        """the metadata of my field"""
        return getattr(self.queryables.get(self.lhs), "metadata", None)

    def generate(self, v) -> str:
        """create and return the op string for this TermValue"""
        val = v.tostring(self.encoding)
        return f"({self.lhs} {self.op} {val})"

    def convert_value(self, v) -> TermValue:
        """
        convert the expression that is in the term to something that is
        accepted by pytables

        Parameters
        ----------
        v : object
            A single right-hand value.

        Returns
        -------
        TermValue
            The value, its converted form and the kind it was converted as.

        Raises
        ------
        TypeError
            If the column kind is not handled and ``v`` is not a string.
        """

        def stringify(value):
            if self.encoding is not None:
                return pprint_thing_encoded(value, encoding=self.encoding)
            return pprint_thing(value)

        kind = ensure_decoded(self.kind)
        meta = ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            # Strings are parsed as-is; anything else is taken as seconds.
            if isinstance(v, str):
                v = Timedelta(v).value
            else:
                v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = extract_array(self.metadata, extract_numpy=True)
            result: npt.NDArray[np.intp] | np.intp | int
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side="left")
            return TermValue(result, result, "integer")
        elif kind == "integer":
            # Going through float unconditionally would silently round
            # integers beyond 2**53, so convert directly whenever int() can
            # handle the input and only fall back to float for inputs such as
            # the string "3.0". Truncation of fractional values is unchanged.
            try:
                v = int(v)
            except (TypeError, ValueError):
                v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                # Everything except these spellings counts as True.
                v = v.strip().lower() not in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")

    def convert_values(self):
        # Deliberately a no-op; values are converted one at a time through
        # ``convert_value``.
        pass

266 

267 

class FilterBinOp(BinOp):
    """
    Binary operation applied as a filter ``(lhs, op_function, values)``.

    ``filter`` stays None until ``evaluate`` decides a filter is needed.
    """

    filter: tuple[Any, Any, Index] | None = None

    def __repr__(self) -> str:
        if self.filter is None:
            return "Filter: Not Initialized"
        # Brackets are balanced so the output matches ConditionBinOp's style.
        return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]]")

    def invert(self):
        """invert the filter"""
        if self.filter is not None:
            # Keep the column and values, swap in the negated membership test.
            self.filter = (
                self.filter[0],
                self.generate_filter_op(invert=True),
                self.filter[2],
            )
        return self

    def format(self):
        """return the actual filter format"""
        return [self.filter]

    def evaluate(self):
        """
        Build the filter for this operation if one is required.

        Returns
        -------
        self when a filter was created, otherwise None (a column that is in
        the table and has few enough values to be expressed as a condition).

        Raises
        ------
        ValueError
            If the left-hand side is not a queryable.
        TypeError
            If a non-equality operator is used on a column that is not in
            the table.
        """
        if not self.is_valid:
            raise ValueError(f"query term is not valid [{self}]")

        rhs = self.conform(self.rhs)
        values = list(rhs)
        is_equality = self.op in ["==", "!="]

        if self.is_in_table:
            # if too many values to create the expression, use a filter instead
            if is_equality and len(values) > self._max_selectors:
                self.filter = (self.lhs, self.generate_filter_op(), Index(values))
                return self
            return None

        # Not a table column: only equality conditions can be filtered.
        if not is_equality:
            raise TypeError(
                f"passing a filterable condition to a non-table indexer [{self}]"
            )

        self.filter = (self.lhs, self.generate_filter_op(), Index(values))
        return self

    def generate_filter_op(self, invert: bool = False):
        """
        Return a callable ``(axis, vals)`` testing membership of axis in vals.

        The test is negated for ``!=``, or for ``==`` when ``invert`` is True.
        """
        if (self.op == "!=" and not invert) or (self.op == "==" and invert):
            return lambda axis, vals: ~axis.isin(vals)
        else:
            return lambda axis, vals: axis.isin(vals)

327 

328 

class JointFilterBinOp(FilterBinOp):
    """Combination of two filter operations; it cannot be collapsed."""

    def format(self):
        """Always raises: joint filters have no single filter format."""
        raise NotImplementedError("unable to collapse Joint Filters")

    def evaluate(self):
        """Return the node itself; no filter is built here."""
        return self

335 

336 

class ConditionBinOp(BinOp):
    """Binary operation rendered as a condition string."""

    def __repr__(self) -> str:
        return pprint_thing(f"[Condition : [{self.condition}]]")

    def invert(self):
        """invert the condition"""
        # Wrapping the condition as "~(<condition>)" is intentionally not
        # done here; inversion of a condition is rejected outright.
        raise NotImplementedError(
            "cannot use an invert condition when passing to numexpr"
        )

    def format(self):
        """return the actual ne format"""
        return self.condition

    def evaluate(self):
        """
        Build the condition string for this operation.

        Returns
        -------
        self once ``condition`` is set, or None when the column is not in
        the table or an equality test has more than ``_max_selectors`` values.

        Raises
        ------
        ValueError
            If the left-hand side is not a queryable.
        """
        if not self.is_valid:
            raise ValueError(f"query term is not valid [{self}]")

        # convert values if we are in the table
        if not self.is_in_table:
            return None

        converted = [self.convert_value(item) for item in self.conform(self.rhs)]

        if self.op not in ["==", "!="]:
            # Non-equality operators compare against the first value only.
            self.condition = self.generate(converted[0])
            return self

        # too many values to create the expression? use a filter after reading
        if len(converted) > self._max_selectors:
            return None

        pieces = [self.generate(item) for item in converted]
        joined = " | ".join(pieces)
        self.condition = f"({joined})"
        return self

381 

382 

class JointConditionBinOp(ConditionBinOp):
    """Condition formed by joining the conditions of both operands."""

    def evaluate(self):
        """Set ``condition`` to ``(<lhs condition> <op> <rhs condition>)``."""
        left_cond = self.lhs.condition
        right_cond = self.rhs.condition
        self.condition = f"({left_cond} {self.op} {right_cond})"
        return self

387 

388 

class UnaryOp(ops.UnaryOp):
    """Unary operation of a PyTables query; only ``~`` can be pruned."""

    def prune(self, klass):
        """
        Prune the operand into ``klass`` form and invert the result.

        Returns
        -------
        The inverted operand, or None when the operand pruned away or does
        not carry the attribute (``condition`` / ``filter``) matching ``klass``.

        Raises
        ------
        NotImplementedError
            If this operation is anything other than ``~``.
        """
        if self.op != "~":
            raise NotImplementedError("UnaryOp only support invert type ops")

        pruned = self.operand.prune(klass)
        if pruned is None:
            return None

        # Condition classes take precedence; the filter check only applies
        # when ``klass`` is not a condition class.
        if issubclass(klass, ConditionBinOp):
            invertible = pruned.condition is not None
        elif issubclass(klass, FilterBinOp):
            invertible = pruned.filter is not None
        else:
            invertible = False

        if invertible:
            return pruned.invert()
        return None

407 

408 

class PyTablesExprVisitor(BaseExprVisitor):
    """AST visitor that builds PyTables terms and operations."""

    const_type = Constant
    term_type = Term

    def __init__(self, env, engine, parser, **kwargs) -> None:
        super().__init__(env, engine, parser)
        # Give every binary operator a ``visit_<Node>`` attribute returning a
        # BinOp factory pre-bound to that operator and to ``kwargs``.
        for symbol in self.binary_ops:
            node_name = self.binary_op_nodes_map[symbol]

            # ``bin_op=symbol`` freezes the current operator for this handler.
            def handler(node, bin_op=symbol):
                return partial(BinOp, bin_op, **kwargs)

            setattr(self, f"visit_{node_name}", handler)

    def visit_UnaryOp(self, node, **kwargs):
        """Handle ``not``/``~`` (inversion) and unary minus; reject unary plus."""
        unary = node.op
        if isinstance(unary, (ast.Not, ast.Invert)):
            return UnaryOp("~", self.visit(node.operand))
        if isinstance(unary, ast.USub):
            negated = -self.visit(node.operand).value
            return self.const_type(negated, self.env)
        if isinstance(unary, ast.UAdd):
            raise NotImplementedError("Unary addition not supported")
        return None

    def visit_Index(self, node, **kwargs):
        return self.visit(node.value).value

    def visit_Assign(self, node, **kwargs):
        """Treat ``target = value`` as the comparison ``target == value``."""
        comparison = ast.Compare(
            ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]
        )
        return self.visit(comparison)

    def visit_Subscript(self, node, **kwargs):
        """
        Evaluate ``value[slice]`` and wrap the result as a constant.

        Raises
        ------
        ValueError
            If the subscript operation raises TypeError.
        """
        # only allow simple subscripts
        container = self.visit(node.value)
        key = self.visit(node.slice)

        # Unwrap a visited node to the object it stands for, when it has one.
        container = getattr(container, "value", container)

        if isinstance(key, Term):
            # In py39 np.ndarray lookups with Term containing int raise
            key = key.value

        try:
            return self.const_type(container[key], self.env)
        except TypeError as err:
            raise ValueError(
                f"cannot subscript {repr(container)} with {repr(key)}"
            ) from err

    def visit_Attribute(self, node, **kwargs):
        """
        Resolve ``value.attr`` in load context into a term.

        Raises
        ------
        ValueError
            For a non-load context, or when the attribute cannot be resolved.
        """
        attr = node.attr
        owner = node.value
        ctx = type(node.ctx)

        if ctx == ast.Load:
            # resolve the value, then try to get its value to see if we are
            # another expression
            resolved = self.visit(owner)
            resolved = getattr(resolved, "value", resolved)

            try:
                return self.term_type(getattr(resolved, attr), self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(owner, ast.Name) and owner.id == attr:
                    return resolved

        raise ValueError(f"Invalid Attribute context {ctx.__name__}")

    def translate_In(self, op):
        """Map ``in`` onto equality; every other operator is returned as-is."""
        if isinstance(op, ast.In):
            return ast.Eq()
        return op

    def _rewrite_membership_op(self, node, left, right):
        return self.visit(node.op), node.op, left, right

491 

492 

493def _validate_where(w): 

494 """ 

495 Validate that the where statement is of the right type. 

496 

497 The type may either be String, Expr, or list-like of Exprs. 

498 

499 Parameters 

500 ---------- 

501 w : String term expression, Expr, or list-like of Exprs. 

502 

503 Returns 

504 ------- 

505 where : The original where clause if the check was successful. 

506 

507 Raises 

508 ------ 

509 TypeError : An invalid data type was passed in for w (e.g. dict). 

510 """ 

511 if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)): 

512 raise TypeError( 

513 "where must be passed as a string, PyTablesExpr, " 

514 "or list-like of PyTablesExpr" 

515 ) 

516 

517 return w 

518 

519 

class PyTablesExpr(expr.Expr):
    """
    A pytables-like expression made up of one or more 'terms'.

    Parameters
    ----------
    where : string term expression, PyTablesExpr, or list-like of PyTablesExprs
    queryables : a "kinds" map (dict of column name -> kind), or None if column
        is non-indexable
    encoding : an encoding that will encode the query terms

    Returns
    -------
    a PyTablesExpr object

    Examples
    --------
    'index>=date'
    "columns=['A', 'D']"
    'columns=A'
    'columns==A'
    "~(columns=['A','B'])"
    'index>df.index[3] & string="bar"'
    '(index>df.index[3] & index<=df.index[6]) | string="bar"'
    "ts>=Timestamp('2012-02-01')"
    "major_axis>=20130101"
    """

    _visitor: PyTablesExprVisitor | None
    env: PyTablesScope
    expr: str

    def __init__(
        self,
        where,
        queryables: dict[str, Any] | None = None,
        encoding=None,
        scope_level: int = 0,
    ) -> None:

        where = _validate_where(where)

        self.encoding = encoding
        self.condition = None
        self.filter = None
        self.terms = None
        self._visitor = None

        # capture the environment if needed
        local_dict: DeepChainMap[Any, Any] | None = None

        if isinstance(where, PyTablesExpr):
            # Reuse both the scope and the expression text of the given expr.
            local_dict = where.env.scope
            _where = where.expr
        elif is_list_like(where):
            where = list(where)
            for position, item in enumerate(where):
                if isinstance(item, PyTablesExpr):
                    # The scope of the last PyTablesExpr in the list wins.
                    local_dict = item.env.scope
                else:
                    where[position] = _validate_where(item)
            # Each element is parenthesized and all of them are AND-ed.
            _where = " & ".join(f"({part})" for part in com.flatten(where))
        else:
            # _validate_where ensures we otherwise have a string
            _where = where

        self.expr = _where
        self.env = PyTablesScope(scope_level + 1, local_dict=local_dict)

        # Parsing only happens when queryables were supplied.
        if queryables is not None and isinstance(self.expr, str):
            self.env.queryables.update(queryables)
            self._visitor = PyTablesExprVisitor(
                self.env,
                queryables=queryables,
                parser="pytables",
                engine="pytables",
                encoding=encoding,
            )
            self.terms = self.parse()

    def __repr__(self) -> str:
        shown = self.terms if self.terms is not None else self.expr
        return pprint_thing(shown)

    def _prune_terms(self, klass, label: str):
        """Prune the parsed terms into ``klass`` form, labelling failures."""
        try:
            return self.terms.prune(klass)
        except AttributeError as err:
            raise ValueError(
                f"cannot process expression [{self.expr}], [{self}] "
                f"is not a valid {label}"
            ) from err

    def evaluate(self):
        """create and return the numexpr condition and filter"""
        self.condition = self._prune_terms(ConditionBinOp, "condition")
        self.filter = self._prune_terms(FilterBinOp, "filter")
        return self.condition, self.filter

625 

626 

class TermValue:
    """Hold a term value that is used to construct a condition/filter."""

    def __init__(self, value, converted, kind: str) -> None:
        assert isinstance(kind, str), kind
        self.value = value
        self.converted = converted
        self.kind = kind

    def tostring(self, encoding) -> str:
        """
        Render the converted value as text.

        Floats are rendered with ``repr``. Strings are wrapped in double
        quotes unless an encoding is given; everything else goes through
        ``str``.
        """
        if self.kind == "float":
            # python 2 str(float) is not always
            # round-trippable so use repr()
            return repr(self.converted)
        if self.kind == "string" and encoding is None:
            return f'"{self.converted}"'
        return str(self.converted)

647 

648 

def maybe_expression(s) -> bool:
    """Loosely check whether ``s`` could be a pytables-acceptable expression.

    Only strings qualify, and only when they contain at least one of the
    visitor's binary or unary operator symbols, or a plain ``=``.
    """
    if not isinstance(s, str):
        return False

    candidates = (
        PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",)
    )

    # make sure we have an op at least
    for symbol in candidates:
        if symbol in s:
            return True
    return False
656 return any(op in s for op in ops)