Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/computation/expressions.py: 25%

107 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Expressions 

3----------- 

4 

5Offer fast expression evaluation through numexpr 

6 

7""" 

8from __future__ import annotations 

9 

10import operator 

11import warnings 

12 

13import numpy as np 

14 

15from pandas._config import get_option 

16 

17from pandas._typing import FuncType 

18from pandas.util._exceptions import find_stack_level 

19 

20from pandas.core.computation.check import NUMEXPR_INSTALLED 

21from pandas.core.ops import roperator 

22 

23if NUMEXPR_INSTALLED: 23 ↛ 24line 23 didn't jump to line 24, because the condition on line 23 was never true

24 import numexpr as ne 

25 

# --- test instrumentation ---------------------------------------------------
# Flag assigned by ``set_test_mode``; stays ``None`` until that is called.
_TEST_MODE: bool | None = None
# Entries appended by ``_store_test_result``.
_TEST_RESULT: list[bool] = []

# --- backend selection ------------------------------------------------------
# Starts out mirroring whether numexpr is installed; ``set_use_numexpr`` may
# change it afterwards.
USE_NUMEXPR = NUMEXPR_INSTALLED
# Callables bound by ``set_use_numexpr``.
_evaluate: FuncType | None = None
_where: FuncType | None = None

# dtype names that may be passed on to numexpr, keyed by the kind of call
_ALLOWED_DTYPES = {
    "evaluate": {"bool", "float32", "float64", "int32", "int64"},
    "where": {"bool", "float64", "int64"},
}

# the first operand must have more than this many elements before numexpr
# is considered
_MIN_ELEMENTS = 1_000_000

40 

41 

def set_use_numexpr(v=True) -> None:
    """
    Enable or disable numexpr and rebind the evaluation backends.

    Parameters
    ----------
    v : bool, default True
        Requested setting. It is only applied when numexpr is installed;
        otherwise ``USE_NUMEXPR`` keeps its current value.
    """
    global USE_NUMEXPR, _evaluate, _where

    if NUMEXPR_INSTALLED:
        USE_NUMEXPR = v

    # point the module-level dispatchers at the matching implementations
    if USE_NUMEXPR:
        _evaluate = _evaluate_numexpr
        _where = _where_numexpr
    else:
        _evaluate = _evaluate_standard
        _where = _where_standard

53 

54 

def set_numexpr_threads(n=None) -> None:
    """
    Set the number of threads numexpr uses.

    Does nothing unless numexpr is both installed and enabled.

    Parameters
    ----------
    n : int, optional
        Thread count to use. When omitted, the value returned by
        ``ne.detect_number_of_cores()`` is used instead.
    """
    if not (NUMEXPR_INSTALLED and USE_NUMEXPR):
        return

    thread_count = ne.detect_number_of_cores() if n is None else n
    ne.set_num_threads(thread_count)

62 

63 

def _evaluate_standard(op, op_str, a, b):
    """
    Apply ``op`` to the operands directly, without numexpr.

    ``op_str`` is accepted so the signature lines up with
    ``_evaluate_numexpr``; it is not used here.
    """
    if _TEST_MODE:
        # report to the test bookkeeping that numexpr was not used
        _store_test_result(False)

    outcome = op(a, b)
    return outcome

71 

72 

def _can_use_numexpr(op, op_str, a, b, dtype_check):
    """
    Return whether the operation WILL be handed to numexpr.

    Parameters
    ----------
    op : not used by this check
    op_str : str or None
        numexpr spelling of the operation; ``None`` rules numexpr out.
    a, b : operands
        ``a`` must expose ``size``.
    dtype_check : str
        Key into ``_ALLOWED_DTYPES``.

    Returns
    -------
    bool
    """
    if op_str is None:
        return False

    # required min elements (otherwise we are adding overhead)
    if not (a.size > _MIN_ELEMENTS):
        return False

    # ndarray and Series operands carry a dtype; other operands are ignored
    seen = {
        operand.dtype.name for operand in (a, b) if hasattr(operand, "dtype")
    }

    # the allowed dtypes must be a superset of whatever was found
    return not seen or _ALLOWED_DTYPES[dtype_check] >= seen

91 

92 

def _evaluate_numexpr(op, op_str, a, b):
    """
    Evaluate ``op`` through numexpr when possible, else fall back.

    The operands are passed to numexpr in swapped order when ``op`` is a
    reflected operator (its name, stripped of underscores, starts with
    ``"r"``). If numexpr is not eligible, raises ``TypeError``, or raises
    ``NotImplementedError`` for a boolean arithmetic case that
    ``_bool_arith_fallback`` accepts, the result comes from
    ``_evaluate_standard`` with the operands in their original order.
    Any other ``NotImplementedError`` propagates.
    """
    result = None

    if _can_use_numexpr(op, op_str, a, b, "evaluate"):
        # we were originally called by a reversed op method, so numexpr has
        # to see the operands the other way round; ``a`` and ``b`` themselves
        # stay untouched for the fallback below
        if op.__name__.strip("_").startswith("r"):
            lhs, rhs = b, a
        else:
            lhs, rhs = a, b

        try:
            result = ne.evaluate(
                f"a_value {op_str} b_value",
                local_dict={"a_value": lhs, "b_value": rhs},
                casting="safe",
            )
        except TypeError:
            # numexpr raises eg for array ** array with integers
            # (https://github.com/pydata/numexpr/issues/379)
            pass
        except NotImplementedError:
            if not _bool_arith_fallback(op_str, lhs, rhs):
                raise

    if _TEST_MODE:
        _store_test_result(result is not None)

    if result is None:
        return _evaluate_standard(op, op_str, a, b)
    return result

132 

133 

# Maps each supported operator function to the symbol used in the numexpr
# expression string; ``None`` marks operators that are never sent to numexpr.
_op_str_mapping = {
    # arithmetic: an operator and its reflected counterpart share one symbol
    **dict.fromkeys((operator.add, roperator.radd), "+"),
    **dict.fromkeys((operator.mul, roperator.rmul), "*"),
    **dict.fromkeys((operator.sub, roperator.rsub), "-"),
    **dict.fromkeys((operator.truediv, roperator.rtruediv), "/"),
    # floordiv not supported by numexpr 2.x
    **dict.fromkeys((operator.floordiv, roperator.rfloordiv), None),
    # we require Python semantics for mod of negative for backwards
    # compatibility, see https://github.com/pydata/numexpr/issues/365
    # so sticking with unaccelerated for now GH#36552
    **dict.fromkeys((operator.mod, roperator.rmod), None),
    **dict.fromkeys((operator.pow, roperator.rpow), "**"),
    # comparisons
    operator.eq: "==",
    operator.ne: "!=",
    operator.le: "<=",
    operator.lt: "<",
    operator.ge: ">=",
    operator.gt: ">",
    # logical operators
    **dict.fromkeys((operator.and_, roperator.rand_), "&"),
    **dict.fromkeys((operator.or_, roperator.ror_), "|"),
    **dict.fromkeys((operator.xor, roperator.rxor), "^"),
    # divmod is never accelerated
    **dict.fromkeys((divmod, roperator.rdivmod), None),
}

168 

169 

def _where_standard(cond, a, b):
    """
    Choose between ``a`` and ``b`` according to ``cond`` using ``np.where``.

    The caller is responsible for extracting an ndarray if necessary.
    """
    selected = np.where(cond, a, b)
    return selected

173 

174 

def _where_numexpr(cond, a, b):
    """
    ``where`` evaluated through numexpr when the operands qualify.

    Falls back to ``_where_standard`` when ``_can_use_numexpr`` declines or
    when numexpr hands back ``None``. The caller is responsible for
    extracting an ndarray if necessary.
    """
    if not _can_use_numexpr(None, "where", a, b, "where"):
        return _where_standard(cond, a, b)

    result = ne.evaluate(
        "where(cond_value, a_value, b_value)",
        local_dict={"cond_value": cond, "a_value": a, "b_value": b},
        casting="safe",
    )
    if result is None:
        result = _where_standard(cond, a, b)
    return result

191 

192 

# bind the evaluation backends according to the "compute.use_numexpr" option
set_use_numexpr(v=get_option("compute.use_numexpr"))

195 

196 

def _has_bool_dtype(x):
    """
    Report whether ``x`` is boolean.

    An object with a ``dtype`` is judged by comparing that dtype to ``bool``.
    If that raises ``AttributeError``, ``x`` itself must be a ``bool`` or
    ``np.bool_`` instance.
    """
    try:
        is_bool = x.dtype == bool
    except AttributeError:
        is_bool = isinstance(x, (bool, np.bool_))
    return is_bool

202 

203 

# arithmetic operators that numexpr does not support for the bool dtype,
# each mapped to the operator suggested in its place
_BOOL_OP_UNSUPPORTED = {
    "+": "|",
    "*": "&",
    "-": "^",
}

205 

206 

def _bool_arith_fallback(op_str, a, b):
    """
    Decide whether to fall back to the python `_evaluate_standard`.

    The fallback applies when both operands are boolean and ``op_str`` is
    one of the operators in ``_BOOL_OP_UNSUPPORTED``, which numexpr does not
    support for the bool dtype. In that case a warning naming the suggested
    replacement operator is emitted and ``True`` is returned; otherwise the
    result is ``False``.
    """
    both_bool = _has_bool_dtype(a) and _has_bool_dtype(b)
    if not both_bool or op_str not in _BOOL_OP_UNSUPPORTED:
        return False

    replacement = _BOOL_OP_UNSUPPORTED[op_str]
    warnings.warn(
        f"evaluating in Python space because the {repr(op_str)} "
        "operator is not supported by numexpr for the bool dtype, "
        f"use {repr(replacement)} instead.",
        stacklevel=find_stack_level(),
    )
    return True

223 

224 

def evaluate(op, a, b, use_numexpr: bool = True):
    """
    Evaluate and return the result of applying op to a and b.

    Parameters
    ----------
    op : the operator function to apply; must be a key of ``_op_str_mapping``
    a : left operand
    b : right operand
    use_numexpr : bool, default True
        Whether to try to use numexpr.
    """
    op_str = _op_str_mapping[op]

    # operators without a numexpr spelling always take the standard path
    if op_str is None or not use_numexpr:
        return _evaluate_standard(op, op_str, a, b)

    # error: "None" not callable
    return _evaluate(op, op_str, a, b)  # type: ignore[misc]

243 

244 

def where(cond, a, b, use_numexpr=True):
    """
    Evaluate the where condition cond on a and b.

    Parameters
    ----------
    cond : np.ndarray[bool]
    a : value returned where cond is True
    b : value returned where cond is False
    use_numexpr : bool, default True
        Whether to try to use numexpr.
    """
    assert _where is not None
    if use_numexpr:
        return _where(cond, a, b)
    return _where_standard(cond, a, b)

259 

260 

def set_test_mode(v: bool = True) -> None:
    """
    Switch the bookkeeping of numexpr usage on or off.

    While enabled, an additional ``True`` is stored for every successful use
    of evaluate with numexpr since the last ``get_test_result``. Calling
    this function always discards previously stored results.
    """
    global _TEST_MODE, _TEST_RESULT
    _TEST_MODE, _TEST_RESULT = v, []

271 

272 

def _store_test_result(used_numexpr: bool) -> None:
    """Append ``used_numexpr`` to ``_TEST_RESULT``, but only when it is truthy."""
    global _TEST_RESULT
    if not used_numexpr:
        return
    _TEST_RESULT.append(used_numexpr)

277 

278 

def get_test_result() -> list[bool]:
    """
    Return the stored test results and replace them with a new empty list.
    """
    global _TEST_RESULT
    collected, _TEST_RESULT = _TEST_RESULT, []
    return collected

285 _TEST_RESULT = [] 

286 return res