Coverage for /var/srv/projects/ 11%

152 statements  

« prev     ^ index     » next v6.4.4, created at 2023-07-17 14:22 -0600


2Module that contains many useful utilities 

3for validating data or function arguments 


5from __future__ import annotations 


7from typing import ( 

8 Any, 

9 Iterable, 

10 Sequence, 

11 TypeVar, 

12 overload, 


14import warnings 


16import numpy as np 


18from pandas.util._exceptions import find_stack_level 


20from pandas.core.dtypes.common import ( 

21 is_bool, 

22 is_integer, 



# Constrained type variables used by the boolean validators below:
# ``BoolishT`` is a bool or an int, ``BoolishNoneT`` additionally admits None.
BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)



def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
    """
    Ensure that 'args' holds no more entries than 'compat_args'.

    When it holds more, a TypeError is raised whose wording mimics the one
    Python produces when a function receives too many arguments. The value
    of 'max_fname_arg_count' is added to both counts shown in that message.

    Raises
    ------
    ValueError
        If 'max_fname_arg_count' is negative.
    TypeError
        If ``len(args)`` exceeds ``len(compat_args)``.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_allowed = len(compat_args)
    n_given = len(args)
    if n_given <= n_allowed:
        return

    max_arg_count = n_allowed + max_fname_arg_count
    actual_arg_count = n_given + max_fname_arg_count
    # Singular noun only when exactly one argument is permitted.
    noun = "arguments" if max_arg_count != 1 else "argument"

    raise TypeError(
        f"{fname}() takes at most {max_arg_count} {noun} "
        f"({actual_arg_count} given)"
    )



def _check_for_default_values(fname, arg_val_dict, compat_args):
    """
    Verify that each entry of `arg_val_dict` equals its default value
    as recorded in `compat_args`.

    This helper must only be called once it is known that the keys of
    `arg_val_dict` are a subset of the keys of `compat_args`.

    Raises
    ------
    ValueError
        If any value differs from the corresponding default.
    """
    for key in arg_val_dict:
        # First attempt a plain '==' comparison, because the left-hand
        # object may define its own notion of equality.
        try:
            passed = arg_val_dict[key]
            default = compat_args[key]

            if (passed is None) != (default is None):
                # Exactly one side is None: treat as a mismatch up front so
                # that we never compare e.g. a numpy array against None.
                match = False
            else:
                match = passed == default

            if not is_bool(match):
                raise ValueError("'match' is not a boolean")

        # The direct comparison was unusable (it raised ValueError or did
        # not yield a boolean), so fall back to an identity check.
        except ValueError:
            match = arg_val_dict[key] is compat_args[key]

        if not match:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )



def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` handed to a function: there may be at most
    `len(compat_args)` of them, and each one must equal its default value.

    Parameters
    ----------
    fname : str
        Name of the function that received the `*args` parameter.
    args : tuple
        The `*args` parameter that was passed to the function.
    max_fname_arg_count : int
        Maximum number of arguments `fname` accepts, not counting those in
        `args`. Only used to build the error message. Must be non-negative.
    compat_args : dict
        Mapping of argument names to their default values. The order of the
        keys matters: positional values are paired with the keys in order.
        This accommodates buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed them
        **positionally** to downstream implementations.

    Raises
    ------
    TypeError
        If `args` has more values than `compat_args` has keys.
    ValueError
        If a value in `args` differs from the default given in
        `compat_args`.
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the name it stands for, so that the
    # error raised for a non-default value can name the unsupported
    # parameter of 'fname'.
    named_positionals = {name: val for name, val in zip(compat_args, args)}
    _check_for_default_values(fname, named_positionals, compat_args)



def _check_for_invalid_keys(fname, kwargs, compat_args):
    """
    Raise a TypeError if 'kwargs' has a key that 'compat_args' lacks.

    When several keys are unexpected, only one of them is named in the
    error message.
    """
    # Building a set from a dict yields the set of its keys.
    unexpected = set(kwargs) - set(compat_args)
    if not unexpected:
        return

    bad_arg = next(iter(unexpected))
    raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")



def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` handed to function `fname`: every key must be
    listed in `compat_args`, and every value must equal its default.

    Parameters
    ----------
    fname : str
        Name of the function that received the `**kwargs` parameter.
    kwargs : dict
        The `**kwargs` parameter that was passed to `fname`.
    compat_args : dict
        Mapping of the keys `kwargs` may contain to their default values.

    Raises
    ------
    TypeError
        If `kwargs` contains a key that is not in `compat_args`.
    ValueError
        If a key of `kwargs` maps to a value other than the default
        recorded in `compat_args`.
    """
    # Take the snapshot first; the default-value check runs on the copy.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)



def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The `kwargs` dictionary supplied by the caller is not modified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if `kwargs` contains keys not in `compat_args`, or
        if an argument is supplied both positionally and by keyword.
    ValueError
        If `args` contains values that are not at their default value, or
        `kwargs` contains keys in `compat_args` that do not map to the
        default value as specified in `compat_args`.

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname,
        tuple(args) + tuple(kwargs.values()),
        max_fname_arg_count,
        compat_args,
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Merge into a fresh dict rather than updating `kwargs` in place, so
    # the caller's dictionary is left exactly as it was passed in.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)



def validate_bool_kwarg(
    value: BoolishNoneT, arg_name, none_allowed=True, int_allowed=False
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        The value to check.
    arg_name : str
        Argument name, used only in the error message.
    none_allowed : bool, default True
        If true, None is accepted as well.
    int_allowed : bool, default False
        If true, instances of ``int`` are accepted as well.

    Returns
    -------
    value
        The input, unchanged.

    Raises
    ------
    ValueError
        If `value` is not acceptable under the rules above.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )



def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
) -> dict[str, Any]:
    """
    Normalise arguments for methods accepting either index/columns or axis.

    Both `.method(index, columns)` and `.method(arg, axis=.)` call styles
    have to be supported, which forces some unpleasant argument parsing.
    Whatever style was used, the result is expressed in
    `{index=., columns=.}` form.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        Every positional argument supplied by the user
    kwargs : dict
        Every keyword argument supplied by the user
    arg_name, method_name : str
        Only used to produce clearer error messages

    Returns
    -------
    kwargs : dict
        A new dictionary of keyword arguments. ``kwargs`` itself is not
        modified, so callers should update it from the return value.

    Examples
    --------
    >>> df = pd.DataFrame(range(2))
    >>> validate_axis_style_args(df, (str.upper,), {'columns': id},
    ...                          'mapper', 'rename')
    {'columns': <built-in function id>, 'index': <method 'upper' of 'str' objects>}

    This emits a warning
    >>> validate_axis_style_args(df, (str.upper, id), {},
    ...                          'mapper', 'rename')
    {'index': <method 'upper' of 'str' objects>, 'columns': <built-in function id>}
    """
    # TODO: Change to keyword-only args and remove all this

    # Target: a mapping in index/columns style, e.g.
    # {'index': foo, 'columns': bar}
    out = {}

    # Consistency check: 'axis' cannot be combined with explicit axis names.
    if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    # Step 1: the generic argument supplied by keyword.
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )
        target = data._get_axis_name(kwargs.get("axis", 0))
        out[target] = kwargs[arg_name]

    # Step 2: keywords that themselves name an axis; the rest are skipped.
    for key, val in kwargs.items():
        try:
            axis_name = data._get_axis_name(key)
        except ValueError:
            continue
        out[axis_name] = val

    # Step 3: every keyword is dealt with; fold in positional arguments,
    # warning where the call is ambiguous and raising where it conflicts.
    n_positional = len(args)

    if n_positional == 0:
        # Whether this is acceptable is left to the calling function.
        return out

    if n_positional == 1:
        target = data._get_axis_name(kwargs.get("axis", 0))
        out[target] = args[0]
        return out

    if n_positional > 2:
        raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")

    # Exactly two positional arguments from here on.
    if "axis" in kwargs:
        # Unambiguously wrong
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'")

    msg = (
        f"Interpreting call\n\t'.{method_name}(a, b)' as "
        f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
        "arguments to remove any ambiguity. In the future, using "
        "positional arguments for 'index' or 'columns' will raise "
        "a 'TypeError'."
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    out[data._get_axis_name(0)] = args[0]
    out[data._get_axis_name(1)] = args[1]
    return out



def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method``; a given 'value' is optionally checked
    not to be a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to reject a 'value' that is a list or tuple, i.e. to
        require that it is a scalar or dict.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are given.
    TypeError
        If 'value' is a list or tuple while `validate_scalar_dict_value`
        is true.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not has_value and not has_method:
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method



def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile otherwise raises a ValueError. The bounds check is performed
    element-wise on the converted array, so inputs of any dimensionality
    (scalar, flat, or nested) are handled uniformly.

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)

    # One vectorised comparison covers the 0-dim and n-dim cases alike.
    # NaN compares false on both sides and is therefore rejected.
    within_bounds = np.logical_and(q_arr >= 0, q_arr <= 1)

    if not np.all(within_bounds):
        # Don't change this to an f-string. The string formatting
        # is too expensive for cases where we don't need it.
        msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr




@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    Parameters
    ----------
    ascending : bool, int, or sequence of bool or int
        A single flag, or one flag per sort key.

    Returns
    -------
    bool, int, or list
        The validated flag when `ascending` is not a ``Sequence``,
        otherwise a new list holding each validated item.

    Raises
    ------
    ValueError
        If the flag, or any item of the sequence, is rejected by
        ``validate_bool_kwarg`` (None is not allowed, integers are).
    """
    kwargs = {"none_allowed": False, "int_allowed": True}
    if not isinstance(ascending, Sequence):
        return validate_bool_kwarg(ascending, "ascending", **kwargs)

    return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]



def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both endpoints; "left" or "right" closes only that one.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")



def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a pair of boundary flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]
        Whether the left and the right boundary, respectively, are included.

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Only strings are looked up; anything else (None included) is invalid.
    left_right_inclusive: tuple[bool, bool] | None = (
        flags_by_name.get(inclusive) if isinstance(inclusive, str) else None
    )

    if left_right_inclusive is None:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )

    return left_right_inclusive



def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` must be an integer in the inclusive range [-length, length].
    A negative `loc` is shifted by `length`, so the returned position
    always lies within [0, length].

    The exception types raised on failure match those of np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` falls outside [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position