Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/util/_validators.py: 11%

152 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Module that contains many useful utilities 

3for validating data or function arguments 

4""" 

5from __future__ import annotations 

6 

7from typing import ( 

8 Any, 

9 Iterable, 

10 Sequence, 

11 TypeVar, 

12 overload, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas.util._exceptions import find_stack_level 

19 

20from pandas.core.dtypes.common import ( 

21 is_bool, 

22 is_integer, 

23) 

24 

25BoolishT = TypeVar("BoolishT", bool, int) 

26BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None) 

27 

28 

def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
    """
    Ensure that `args` holds no more values than `compat_args` has entries.

    The failure mode mirrors what Python itself does when a function is
    called with too many arguments: a TypeError is raised. Both counts
    reported in the message are offset by `max_fname_arg_count`.

    Raises
    ------
    ValueError
        If `max_fname_arg_count` is negative.
    TypeError
        If ``len(args)`` is greater than ``len(compat_args)``.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)

    # Nothing to report when the caller stayed within the allowed count.
    if n_given <= n_allowed:
        return

    max_arg_count = n_allowed + max_fname_arg_count
    actual_arg_count = n_given + max_fname_arg_count
    noun = "arguments" if max_arg_count != 1 else "argument"

    raise TypeError(
        f"{fname}() takes at most {max_arg_count} {noun} "
        f"({actual_arg_count} given)"
    )

47 

48 

def _check_for_default_values(fname, arg_val_dict, compat_args):
    """
    Verify that every entry of `arg_val_dict` equals the default value
    registered for the same key in `compat_args`.

    Callers are expected to have already established that the keys of
    `arg_val_dict` are a subset of the keys of `compat_args`.

    Raises
    ------
    ValueError
        If any supplied value differs from its default.
    """
    for key, supplied in arg_val_dict.items():
        default = compat_args[key]

        try:
            # Exactly one side being None is an immediate mismatch; this
            # also avoids comparing e.g. a numpy array against None.
            if (supplied is None) != (default is None):
                match = False
            else:
                # Plain equality first, since the left-hand object may
                # have overridden the comparison.
                match = supplied == default

            if not is_bool(match):
                raise ValueError("'match' is not a boolean")

        except ValueError:
            # Equality was unusable (it raised, or did not yield a
            # boolean), so fall back to an identity check.
            match = supplied is default

        if not match:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )

85 

86 

def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` handed to a function against `compat_args`.

    Two things are checked: that `args` contains at most
    ``len(compat_args)`` values, and that each of those values equals the
    default recorded for the parameter at the same position.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        The insertion order of the dict is what pairs each positional
        value with a parameter name, which accommodates `numpy` versions
        that displayed keyword arguments in a signature but passed them
        **positionally** to downstream implementations.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with its parameter name so that a
    # mismatch can be reported against the parameter that is not
    # supported in the pandas implementation of 'fname'.
    named_values = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_values, compat_args)

126 

127 

def _check_for_invalid_keys(fname, kwargs, compat_args):
    """
    Raise a TypeError if `kwargs` contains a key missing from `compat_args`.

    Keys are examined in the iteration order of `kwargs`, so when several
    keys are invalid the one named in the error message is always the
    first offending key, rather than an arbitrary member of a set
    difference.

    Raises
    ------
    TypeError
        If any key of `kwargs` is not a key of `compat_args`.
    """
    for key in kwargs:
        if key not in compat_args:
            raise TypeError(
                f"{fname}() got an unexpected keyword argument '{key}'"
            )

139 

140 

def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` handed to `fname` against `compat_args`.

    Every key must be one that `compat_args` knows about, and every value
    must equal the default stored for that key.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value check runs on a shallow copy taken up front.
    supplied = kwargs.copy()

    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, supplied, compat_args)

166 

167 

def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    Neither `args` nor `kwargs` is modified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `*args` and `**kwargs`
        parameters
    args: tuple
        The `*args` parameter passed into a function. Any sequence is
        accepted.
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError if `args` and `kwargs` together contain more values than
    there are `compat_args`, if a parameter is supplied both positionally
    and by keyword, OR if `kwargs` contains keys not in `compat_args`
    ValueError if `args` or `kwargs` contain values that do not map to the
    default value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args.
    # `tuple(args)` lets callers hand in a list as well as a tuple.
    _check_arg_length(
        fname,
        tuple(args) + tuple(kwargs.values()),
        max_fname_arg_count,
        compat_args,
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy so the caller's `kwargs` dict is left as it
    # was passed in; key order is kwargs first, then positional names.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)

224 

225 

def validate_bool_kwarg(
    value: BoolishNoneT, arg_name, none_allowed=True, int_allowed=False
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is usable as a boolean flag.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )

    if acceptable:
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )

266 

267 

def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
) -> dict[str, Any]:
    """
    Normalise the arguments of a mixed index, columns / axis method.

    Methods that accept both `.method(index, columns)` and
    `.method(arg, axis=.)` need some unpleasant argument parsing. This
    function rewrites whatever the user passed into
    `{index=., columns=.}` style.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A dictionary of keyword arguments. Doesn't modify ``kwargs``
        inplace, so update them with the return value here.

    Examples
    --------
    >>> df = pd.DataFrame(range(2))
    >>> validate_axis_style_args(df, (str.upper,), {'columns': id},
    ...                          'mapper', 'rename')
    {'columns': <built-in function id>, 'index': <method 'upper' of 'str' objects>}

    This emits a warning
    >>> validate_axis_style_args(df, (str.upper, id), {},
    ...                          'mapper', 'rename')
    {'index': <method 'upper' of 'str' objects>, 'columns': <built-in function id>}
    """
    # TODO: Change to keyword-only args and remove all this

    # Filled with index/columns-style entries,
    # e.g. {'index': foo, 'columns': bar}
    resolved = {}

    # Consistency check: 'axis' cannot be combined with an explicit axis key.
    if "axis" in kwargs and any(
        axis_key in kwargs for axis_key in data._AXIS_TO_AXIS_NUMBER
    ):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    # An explicit `arg_name=` keyword targets the axis named by 'axis'
    # (axis 0 when 'axis' is absent) and cannot be mixed with positionals.
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )

        target = data._get_axis_name(kwargs.get("axis", 0))
        resolved[target] = kwargs[arg_name]

    # Remaining keywords: keep those whose key resolves to an axis name,
    # skip the ones `_get_axis_name` rejects with a ValueError.
    for key, value in kwargs.items():
        try:
            axis_name = data._get_axis_name(key)
        except ValueError:
            continue
        resolved[axis_name] = value

    # All user-provided kwargs have been handled now.
    # Supplement with positional arguments, warning when the call is
    # ambiguous and raising when it conflicts.
    n_positional = len(args)

    if n_positional == 1:
        target = data._get_axis_name(kwargs.get("axis", 0))
        resolved[target] = args[0]
    elif n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            raise TypeError(
                "Cannot specify both 'axis' and any of 'index' or 'columns'"
            )

        warning_text = (
            f"Interpreting call\n\t'.{method_name}(a, b)' as "
            f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity. In the future, using "
            "positional arguments for 'index' or 'columns' will raise "
            "a 'TypeError'."
        )
        warnings.warn(warning_text, FutureWarning, stacklevel=find_stack_level())
        resolved[data._get_axis_name(0)] = args[0]
        resolved[data._get_axis_name(1)] = args[1]
    elif n_positional > 2:
        raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")
    # With no positional arguments it is up to the calling method to
    # decide whether the call is valid.

    return resolved

364 

365 

def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method``; a given 'value' is optionally checked
    not to be a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not has_value and not has_method:
        raise ValueError("Must specify a fill 'value' or 'method'.")

    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    # From here on exactly one of the two was supplied.
    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method

403 

404 

def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile otherwise raises a ValueError.

    The bounds check is a single vectorised comparison, so scalars and
    arrays of any dimensionality go through the same code path.

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)
    # Don't change this to an f-string. The string formatting
    # is too expensive for cases where we don't need it.
    msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
    # NaN compares False on both sides, so it is rejected here as well.
    if not np.all((q_arr >= 0) & (q_arr <= 1)):
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr

437 

438 

@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a list; any
    other input is validated as a single flag and returned as-is. None is
    rejected and integers are accepted in both cases.
    """

    def _checked(flag):
        # Same rule for the scalar and the per-item case.
        return validate_bool_kwarg(
            flag, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_checked(item) for item in ascending]

    return _checked(ascending)

458 

459 

def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # None means both endpoints are closed.
    if closed is None:
        return True, True

    if closed == "left":
        return True, False

    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")

491 

492 

def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into (left, right) inclusion flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # Only strings are looked up; anything else (including None) is
    # rejected without touching the table.
    if isinstance(inclusive, str):
        flags_by_name = {
            "both": (True, True),
            "left": (True, False),
            "right": (False, True),
            "neither": (False, False),
        }
        if inclusive in flags_by_name:
            return flags_by_name[inclusive]

    raise ValueError(
        "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
    )

525 

526 

def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` must be an integer in ``[-length, length]``. A negative `loc` is
    shifted by `length` so that the returned value lies in ``[0, length]``.

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` falls outside ``[-length, length]``.
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    position = loc + length if loc < 0 else loc

    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")

    return position