Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/common.py: 19%

207 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Misc tools for implementing data structures 

3 

4Note: pandas.core.common is *not* part of the public API. 

5""" 

6from __future__ import annotations 

7 

8import builtins 

9from collections import ( 

10 abc, 

11 defaultdict, 

12) 

13import contextlib 

14from functools import partial 

15import inspect 

16from typing import ( 

17 TYPE_CHECKING, 

18 Any, 

19 Callable, 

20 Collection, 

21 Hashable, 

22 Iterable, 

23 Iterator, 

24 Sequence, 

25 cast, 

26 overload, 

27) 

28import warnings 

29 

30import numpy as np 

31 

32from pandas._libs import lib 

33from pandas._typing import ( 

34 AnyArrayLike, 

35 ArrayLike, 

36 NpDtype, 

37 RandomState, 

38 T, 

39) 

40from pandas.util._exceptions import find_stack_level 

41 

42from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

43from pandas.core.dtypes.common import ( 

44 is_array_like, 

45 is_bool_dtype, 

46 is_extension_array_dtype, 

47 is_integer, 

48) 

49from pandas.core.dtypes.generic import ( 

50 ABCExtensionArray, 

51 ABCIndex, 

52 ABCSeries, 

53) 

54from pandas.core.dtypes.inference import iterable_not_string 

55from pandas.core.dtypes.missing import isna 

56 

57if TYPE_CHECKING: 57 ↛ 58line 57 didn't jump to line 58, because the condition on line 57 was never true

58 from pandas import Index 

59 

60 

def flatten(line):
    """
    Lazily yield the leaf elements of an arbitrarily nested sequence.

    Parameters
    ----------
    line : sequence
        The non-string sequence to flatten.

    Notes
    -----
    Strings are treated as leaves; they are never descended into.

    Returns
    -------
    flattened : generator
    """
    for item in line:
        if not iterable_not_string(item):
            # Leaf (scalar or string): emit as-is.
            yield item
            continue
        # Nested iterable: recurse and forward its leaves.
        yield from flatten(item)

83 

84 

def consensus_name_attr(objs):
    """
    Return the ``name`` shared by every object in ``objs``, else None.

    The first object's name is the candidate.  Any later object whose name
    compares unequal, or whose comparison raises ``ValueError``, resets the
    result to None.
    """
    shared = objs[0].name
    for other in objs[1:]:
        try:
            # Truth-testing the comparison is what may raise ValueError.
            mismatch = bool(other.name != shared)
        except ValueError:
            mismatch = True
        if mismatch:
            shared = None
    return shared

94 

95 

def is_bool_indexer(key: Any) -> bool:
    """
    Check whether `key` is a valid boolean indexer.

    Parameters
    ----------
    key : Any
        Only list-likes may be considered boolean indexers.
        All other types are not considered a boolean indexer.
        For array-like input, boolean ndarrays or ExtensionArrays
        with ``_is_boolean`` set are considered boolean indexers.

    Returns
    -------
    bool
        Whether `key` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When the array is an object-dtype ndarray or ExtensionArray
        and contains missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    has_dtype = isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    )

    if has_dtype:
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True
            # Only raise when the non-NA entries are booleans; label arrays
            # such as ["A", "B", np.nan] must not raise, see
            # test_loc_getitem_list_of_labels_categoricalindex_with_na
            if lib.infer_dtype(values) == "boolean" and isna(values).any():
                raise ValueError(
                    "Cannot mask with non-boolean array containing NA / NaN values"
                )
            return False
        if is_bool_dtype(key.dtype):
            return True
        return False

    # check if np.array(key).dtype would be bool
    if isinstance(key, list) and len(key) > 0:
        if type(key) is not list:
            # GH#42461 cython will raise TypeError if we pass a subclass
            key = list(key)
        return lib.is_bool_list(key)

    return False

149 

150 

def cast_scalar_indexer(val, warn_float: bool = False):
    """
    Cast an integer-valued float to ``int`` to avoid numpy DeprecationWarnings.

    Parameters
    ----------
    val : scalar
    warn_float : bool, default False
        If True, issue deprecation warning for a float indexer.

    Returns
    -------
    outval : scalar
        ``int(val)`` when ``val`` is a float holding an integral value,
        otherwise ``val`` unchanged.
    """
    # assumes lib.is_scalar(val)
    integral_float = lib.is_float(val) and val.is_integer()
    if not integral_float:
        return val

    if warn_float:
        warnings.warn(
            "Indexing with a float is deprecated, and will raise an IndexError "
            "in pandas 2.0. You can manually convert to an integer key instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    return int(val)

176 

177 

def not_none(*args):
    """
    Return a generator over the arguments that are not None, in order.
    """

    def _kept():
        for candidate in args:
            if candidate is not None:
                yield candidate

    return _kept()

183 

184 

def any_none(*args) -> bool:
    """
    Return True if at least one argument is None.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False

190 

191 

def all_none(*args) -> bool:
    """
    Return True if every argument is None (vacuously True with no arguments).
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True

197 

198 

def any_not_none(*args) -> bool:
    """
    Return True if at least one argument is not None.
    """
    # "some argument is not None" is the negation of "all are None".
    return not all(candidate is None for candidate in args)

204 

205 

def all_not_none(*args) -> bool:
    """
    Return True if no argument is None (vacuously True with no arguments).
    """
    # "all are not None" is the negation of "some argument is None".
    return not any(candidate is None for candidate in args)

211 

212 

def count_not_none(*args) -> int:
    """
    Return how many of the arguments are not None.
    """
    return sum(1 for candidate in args if candidate is not None)

218 

219 

@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # ExtensionArray can only be returned when values is an Index, all other iterables
    # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
    # signature, so instead we special-case some common types.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert ``values`` to a 1-D array, keeping tuples as single elements.

    Parameters
    ----------
    values : Iterable
        Input to convert.  An Index returns its ``_values`` directly; anything
        that is not a list, tuple, or object exposing ``__array__`` is first
        materialized with ``list``.
    dtype : NpDtype or None, default None
        Passed to ``np.asarray``.  With a list input and an object dtype, the
        result is built element-by-element instead.

    Returns
    -------
    ArrayLike
        String results are re-cast to object dtype, and a 2-D result is
        rebuilt as a 1-D object array of tuples.
    """
    arraylike_input = isinstance(values, (list, tuple)) or hasattr(
        values, "__array__"
    )
    if arraylike_input and isinstance(values, ABCIndex):
        return values._values
    if not arraylike_input:
        values = list(values)

    wants_object = dtype in [np.object_, object]
    if isinstance(values, list) and wants_object:
        return construct_1d_object_array_from_listlike(values)

    converted = np.asarray(values, dtype=dtype)

    if issubclass(converted.dtype.type, str):
        converted = np.asarray(values, dtype=object)

    if converted.ndim == 2:
        # Avoid building an array of arrays:
        rows = list(map(tuple, values))
        converted = construct_1d_object_array_from_listlike(rows)

    return converted

256 

257 

def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Transform label or iterable of labels to array, for use in Index.

    Parameters
    ----------
    labels : label or iterable of labels
        A str or tuple counts as one label.  Other non-list, non-ndarray
        input is expanded with ``list``; if that raises TypeError it is
        treated as a single label.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # A string or tuple is one label, not a collection of labels.
        labels = [labels]
    elif not isinstance(labels, (list, np.ndarray)):
        try:
            labels = list(labels)
        except TypeError:  # non-iterable
            labels = [labels]

    return asarray_tuplesafe(labels, dtype=dtype)

285 

286 

def maybe_make_list(obj):
    """
    Wrap ``obj`` in a one-element list unless it is None, a tuple, or a list.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]

291 

292 

def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume ``obj`` into a list if it is iterable but has no length.

    Sized iterables and non-iterables are returned unchanged.
    """
    unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if not unsized_iterable:
        return cast(Collection, obj)
    return list(obj)

301 

302 

def is_null_slice(obj) -> bool:
    """
    Return True if ``obj`` is ``slice(None, None, None)``.
    """
    if not isinstance(obj, slice):
        return False
    return all(bound is None for bound in (obj.start, obj.stop, obj.step))

313 

314 

def is_true_slices(line) -> list[bool]:
    """
    Flag the non-trivial slices in ``line``.

    Returns a list of booleans with the same length as ``line``; an entry is
    True only when the element is a slice other than the null slice.
    """

    def _is_nontrivial(item) -> bool:
        return isinstance(item, slice) and not is_null_slice(item)

    return list(map(_is_nontrivial, line))

320 

321 

# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Return whether ``obj`` is a slice spanning a whole axis of length ``line``.

    That is, a slice starting at 0, stopping at ``line``, with no step.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None

333 

334 

def get_callable_name(obj):
    """
    Return a display name for ``obj``, or None if one cannot be determined.

    Resolution order: the object's own ``__name__``; for a
    ``functools.partial``, the name of the wrapped function; for any other
    callable, its class name.
    """
    # typical case has name
    if hasattr(obj, "__name__"):
        return obj.__name__
    # some objects don't; could recurse
    if isinstance(obj, partial):
        return get_callable_name(obj.func)
    # Fall back to the class name for callables.  Otherwise nothing worked
    # (probably the argument wasn't callable); return None rather than ''
    # so callers can tell "no name" apart from an empty name.
    return type(obj).__name__ if callable(obj) else None

350 

351 

def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable`` with ``obj`` and ``kwargs`` if it is callable;
    otherwise return it untouched.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)

367 

368 

def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If the resolved class is not a Mapping subclass, or if the bare
        ``defaultdict`` class (rather than an instance) is supplied.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_cls = into
    elif isinstance(into, defaultdict):
        # Reuse the instance's default_factory for the new mapping.
        return partial(defaultdict, into.default_factory)
    else:
        mapping_cls = type(into)

    if not issubclass(mapping_cls, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_cls}")
    if mapping_cls == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_cls

399 

400 

@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Helper function for processing random_state arguments.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        If receives an int, array-like, or BitGenerator, passes to
        np.random.RandomState() as seed.
        If receives an np.random RandomState or Generator, just returns that unchanged.
        If receives `None`, returns np.random.
        If receives anything else, raises an informative ValueError.

        .. versionchanged:: 1.1.0

            array-like and BitGenerator object now passed to np.random.RandomState()
            as seed

        Default None.

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    """
    if state is None:
        return np.random

    if isinstance(state, (np.random.RandomState, np.random.Generator)):
        # Already a usable random source: hand it back unchanged.
        return state

    seed_like = (
        is_integer(state)
        or is_array_like(state)
        or isinstance(state, np.random.BitGenerator)
    )
    if seed_like:
        # mypy cannot narrow ``state`` to the seed union RandomState accepts,
        # hence the arg-type ignore.
        return np.random.RandomState(state)  # type: ignore[arg-type]

    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )

467 

468 

def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Apply ``func`` to ``obj``.

    ``obj`` is passed as the first positional argument, unless ``func`` is a
    ``(callable, data_keyword)`` tuple, in which case ``obj`` is passed to the
    callable as the keyword argument named by ``data_keyword``.

    Parameters
    ----------
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``data_keyword`` is also present in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    callee, target = func
    if target in kwargs:
        raise ValueError(f"{target} is both the pipe target and a keyword argument")
    kwargs[target] = obj
    return callee(*args, **kwargs)

504 

505 

def get_rename_function(mapper):
    """
    Return a function that maps names/labels through ``mapper``.

    If ``mapper`` is a Mapping or Series, the returned function looks each
    label up in it and leaves labels it does not contain unchanged.
    Otherwise ``mapper`` itself is returned.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def f(x):
        return mapper[x] if x in mapper else x

    return f

519 

520 

def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Convert list-like or scalar input to list-like.

    Lists, numpy arrays and pandas array-likes are returned unmodified.
    Other non-string iterables are converted to a list; anything else
    (strings included) is wrapped in a one-element list.
    """
    passthrough_types = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough_types):
        return values

    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        return [values]

    return list(values)

534 

535 

@contextlib.contextmanager
def temp_setattr(obj, attr: str, value) -> Iterator[None]:
    """
    Temporarily set an attribute on an object.

    The previous value is restored when the ``with`` block exits, including
    when it exits by raising.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified.
    attr : str
        Attribute to modify.
    value
        Value to temporarily set attribute to.

    Yields
    ------
    obj with modified attribute.
    """
    previous = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        setattr(obj, attr, previous)

554 

555 

def require_length_match(data, index: Index) -> None:
    """
    Raise ValueError unless ``data`` and ``index`` have the same length.
    """
    if len(data) == len(index):
        return
    raise ValueError(
        "Length of values "
        f"({len(data)}) "
        "does not match length of index "
        f"({len(index)})"
    )

567 

568 

# Substitutions applied by ``is_builtin_func``: Python builtins mapped to
# NumPy replacements.
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}

# Lookup used by ``get_cython_func``: builtin / NumPy reduction and
# accumulation callables mapped to an operation name.  The nan-aware NumPy
# variants map to the same name as their plain counterparts.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}

605 

606 

def get_cython_func(arg: Callable) -> str | None:
    """
    Return the internal operation name registered for ``arg`` in
    ``_cython_table``, or None if there is none.
    """
    try:
        return _cython_table[arg]
    except KeyError:
        return None

612 

613 

def is_builtin_func(arg):
    """
    Return the replacement registered for ``arg`` in ``_builtin_table``;
    if there is none, return ``arg`` itself.
    """
    try:
        return _builtin_table[arg]
    except KeyError:
        return arg

620 

621 

def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing (None) name by ``level_n``, where n is its position.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    return [
        label if label is not None else f"level_{position}"
        for position, label in enumerate(names)
    ]

639 

640 

def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
    """
    Determine the Boolean value of numeric_only.

    See GH#46560 for details on the deprecation.

    Parameters
    ----------
    numeric_only : bool, None, or lib.no_default
        Value passed to the method.

    Returns
    -------
    Resolved value of numeric_only.
    """
    if numeric_only is lib.no_default:
        # Methods that behave like numeric_only=True and only got the numeric_only
        # arg in 1.5.0 default to lib.no_default
        return True
    if numeric_only is None:
        # Methods that had the numeric_only arg prior to 1.5.0 and try all columns
        # first default to None
        return False
    # An explicit value is passed through unchanged.
    return numeric_only

666 

667 

def deprecate_numeric_only_default(
    cls: type, name: str, deprecate_none: bool = False
) -> None:
    """
    Emit FutureWarning message for deprecation of numeric_only.

    See GH#46560 for details on the deprecation.

    Parameters
    ----------
    cls : type
        pandas type that is generating the warning.
    name : str
        Name of the method that is generating the warning.
    deprecate_none : bool, default False
        Whether to also warn about the deprecation of specifying ``numeric_only=None``.
    """
    # "all" and "any" expose the argument under the name bool_only.
    arg_name = "bool_only" if name in ["all", "any"] else "numeric_only"

    pieces = [
        f"The default value of {arg_name} in {cls.__name__}.{name} is "
        "deprecated. In a future version, it will default to False. "
    ]
    if deprecate_none:
        pieces.append(f"In addition, specifying '{arg_name}=None' is deprecated. ")
    pieces.append(
        f"Select only valid columns or specify the value of {arg_name} to silence "
        "this warning."
    )

    warnings.warn("".join(pieces), FutureWarning, stacklevel=find_stack_level())