Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/common.py: 19%
207 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Misc tools for implementing data structures
4Note: pandas.core.common is *not* part of the public API.
5"""
6from __future__ import annotations
8import builtins
9from collections import (
10 abc,
11 defaultdict,
12)
13import contextlib
14from functools import partial
15import inspect
16from typing import (
17 TYPE_CHECKING,
18 Any,
19 Callable,
20 Collection,
21 Hashable,
22 Iterable,
23 Iterator,
24 Sequence,
25 cast,
26 overload,
27)
28import warnings
30import numpy as np
32from pandas._libs import lib
33from pandas._typing import (
34 AnyArrayLike,
35 ArrayLike,
36 NpDtype,
37 RandomState,
38 T,
39)
40from pandas.util._exceptions import find_stack_level
42from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
43from pandas.core.dtypes.common import (
44 is_array_like,
45 is_bool_dtype,
46 is_extension_array_dtype,
47 is_integer,
48)
49from pandas.core.dtypes.generic import (
50 ABCExtensionArray,
51 ABCIndex,
52 ABCSeries,
53)
54from pandas.core.dtypes.inference import iterable_not_string
55from pandas.core.dtypes.missing import isna
57if TYPE_CHECKING: 57 ↛ 58line 57 didn't jump to line 58, because the condition on line 57 was never true
58 from pandas import Index
def flatten(line):
    """
    Flatten an arbitrarily nested sequence.

    Parameters
    ----------
    line : sequence
        The non string sequence to flatten

    Notes
    -----
    This doesn't consider strings sequences.

    The nesting is walked with an explicit stack of iterators instead of
    recursion, so very deep nesting does not exhaust the interpreter's
    recursion limit.

    Returns
    -------
    flattened : generator
    """
    pending = [iter(line)]
    while pending:
        for element in pending[-1]:
            if iterable_not_string(element):
                # Descend into the nested iterable; the partially consumed
                # parent iterator stays on the stack and is resumed later.
                pending.append(iter(element))
                break
            yield element
        else:
            # The innermost iterator is exhausted: resume its parent.
            pending.pop()
def consensus_name_attr(objs):
    """
    Return the ``name`` shared by every object in ``objs``, or None.

    The name of the first object is the candidate. It is replaced by None
    as soon as another object's name compares unequal to it, or when that
    comparison raises ValueError.
    """
    common = objs[0].name
    for other in objs[1:]:
        try:
            # bool() keeps the truth-value test inside the try block, since
            # evaluating the comparison result may itself raise ValueError.
            differs = bool(other.name != common)
        except ValueError:
            differs = True
        if differs:
            common = None
    return common
def is_bool_indexer(key: Any) -> bool:
    """
    Check whether `key` is a valid boolean indexer.

    Parameters
    ----------
    key : Any
        Only list-likes may be considered boolean indexers.
        All other types are not considered a boolean indexer.
        For array-like input, boolean ndarrays or ExtensionArrays
        with ``_is_boolean`` set are considered boolean indexers.

    Returns
    -------
    bool
        Whether `key` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When the array is an object-dtype ndarray or ExtensionArray
        and contains missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    array_backed = isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    )

    if array_backed:
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True
            # Only raise when the non-missing entries are booleans.
            # Don't raise on e.g. ["A", "B", np.nan], see
            #  test_loc_getitem_list_of_labels_categoricalindex_with_na
            if lib.infer_dtype(values) == "boolean" and isna(values).any():
                raise ValueError(
                    "Cannot mask with non-boolean array containing NA / NaN values"
                )
            return False
        if is_bool_dtype(key.dtype):
            return True
        return False

    # check if np.array(key).dtype would be bool
    if isinstance(key, list) and len(key) > 0:
        if type(key) is not list:
            # GH#42461 cython will raise TypeError if we pass a subclass
            key = list(key)
        return lib.is_bool_list(key)

    return False
def cast_scalar_indexer(val, warn_float: bool = False):
    """
    Cast an integer-valued float to int, avoiding numpy DeprecationWarnings.

    Parameters
    ----------
    val : scalar
    warn_float : bool, default False
        If True, issue deprecation warning for a float indexer.

    Returns
    -------
    outval : scalar
        ``int(val)`` when ``val`` is a float holding an integer value,
        otherwise ``val`` unchanged.
    """
    # assumes lib.is_scalar(val)
    if not (lib.is_float(val) and val.is_integer()):
        return val

    if warn_float:
        warnings.warn(
            "Indexing with a float is deprecated, and will raise an IndexError "
            "in pandas 2.0. You can manually convert to an integer key instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    return int(val)
def not_none(*args):
    """
    Yield, in order, each argument that is not None.
    """
    for candidate in args:
        if candidate is not None:
            yield candidate
def any_none(*args) -> bool:
    """
    Return True if at least one argument is None, otherwise False.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False
def all_none(*args) -> bool:
    """
    Return True if every argument is None (also True for no arguments).
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True
def any_not_none(*args) -> bool:
    """
    Return True if at least one argument is not None, otherwise False.
    """
    for candidate in args:
        if candidate is not None:
            return True
    return False
def all_not_none(*args) -> bool:
    """
    Return True if no argument is None (also True for no arguments).
    """
    for candidate in args:
        if candidate is None:
            return False
    return True
def count_not_none(*args) -> int:
    """
    Return how many of the arguments are not None.
    """
    present = [candidate for candidate in args if candidate is not None]
    return len(present)
@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # ExtensionArray can only be returned when values is an Index, all other iterables
    # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
    # signature, so instead we special-case some common types.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert ``values`` to an array without turning tuples into a 2D array.

    Parameters
    ----------
    values : iterable
        An Index is short-circuited to its ``_values``. Anything that is
        neither a list/tuple nor exposes ``__array__`` is first consumed
        into a list.
    dtype : dtype, optional
        Passed to ``np.asarray``. For list input with object dtype the
        1D object-array constructor is used instead.

    Returns
    -------
    np.ndarray or the ``_values`` of the Index that was passed
    """
    if isinstance(values, (list, tuple)) or hasattr(values, "__array__"):
        if isinstance(values, ABCIndex):
            return values._values
    else:
        values = list(values)

    wants_object = dtype in [np.object_, object]
    if isinstance(values, list) and wants_object:
        return construct_1d_object_array_from_listlike(values)

    converted = np.asarray(values, dtype=dtype)

    # numpy string dtypes are stored as object instead
    if issubclass(converted.dtype.type, str):
        converted = np.asarray(values, dtype=object)

    if converted.ndim != 2:
        return converted

    # Avoid building an array of arrays:
    as_tuples = [tuple(row) for row in values]
    return construct_1d_object_array_from_listlike(as_tuples)
def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Transform label or iterable of labels to array, for use in Index.

    Parameters
    ----------
    labels : label or iterable of labels
        A str or tuple is treated as one single label. Lists and ndarrays
        are used as they are; any other object is consumed into a list, or
        wrapped in a one-element list when it is not iterable.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        candidates = [labels]
    elif isinstance(labels, (list, np.ndarray)):
        candidates = labels
    else:
        try:
            candidates = list(labels)
        except TypeError:  # non-iterable
            candidates = [labels]

    return asarray_tuplesafe(candidates, dtype=dtype)
def maybe_make_list(obj):
    """
    Wrap ``obj`` in a one-element list unless it is None, a tuple or a list.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]
def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume ``obj`` into a list if it is Iterable but not Sized.

    Any other object is returned as it is.
    """
    unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if not unsized_iterable:
        # cast() only informs the type checker; obj is returned untouched.
        return cast(Collection, obj)
    return list(obj)
def is_null_slice(obj) -> bool:
    """
    Return True if ``obj`` is a slice whose start, stop and step are all None.
    """
    if not isinstance(obj, slice):
        return False
    return all(bound is None for bound in (obj.start, obj.stop, obj.step))
def is_true_slices(line) -> list[bool]:
    """
    Flag the non-trivial slices in "line".

    Returns a list with the same length as "line": True for each element
    that is a slice but not a null slice, False for everything else.
    """
    flags = []
    for item in line:
        if isinstance(item, slice):
            flags.append(not is_null_slice(item))
        else:
            flags.append(False)
    return flags
322# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Check whether ``obj`` is the slice ``0:line`` with no step.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None
def get_callable_name(obj):
    """
    Find a name for ``obj``.

    Returns ``obj.__name__`` when present. ``functools.partial`` objects
    without a name are unwrapped through ``.func`` until a name is found.
    Other callables fall back to their class name. Returns None when
    nothing applies.
    """
    target = obj
    while True:
        # typical case has name
        if hasattr(target, "__name__"):
            return getattr(target, "__name__")
        # some objects don't; partials are unwrapped one layer at a time
        if not isinstance(target, partial):
            break
        target = target.func

    # fall back to class name
    if callable(target):
        return type(target).__name__

    # everything failed (probably because the argument
    # wasn't actually callable); we return None
    # instead of the empty string in this case to allow
    # distinguishing between no name and a name of ''
    return None
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable(obj, **kwargs)`` if it is callable, otherwise
    return ``maybe_callable`` as it is.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
    """
    if not callable(maybe_callable):
        return maybe_callable

    return maybe_callable(obj, **kwargs)
def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If the resolved class is not a Mapping subclass, or if the
        ``defaultdict`` class itself is passed instead of an instance.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_cls = into
    elif isinstance(into, defaultdict):
        # Keep the instance's default_factory in the returned constructor.
        return partial(defaultdict, into.default_factory)
    else:
        mapping_cls = type(into)

    if not issubclass(mapping_cls, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_cls}")
    if mapping_cls == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_cls
@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Helper function for processing random_state arguments.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        If receives an int, array-like, or BitGenerator, passes to
        np.random.RandomState() as seed.
        If receives an np.random RandomState or Generator, just returns that unchanged.
        If receives `None`, returns np.random.
        If receives anything else, raises an informative ValueError.

        .. versionchanged:: 1.1.0

            array-like and BitGenerator object now passed to np.random.RandomState()
            as seed

        Default None.

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    Raises
    ------
    ValueError
        If ``state`` is none of the accepted kinds.
    """
    usable_as_seed = (
        is_integer(state)
        or is_array_like(state)
        or isinstance(state, np.random.BitGenerator)
    )
    if usable_as_seed:
        # mypy rejects the argument because the declared type of ``state``
        # is wider than what RandomState accepts as a seed; the checks
        # above have already narrowed it at runtime.
        return np.random.RandomState(state)  # type: ignore[arg-type]

    # Existing random number generators are handed back untouched.
    if isinstance(state, np.random.RandomState):
        return state
    if isinstance(state, np.random.Generator):
        return state

    if state is None:
        return np.random

    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )
def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Apply ``func`` to ``obj``.

    ``obj`` is passed either as the first positional argument of ``func``
    or, when ``func`` is a ``(callable, data_keyword)`` tuple, as the
    keyword argument named ``data_keyword`` of ``callable``.

    Parameters
    ----------
    obj : object
        The object handed to ``func``.
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``data_keyword`` is also present in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    callee, target = func
    if target in kwargs:
        raise ValueError(f"{target} is both the pipe target and a keyword argument")
    kwargs[target] = obj
    return callee(*args, **kwargs)
def get_rename_function(mapper):
    """
    Return a function that maps names/labels.

    A dict-like (Mapping) or Series ``mapper`` is wrapped in a lookup
    function that leaves keys it does not contain unchanged; anything
    else is returned as it is.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def f(x):
        return mapper[x] if x in mapper else x

    return f
def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Convert list-like or scalar input to list-like.

    List, numpy and pandas array-like inputs are returned unmodified. Any
    other iterable except ``str`` is consumed into a list. Everything else
    is wrapped in a one-element list.
    """
    passthrough = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough):
        return values

    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        return [values]

    return list(values)
@contextlib.contextmanager
def temp_setattr(obj, attr: str, value) -> Iterator[Any]:
    """
    Temporarily set attribute on an object.

    The previous value is restored when the ``with`` block exits, whether
    it exits normally or by raising.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified.
    attr : str
        Attribute to modify. It is read with ``getattr`` before being
        overwritten, so it must already exist on ``obj``.
    value : object
        Value to temporarily set attribute to.

    Yields
    ------
    object
        ``obj`` with modified attribute.
    """
    old_value = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        setattr(obj, attr, old_value)
def require_length_match(data, index: Index) -> None:
    """
    Check the length of data matches the length of the index.

    Raises
    ------
    ValueError
        If ``len(data)`` differs from ``len(index)``.
    """
    n_values = len(data)
    n_index = len(index)
    if n_values == n_index:
        return

    raise ValueError(
        "Length of values "
        f"({n_values}) "
        "does not match length of index "
        f"({n_index})"
    )
# Replacements for Python builtins, looked up by ``is_builtin_func``.
#
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}
# Maps Python builtins and NumPy callables to a string name; looked up by
# ``get_cython_func``. Each NumPy function and its nan-aware variant
# (e.g. np.sum / np.nansum) share the same name.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}
def get_cython_func(arg: Callable) -> str | None:
    """
    Return the name registered for ``arg`` in ``_cython_table``, or None
    when ``arg`` has no entry.
    """
    try:
        return _cython_table[arg]
    except KeyError:
        return None
def is_builtin_func(arg):
    """
    Return the replacement registered for ``arg`` in ``_builtin_table``,
    otherwise return ``arg`` itself.
    """
    try:
        return _builtin_table[arg]
    except KeyError:
        return arg
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing (None) name by ``level_n``, where n is its position.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    return [
        name if name is not None else f"level_{position}"
        for position, name in enumerate(names)
    ]
def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
    """
    Determine the Boolean value of numeric_only.

    See GH#46560 for details on the deprecation.

    Parameters
    ----------
    numeric_only : bool, None, or lib.no_default
        Value passed to the method.

    Returns
    -------
    Resolved value of numeric_only.
    """
    # Methods that behave like numeric_only=True and only got the numeric_only
    # arg in 1.5.0 default to lib.no_default
    if numeric_only is lib.no_default:
        return True

    # Methods that had the numeric_only arg prior to 1.5.0 and try all columns
    # first default to None
    if numeric_only is None:
        return False

    # An explicit value is passed through unchanged.
    return numeric_only
def deprecate_numeric_only_default(
    cls: type, name: str, deprecate_none: bool = False
) -> None:
    """
    Emit FutureWarning message for deprecation of numeric_only.

    See GH#46560 for details on the deprecation.

    Parameters
    ----------
    cls : type
        pandas type that is generating the warning.
    name : str
        Name of the method that is generating the warning.
    deprecate_none : bool, default False
        Whether to also warn about the deprecation of specifying ``numeric_only=None``.
    """
    # "all" and "any" name the argument bool_only instead of numeric_only.
    arg_name = "bool_only" if name in ["all", "any"] else "numeric_only"

    parts = [
        f"The default value of {arg_name} in {cls.__name__}.{name} is "
        "deprecated. In a future version, it will default to False. "
    ]
    if deprecate_none:
        parts.append(f"In addition, specifying '{arg_name}=None' is deprecated. ")
    parts.append(
        f"Select only valid columns or specify the value of {arg_name} to silence "
        "this warning."
    )

    warnings.warn("".join(parts), FutureWarning, stacklevel=find_stack_level())