Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/dtypes/missing.py: 15%
256 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2missing types & inference
3"""
4from __future__ import annotations
6from decimal import Decimal
7from functools import partial
8from typing import (
9 TYPE_CHECKING,
10 overload,
11)
13import numpy as np
15from pandas._config import get_option
17from pandas._libs import lib
18import pandas._libs.missing as libmissing
19from pandas._libs.tslibs import (
20 NaT,
21 Period,
22 iNaT,
23)
25from pandas.core.dtypes.common import (
26 DT64NS_DTYPE,
27 TD64NS_DTYPE,
28 ensure_object,
29 is_bool_dtype,
30 is_categorical_dtype,
31 is_complex_dtype,
32 is_datetimelike_v_numeric,
33 is_dtype_equal,
34 is_extension_array_dtype,
35 is_float_dtype,
36 is_integer_dtype,
37 is_object_dtype,
38 is_scalar,
39 is_string_or_object_np_dtype,
40 needs_i8_conversion,
41)
42from pandas.core.dtypes.dtypes import (
43 CategoricalDtype,
44 DatetimeTZDtype,
45 ExtensionDtype,
46 IntervalDtype,
47 PeriodDtype,
48)
49from pandas.core.dtypes.generic import (
50 ABCDataFrame,
51 ABCExtensionArray,
52 ABCIndex,
53 ABCMultiIndex,
54 ABCSeries,
55)
56from pandas.core.dtypes.inference import is_list_like
58if TYPE_CHECKING: 58 ↛ 59line 58 didn't jump to line 59, because the condition on line 58 was never true
59 from pandas._typing import (
60 ArrayLike,
61 DtypeObj,
62 NDFrame,
63 NDFrameT,
64 Scalar,
65 npt,
66 )
68 from pandas.core.indexes.base import Index
# Scalar +/- infinity checks, re-exported from the compiled missing-value
# helpers so callers can import them from this module.
isposinf_scalar = libmissing.isposinf_scalar
isneginf_scalar = libmissing.isneginf_scalar

# Cached dtype objects that ``is_valid_na_for_dtype`` compares against.
_dtype_object = np.dtype("object")
_dtype_str = np.dtype(str)

# Module-level state that ``_use_inf_as_na`` rebinds through ``globals()``
# whenever the inf-as-na option changes; ``isna_all`` reads both at call time.
nan_checker = np.isnan
INF_AS_NA = False
@overload
def isna(obj: Scalar) -> bool:
    ...


@overload
def isna(obj: ArrayLike | Index | list) -> npt.NDArray[np.bool_]:
    ...


@overload
def isna(obj: NDFrameT) -> NDFrameT:
    ...


# unions of the array-like and NDFrame cases above
@overload
def isna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]:
    ...


@overload
def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
    ...


def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
    """
    Flag the missing values of a scalar or array-like object.

    Given a scalar or an array-like, report which values are missing
    (``NaN`` in numeric arrays, ``None`` or ``NaN`` in object arrays,
    ``NaT`` in datetimelike).

    Parameters
    ----------
    obj : scalar or array-like
        Object to check for null or missing values.

    Returns
    -------
    bool or array-like of bool
        A single boolean when the input is a scalar.
        For array input, an array of booleans telling, element by element,
        whether the corresponding value is missing.

    See Also
    --------
    notna : Boolean inverse of pandas.isna.
    Series.isna : Detect missing values in a Series.
    DataFrame.isna : Detect missing values in a DataFrame.
    Index.isna : Detect missing values in an Index.

    Examples
    --------
    A scalar argument (strings included) gives back a scalar boolean.

    >>> pd.isna('dog')
    False

    >>> pd.isna(pd.NA)
    True

    >>> pd.isna(np.nan)
    True

    An ndarray gives back an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.isna(array)
    array([[False,  True, False],
           [False, False,  True]])

    An index gives back an ndarray of booleans.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.isna(index)
    array([False, False,  True, False])

    A Series or DataFrame gives back the same type, holding booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.isna(df)
           0      1      2
    0  False  False  False
    1  False   True  False

    >>> pd.isna(df[1])
    0    False
    1     True
    Name: 1, dtype: bool
    """
    # ``_isna`` is resolved at call time on purpose: ``_use_inf_as_na``
    # rebinds that module-level name when the option changes.
    missing = _isna(obj)
    return missing


isnull = isna
def _isna(obj, inf_as_na: bool = False):
    """
    Compute the missing-value indicator for a scalar or array-like.

    None, NaN and NA count as null; infinite values count as null as well
    when ``inf_as_na`` is True.

    Parameters
    ----------
    obj : ndarray or object value
        Input array or scalar value.
    inf_as_na : bool
        Whether to treat infinity as null.

    Returns
    -------
    boolean ndarray or boolean

    Raises
    ------
    NotImplementedError
        If ``obj`` is a MultiIndex.
    """
    if is_scalar(obj):
        return libmissing.checknull(obj, inf_as_na=inf_as_na)

    if isinstance(obj, ABCMultiIndex):
        raise NotImplementedError("isna is not defined for MultiIndex")

    if isinstance(obj, type):
        # a class object is never a missing value
        return False

    if isinstance(obj, (np.ndarray, ABCExtensionArray)):
        return _isna_array(obj, inf_as_na=inf_as_na)

    if isinstance(obj, ABCIndex):
        if obj._can_hold_na:
            return _isna_array(obj._values, inf_as_na=inf_as_na)
        # Prefer the index's own (cached) isna here: it short-circuits for
        # integer dtypes and avoids materializing RangeIndex._values.
        return obj.isna()

    if isinstance(obj, ABCSeries):
        mask = _isna_array(obj._values, inf_as_na=inf_as_na)
        # box the raw mask back into the caller's Series type
        return obj._constructor(mask, index=obj.index, name=obj.name, copy=False)

    if isinstance(obj, ABCDataFrame):
        return obj.isna()

    if isinstance(obj, list):
        as_objects = np.asarray(obj, dtype=object)
        return _isna_array(as_objects, inf_as_na=inf_as_na)

    if hasattr(obj, "__array__"):
        return _isna_array(np.asarray(obj), inf_as_na=inf_as_na)

    # anything else is reported as not missing
    return False
def _use_inf_as_na(key):
    """
    Option change callback for na/inf behaviour.

    Reads the current value of the option identified by ``key`` and rebinds
    the module-level ``_isna``, ``nan_checker`` and ``INF_AS_NA`` to match,
    i.e. it chooses which replacement for numpy.isnan / -numpy.isfinite is
    used.

    Parameters
    ----------
    key
        Option key handed to ``get_option``. When the option value is truthy,
        None, NaN, INF and -INF are treated as null (old way); otherwise
        None and NaN are null, but INF and -INF are not (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * https://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    inf_as_na = get_option(key)
    module_namespace = globals()

    # every later ``_isna`` lookup in this module now carries the flag
    module_namespace["_isna"] = partial(_isna, inf_as_na=inf_as_na)

    if not inf_as_na:
        module_namespace["nan_checker"] = np.isnan
        module_namespace["INF_AS_NA"] = False
    else:
        module_namespace["nan_checker"] = lambda x: ~np.isfinite(x)
        module_namespace["INF_AS_NA"] = True
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Build the element-wise NaN / NA indicator for an array.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if isinstance(values, np.ndarray):
        if is_string_or_object_np_dtype(dtype):
            return _isna_string_dtype(values, inf_as_na=inf_as_na)
        if needs_i8_conversion(dtype):
            # this is the NaT pattern
            return values.view("i8") == iNaT
        if inf_as_na:
            return ~np.isfinite(values)
        return np.isnan(values)

    # Not an ndarray, i.e. an ExtensionArray.
    if inf_as_na and is_categorical_dtype(dtype):
        return libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
    return values.isna()
def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
    """
    NA indicator for ndarrays of string ("S"/"U") or object dtype.

    Works around NumPy ticket 1542.
    """
    if values.dtype.kind in ("S", "U"):
        # fixed-width bytes/str arrays are reported as entirely non-missing
        return np.zeros(values.shape, dtype=bool)

    ndim = values.ndim
    if ndim == 1:
        return libmissing.isnaobj(values, inf_as_na=inf_as_na)
    if ndim == 2:
        return libmissing.isnaobj2d(values, inf_as_na=inf_as_na)

    # Any other dimensionality (e.g. 0-D, reached via mask_missing):
    # check the flattened values, then restore the original shape.
    flat_mask = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
    return flat_mask.reshape(values.shape)
@overload
def notna(obj: Scalar) -> bool:
    ...


@overload
def notna(obj: ArrayLike | Index | list) -> npt.NDArray[np.bool_]:
    ...


@overload
def notna(obj: NDFrameT) -> NDFrameT:
    ...


# unions of the array-like and NDFrame cases above
@overload
def notna(obj: NDFrameT | ArrayLike | Index | list) -> NDFrameT | npt.NDArray[np.bool_]:
    ...


@overload
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
    ...


def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
    """
    Flag the non-missing values of a scalar or array-like object.

    Given a scalar or an array-like, report which values are valid (not
    missing, which is ``NaN`` in numeric arrays, ``None`` or ``NaN`` in
    object arrays, ``NaT`` in datetimelike).

    Parameters
    ----------
    obj : array-like or object value
        Object to check for *not* null or *non*-missing values.

    Returns
    -------
    bool or array-like of bool
        A single boolean when the input is a scalar.
        For array input, an array of booleans telling, element by element,
        whether the corresponding value is valid.

    See Also
    --------
    isna : Boolean inverse of pandas.notna.
    Series.notna : Detect valid values in a Series.
    DataFrame.notna : Detect valid values in a DataFrame.
    Index.notna : Detect valid values in an Index.

    Examples
    --------
    A scalar argument (strings included) gives back a scalar boolean.

    >>> pd.notna('dog')
    True

    >>> pd.notna(pd.NA)
    False

    >>> pd.notna(np.nan)
    False

    An ndarray gives back an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.notna(array)
    array([[ True, False,  True],
           [ True,  True, False]])

    An index gives back an ndarray of booleans.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.notna(index)
    array([ True,  True, False,  True])

    A Series or DataFrame gives back the same type, holding booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.notna(df)
          0      1     2
    0  True   True  True
    1  True  False  True

    >>> pd.notna(df[1])
    0     True
    1    False
    Name: 1, dtype: bool
    """
    missing = isna(obj)
    # A plain Python bool must be negated with ``not``; ``~True`` would be
    # the integer -2. Everything else is inverted element-wise.
    return (not missing) if isinstance(missing, bool) else ~missing


notnull = notna
def isna_compat(arr, fill_value=np.nan) -> bool:
    """
    Tell whether ``arr`` can be filled with ``fill_value``.

    Parameters
    ----------
    arr : a numpy array
    fill_value : fill value, default to np.nan

    Returns
    -------
    bool
        True if we can fill using this fill_value: always for a non-missing
        fill value, and for a missing one only when the array dtype is
        neither boolean nor integer.
    """
    if not isna(fill_value):
        return True

    dtype = arr.dtype
    if is_bool_dtype(dtype):
        return False
    return not is_integer_dtype(dtype)
def array_equivalent(
    left,
    right,
    strict_nan: bool = False,
    dtype_equal: bool = False,
) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations. False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    dtype_equal : bool, default False
        Whether `left` and `right` are known to have the same dtype
        according to `is_dtype_equal`. Some methods like `BlockManager.equals`
        require that the dtypes match. Setting this to ``True`` can improve
        performance, but will give different results for arrays that are
        equal but different dtypes.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent. The float/complex
        comparison result is converted to a plain Python ``bool``.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """
    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    if dtype_equal:
        # fastpath when we require that the dtypes match (Block.equals)
        if left.dtype.kind in ["f", "c"]:
            return _array_equivalent_float(left, right)
        elif is_datetimelike_v_numeric(left.dtype, right.dtype):
            return False
        elif needs_i8_conversion(left.dtype):
            return _array_equivalent_datetimelike(left, right)
        elif is_string_or_object_np_dtype(left.dtype):
            # TODO: fastpath for pandas' StringDtype
            return _array_equivalent_object(left, right, strict_nan)
        else:
            return np.array_equal(left, right)

    # Slow path when we allow comparing different dtypes.
    # Object arrays can contain None, NaN and NaT.
    # String dtypes must come to this path for NumPy 1.7.1 compat.
    if left.dtype.kind in "OSU" or right.dtype.kind in "OSU":
        # Note: `in "OSU"` is non-trivially faster than `in ["O", "S", "U"]`
        # or `in ("O", "S", "U")`
        return _array_equivalent_object(left, right, strict_nan)

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
        if not (left.size and right.size):
            # shapes already match, so both arrays are empty
            return True
        # ``.all()`` yields ``np.bool_``; convert so this path honours the
        # ``-> bool`` annotation like every other return in this function.
        return bool(((left == right) | (isna(left) & isna(right))).all())

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        # compare the underlying integer representation
        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if (
        left.dtype.type is np.void or right.dtype.type is np.void
    ) and left.dtype != right.dtype:
        return False

    return np.array_equal(left, right)
def _array_equivalent_float(left, right) -> bool:
    """
    Compare two float/complex arrays, treating NaNs at the same position as
    equal.
    """
    same_value = left == right
    both_nan = np.isnan(left) & np.isnan(right)
    return bool((same_value | both_nan).all())
def _array_equivalent_datetimelike(left, right):
    """
    Compare two datetimelike arrays through their int64 ("i8") views.
    """
    left_i8 = left.view("i8")
    right_i8 = right.view("i8")
    return np.array_equal(left_i8, right_i8)
def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bool):
    """
    Equivalence check for object (or string) arrays.

    With ``strict_nan=False`` the flattened arrays are handed to
    ``lib.array_equivalent_object``; otherwise the arrays are walked pairwise
    and the different kinds of missing value are told apart.
    """
    if not strict_nan:
        # isna considers NaN and None to be equivalent.
        both_f_contiguous = left.flags["F_CONTIGUOUS"] and right.flags["F_CONTIGUOUS"]
        if both_f_contiguous:
            # we can improve performance by doing a copy-free ravel
            # e.g. in frame_methods.Equals.time_frame_nonunique_equal
            #  if we transposed the frames
            left, right = left.ravel("K"), right.ravel("K")

        flat_left = ensure_object(left.ravel())
        flat_right = ensure_object(right.ravel())
        return lib.array_equivalent_object(flat_left, flat_right)

    for lval, rval in zip(left, right):
        if lval is NaT and rval is not NaT:
            return False
        if lval is libmissing.NA and rval is not libmissing.NA:
            return False

        if isinstance(lval, float) and np.isnan(lval):
            right_is_float_nan = isinstance(rval, float) and np.isnan(rval)
            if not right_is_float_nan:
                return False
            continue

        try:
            # The truth test stays inside the ``try``: evaluating an NA
            # comparison result is what raises the TypeError handled below.
            if np.any(np.asarray(lval != rval)):
                return False
        except TypeError as err:
            if "boolean value of NA is ambiguous" in str(err):
                return False
            raise
    return True
def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
    """
    ExtensionArray-aware counterpart of array_equivalent.

    Arrays with different dtypes are never equal; an ExtensionArray is
    compared with its own ``equals`` method, anything else with
    ``array_equivalent(..., dtype_equal=True)``.
    """
    dtypes_match = is_dtype_equal(left.dtype, right.dtype)
    if not dtypes_match:
        return False

    if isinstance(left, ABCExtensionArray):
        return left.equals(right)

    return array_equivalent(left, right, dtype_equal=True)
def infer_fill_value(val):
    """
    Infer the fill value (NaN or NaT) for the provided scalar, ndarray or
    list-like.

    If the values are datetimelike, a correctly dtyped ``NaT`` element is
    returned so that proper block construction is possible.

    Parameters
    ----------
    val : scalar, ndarray or list-like
        Value(s) the fill value is inferred from.

    Returns
    -------
    np.ndarray or float
        A 0-dimensional ``"NaT"`` array carrying the datetimelike dtype when
        one is detected (directly from the dtype, or inferred from an object
        array as datetime / timedelta); otherwise ``np.nan``.
    """
    if not is_list_like(val):
        val = [val]
    # ``np.asarray`` copies only when required. ``np.array(val, copy=False)``
    # means the same on NumPy 1.x but raises on NumPy 2 whenever a copy is
    # unavoidable, which is always the case for the list built above.
    val = np.asarray(val)
    if needs_i8_conversion(val.dtype):
        return np.array("NaT", dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(ensure_object(val), skipna=False)
        if dtype in ["datetime", "datetime64"]:
            return np.array("NaT", dtype=DT64NS_DTYPE)
        elif dtype in ["timedelta", "timedelta64"]:
            return np.array("NaT", dtype=TD64NS_DTYPE)
    return np.nan
def maybe_fill(arr: np.ndarray) -> np.ndarray:
    """
    Fill ``arr`` in place with NaN unless its dtype is integer (signed or
    unsigned) or boolean, then return the same array.
    """
    skip_fill = arr.dtype.kind in ("u", "i", "b")
    if not skip_fill:
        arr.fill(np.nan)
    return arr
def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
    """
    Return the NA value that goes with a dtype.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
    compat : bool, default True
        When True, integer dtypes give ``0`` and boolean dtypes give
        ``False`` instead of NaN.

    Returns
    -------
    scalar
        ``dtype.na_value`` for an ExtensionDtype, a ``NaT`` of the dtype's
        scalar type for datetimelike dtypes, otherwise NaN (or the
        ``compat`` substitutes described above).

    Examples
    --------
    >>> na_value_for_dtype(np.dtype('int64'))
    0
    >>> na_value_for_dtype(np.dtype('int64'), compat=False)
    nan
    >>> na_value_for_dtype(np.dtype('float64'))
    nan
    >>> na_value_for_dtype(np.dtype('bool'))
    False
    >>> na_value_for_dtype(np.dtype('datetime64[ns]'))
    numpy.datetime64('NaT')
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    if needs_i8_conversion(dtype):
        return dtype.type("NaT", "ns")
    if is_float_dtype(dtype):
        return np.nan

    if compat:
        # int and bool cannot hold NaN, so hand back their "empty" value
        if is_integer_dtype(dtype):
            return 0
        if is_bool_dtype(dtype):
            return False

    return np.nan
def remove_na_arraylike(arr):
    """
    Return array-like containing only true/non-NaN values, possibly empty.

    Extension-array-dtyped input is masked directly; anything else is first
    converted with ``np.asarray`` to compute the mask.
    """
    if is_extension_array_dtype(arr):
        keep = notna(arr)
    else:
        keep = notna(np.asarray(arr))
    return arr[keep]
def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
    """
    isna check that excludes NA scalars incompatible with ``dtype``.

    Parameters
    ----------
    obj : object
    dtype : np.dtype or ExtensionDtype
        Dtype the NA candidate is checked against.

    Returns
    -------
    bool
        False when ``obj`` is not a scalar or is not NA at all; otherwise
        whether this particular NA value is acceptable for ``dtype``.
    """
    if not lib.is_scalar(obj) or not isna(obj):
        return False

    kind = dtype.kind

    if kind == "M":
        if isinstance(dtype, np.dtype):
            # i.e. not tzaware
            return not isinstance(obj, (np.timedelta64, Decimal))
        # we have to rule out tznaive dt64("NaT")
        return not isinstance(obj, (np.timedelta64, np.datetime64, Decimal))

    if kind == "m":
        return not isinstance(obj, (np.datetime64, Decimal))

    if kind in ("i", "u", "f", "c"):
        # Numeric
        return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64))

    if kind == "b":
        # We allow pd.NA, None, np.nan in BooleanArray (same as IntervalDtype)
        return lib.is_float(obj) or obj is None or obj is libmissing.NA

    if dtype == _dtype_str:
        # numpy string dtypes to avoid float np.nan
        return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal, float))

    if dtype == _dtype_object:
        # This is needed for Categorical, but is kind of weird
        return True

    if isinstance(dtype, PeriodDtype):
        return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal))

    if isinstance(dtype, IntervalDtype):
        return lib.is_float(obj) or obj is None or obj is libmissing.NA

    if isinstance(dtype, CategoricalDtype):
        # defer to the dtype of the categories
        return is_valid_na_for_dtype(obj, dtype.categories.dtype)

    # fallback, default to allowing NaN, None, NA, NaT
    return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal))
def isna_all(arr: ArrayLike) -> bool:
    """
    Optimized equivalent to isna(arr).all()

    The array is inspected chunk by chunk and the scan stops at the first
    chunk that contains a non-missing value.
    """
    total_len = len(arr)

    # Usually it's enough to check but a small fraction of values to see if
    #  a block is NOT null, chunks should help in such cases.
    #  parameters 1000 and 40 were chosen arbitrarily
    chunk_len = max(total_len // 40, 1000)

    def _is_inat(chunk):
        # datetimelike values: compare the int64 view against the NaT marker
        return np.asarray(chunk.view("i8")) == iNaT

    def _generic_isna(chunk):
        # the module-level INF_AS_NA flag is read when the chunk is checked
        return _isna_array(chunk, inf_as_na=INF_AS_NA)

    dtype = arr.dtype
    if dtype.kind == "f" and isinstance(dtype, np.dtype):
        checker = nan_checker
    elif (
        (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"])
        or isinstance(dtype, DatetimeTZDtype)
        or dtype.type is Period
    ):
        checker = _is_inat
    else:
        checker = _generic_isna

    for start in range(0, total_len, chunk_len):
        if not checker(arr[start : start + chunk_len]).all():
            return False
    return True