Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/_testing/asserters.py: 7%
412 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from typing import (
4 Literal,
5 cast,
6)
7import warnings
9import numpy as np
11from pandas._libs.lib import (
12 NoDefault,
13 no_default,
14)
15from pandas._libs.missing import is_matching_na
16from pandas._libs.sparse import SparseIndex
17import pandas._libs.testing as _testing
18from pandas.util._exceptions import find_stack_level
20from pandas.core.dtypes.common import (
21 is_bool,
22 is_categorical_dtype,
23 is_extension_array_dtype,
24 is_interval_dtype,
25 is_number,
26 is_numeric_dtype,
27 needs_i8_conversion,
28)
29from pandas.core.dtypes.dtypes import (
30 CategoricalDtype,
31 PandasDtype,
32)
33from pandas.core.dtypes.missing import array_equivalent
35import pandas as pd
36from pandas import (
37 Categorical,
38 DataFrame,
39 DatetimeIndex,
40 Index,
41 IntervalIndex,
42 MultiIndex,
43 PeriodIndex,
44 RangeIndex,
45 Series,
46 TimedeltaIndex,
47)
48from pandas.core.algorithms import take_nd
49from pandas.core.arrays import (
50 DatetimeArray,
51 ExtensionArray,
52 IntervalArray,
53 PeriodArray,
54 TimedeltaArray,
55)
56from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
57from pandas.core.arrays.string_ import StringDtype
58from pandas.core.indexes.api import safe_sort_index
60from pandas.io.formats.printing import pprint_thing
def assert_almost_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = "equiv",
    check_less_precise: bool | int | NoDefault = no_default,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    **kwargs,
) -> None:
    """
    Assert that two objects are equal up to a numeric tolerance.

    Index, Series and DataFrame inputs are routed to the matching
    ``assert_*_equal`` function with ``check_exact=False``; anything else is
    compared with the low-level ``_testing.assert_almost_equal`` routine.

    Parameters
    ----------
    left : object
    right : object
    check_dtype : bool or {'equiv'}, default 'equiv'
        Check dtype if both a and b are the same type. If 'equiv' is passed in,
        then `RangeIndex` and `Int64Index` are also considered equivalent
        when doing type checking.
    check_less_precise : bool or int, default False
        Specify comparison precision. 5 digits (False) or 3 digits (True)
        after decimal points are compared. If int, then specify the number
        of digits to compare.

        .. deprecated:: 1.1.0
           Use `rtol` and `atol` instead to define relative/absolute
           tolerance, respectively. Similar to :func:`math.isclose`.
    rtol : float, default 1e-5
        Relative tolerance.

        .. versionadded:: 1.1.0
    atol : float, default 1e-8
        Absolute tolerance.

        .. versionadded:: 1.1.0
    **kwargs
        Forwarded unchanged to whichever comparison routine is selected.
    """
    if check_less_precise is not no_default:
        warnings.warn(
            "The 'check_less_precise' keyword in testing.assert_*_equal "
            "is deprecated and will be removed in a future version. "
            "You can stop passing 'check_less_precise' to silence this warning.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # The deprecated keyword overrides both tolerances with one value.
        rtol = atol = _get_tol_from_less_precise(check_less_precise)

    # Dispatch on the type of ``left`` only; the specialised asserters
    # validate ``right`` themselves.
    if isinstance(left, Index):
        assert_index_equal(
            left,
            right,
            check_exact=False,
            exact=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, Series):
        assert_series_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    # Other sequences and scalars.
    # Numeric classes (e.g. np.float64 vs float) and bool classes
    # (e.g. np.bool_ vs bool) are deliberately not compared by class.
    skip_class_check = (
        not check_dtype
        or (is_number(left) and is_number(right))
        or (is_bool(left) and is_bool(right))
    )
    if not skip_class_check:
        involves_ndarray = isinstance(left, np.ndarray) or isinstance(
            right, np.ndarray
        )
        label = "numpy array" if involves_ndarray else "Input"
        assert_class_equal(left, right, obj=label)

    # "equiv" is truthy, so it becomes True here.
    _testing.assert_almost_equal(
        left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
    )
174def _get_tol_from_less_precise(check_less_precise: bool | int) -> float:
175 """
176 Return the tolerance equivalent to the deprecated `check_less_precise`
177 parameter.
179 Parameters
180 ----------
181 check_less_precise : bool or int
183 Returns
184 -------
185 float
186 Tolerance to be used as relative/absolute tolerance.
188 Examples
189 --------
190 >>> # Using check_less_precise as a bool:
191 >>> _get_tol_from_less_precise(False)
192 5e-06
193 >>> _get_tol_from_less_precise(True)
194 0.0005
195 >>> # Using check_less_precise as an int representing the decimal
196 >>> # tolerance intended:
197 >>> _get_tol_from_less_precise(2)
198 0.005
199 >>> _get_tol_from_less_precise(8)
200 5e-09
201 """
202 if isinstance(check_less_precise, bool):
203 if check_less_precise:
204 # 3-digit tolerance
205 return 0.5e-3
206 else:
207 # 5-digit tolerance
208 return 0.5e-5
209 else:
210 # Equivalent to setting checking_less_precise=<decimals>
211 return 0.5 * 10**-check_less_precise
214def _check_isinstance(left, right, cls):
215 """
216 Helper method for our assert_* methods that ensures that
217 the two objects being compared have the right type before
218 proceeding with the comparison.
220 Parameters
221 ----------
222 left : The first object being compared.
223 right : The second object being compared.
224 cls : The class type to check against.
226 Raises
227 ------
228 AssertionError : Either `left` or `right` is not an instance of `cls`.
229 """
230 cls_name = cls.__name__
232 if not isinstance(left, cls):
233 raise AssertionError(
234 f"{cls_name} Expected type {cls}, found {type(left)} instead"
235 )
236 if not isinstance(right, cls):
237 raise AssertionError(
238 f"{cls_name} Expected type {cls}, found {type(right)} instead"
239 )
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Assert that two dicts are equal.

    Both arguments must be ``dict`` instances; the comparison itself is
    delegated to ``_testing.assert_dict_equal``.

    Parameters
    ----------
    left : dict
    right : dict
    compare_keys : bool, default True
        Passed through to ``_testing.assert_dict_equal``.
    """
    # Type validation first, so a non-dict fails with a clear message.
    _check_isinstance(left, right, cls=dict)
    _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_less_precise: bool | int | NoDefault = no_default,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Check that left and right Index are equal.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Int64Index as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_less_precise : bool or int, default False
        Specify comparison precision. Only used when check_exact is False.
        5 digits (False) or 3 digits (True) after decimal points are compared.
        If int, then specify the digits to compare.

        .. deprecated:: 1.1.0
           Use `rtol` and `atol` instead to define relative/absolute
           tolerance, respectively. Similar to :func:`math.isclose`.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly. The flag is also
        applied to the per-level comparison of a MultiIndex.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.

        .. versionadded:: 1.2.0
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If any of the enabled checks finds a difference.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _check_types(left, right, obj="Index") -> None:
        # Class / inferred_type / dtype comparison; disabled when ``exact``
        # is falsy.
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        # Skip exact dtype checking when `check_categorical` is False
        if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype):
            if check_categorical:
                assert_attr_equal("dtype", left, right, obj=obj)
                assert_index_equal(left.categories, right.categories, exact=exact)
            return

        assert_attr_equal("dtype", left, right, obj=obj)

    def _get_ilevel_values(index, level):
        # accept level number only
        unique = index.levels[level]
        level_codes = index.codes[level]
        filled = take_nd(unique._values, level_codes, fill_value=unique._na_value)
        return unique._shallow_copy(filled, name=index.names[level])

    if check_less_precise is not no_default:
        warnings.warn(
            "The 'check_less_precise' keyword in testing.assert_*_equal "
            "is deprecated and will be removed in a future version. "
            "You can stop passing 'check_less_precise' to silence this warning.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # The deprecated keyword overrides both tolerances with one value.
        rtol = atol = _get_tol_from_less_precise(check_less_precise)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        msg1 = f"{obj} levels are different"
        msg2 = f"{left.nlevels}, {left}"
        msg3 = f"{right.nlevels}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{obj} length are different"
        msg2 = f"{len(left)}, {left}"
        msg3 = f"{len(right)}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    # MultiIndex special comparison for little-friendly error messages
    if left.nlevels > 1:
        left = cast(MultiIndex, left)
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            # cannot use get_level_values here because it can change dtype
            llevel = _get_ilevel_values(left, level)
            rlevel = _get_ilevel_values(right, level)

            lobj = f"MultiIndex level [{level}]"
            # Forward ``check_categorical`` so that the caller's choice is
            # honoured for categorical levels; otherwise the recursive call
            # would fall back to the default (True) and compare the
            # categorical dtype exactly even when asked not to.
            assert_index_equal(
                llevel,
                rlevel,
                exact=exact,
                check_names=check_names,
                check_exact=check_exact,
                check_categorical=check_categorical,
                rtol=rtol,
                atol=atol,
                obj=lobj,
            )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=obj)

    # skip exact index checking when `check_categorical` is False
    if check_exact and check_categorical:
        if not left.equals(right):
            mismatch = left._values != right._values

            if is_extension_array_dtype(mismatch):
                # Missing entries in an extension-array mask count as
                # mismatches.
                mismatch = cast("ExtensionArray", mismatch).fillna(True)

            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right)
    else:

        # if we have "equiv", this becomes True
        exact_bool = bool(exact)
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=exact_bool,
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex):
        assert_attr_equal("freq", left, right, obj=obj)
    if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical:
        if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
            assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
def assert_class_equal(left, right, exact: bool | str = True, obj="Input") -> None:
    """
    Assert that `left` and `right` have the same class.

    Parameters
    ----------
    left : object
    right : object
    exact : bool or {'equiv'}, default True
        With 'equiv', two ``NumericIndex`` instances (including subclasses)
        are accepted as equivalent even when their concrete classes differ.
    obj : str, default 'Input'
        Name of the object being compared, used in the assertion message.
    """
    from pandas.core.indexes.numeric import NumericIndex

    __tracebackhide__ = True

    if type(left) == type(right):
        return

    # accept equivalence of NumericIndex (sub-)classes
    if (
        exact == "equiv"
        and isinstance(left, NumericIndex)
        and isinstance(right, NumericIndex)
    ):
        return

    def _describe(value):
        # An Index is passed through as-is so that its values are included
        # in the error message; anything else is reported by class name.
        return value if isinstance(value, Index) else type(value).__name__

    raise_assert_detail(
        obj, f"{obj} classes are different", _describe(left), _describe(right)
    )
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Assert that an attribute has equal values on two objects.

    Both objects must have the attribute.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message
    """
    __tracebackhide__ = True

    lhs = getattr(left, attr)
    rhs = getattr(right, attr)

    # Identical objects or matching missing values count as equal,
    # e.g. both np.nan, both NaT, both pd.NA, ...
    if lhs is rhs or is_matching_na(lhs, rhs):
        return None

    try:
        matches = lhs == rhs
    except TypeError:
        # datetimetz on rhs may raise TypeError
        matches = False

    exactly_one_is_na = (lhs is pd.NA) != (rhs is pd.NA)
    if exactly_one_is_na:
        matches = False
    elif not isinstance(matches, bool):
        # Non-bool comparison result: reduce it with ``.all()``.
        matches = matches.all()

    if matches:
        return None

    raise_assert_detail(obj, f'Attribute "{attr}" are different', lhs, rhs)
    return None
def assert_is_valid_plot_return_object(objs) -> None:
    """
    Assert that `objs` is an acceptable return value of a plotting call.

    A Series or ndarray is checked element-wise (after ``ravel()``): every
    element must be a matplotlib ``Axes`` or a dict. Any other input must
    itself be a matplotlib ``Artist``, a tuple, or a dict.

    Parameters
    ----------
    objs : object
        The value to validate.

    Raises
    ------
    AssertionError
        If `objs` (or one of its elements) has an unexpected type. The error
        is raised explicitly so the check still runs under ``python -O``.
    """
    import matplotlib.pyplot as plt

    if isinstance(objs, (Series, np.ndarray)):
        for el in objs.ravel():
            if not isinstance(el, (plt.Axes, dict)):
                raise AssertionError(
                    "one of 'objs' is not a matplotlib Axes instance, "
                    f"type encountered {repr(type(el).__name__)}"
                )
    elif not isinstance(objs, (plt.Artist, tuple, dict)):
        raise AssertionError(
            "objs is neither an ndarray of Artist instances nor a single "
            "Artist instance, tuple, or dict, 'objs' is a "
            f"{repr(type(objs).__name__)}"
        )
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    An Index or Series is reduced to its ``.values`` first; the input is
    then compared against a sorted copy of itself.
    """
    values = seq.values if isinstance(seq, (Index, Series)) else seq
    # sorting does not change precisions
    expected = np.sort(np.array(values))
    assert_numpy_array_equal(values, expected)
def assert_categorical_equal(
    left, right, check_dtype=True, check_category_order=True, obj="Categorical"
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes. If False, only the resulting
        values are compared. The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    """
    _check_isinstance(left, right, Categorical)

    left_cats = left.categories
    right_cats = right.categories

    # A RangeIndex on either side relaxes the class check to 'equiv';
    # otherwise exact matches are still required for NumericIndex.
    has_range_categories = isinstance(left_cats, RangeIndex) or isinstance(
        right_cats, RangeIndex
    )
    exact: bool | str = "equiv" if has_range_categories else True

    if not check_category_order:
        # Order-insensitive: compare the sorted categories, then the values
        # obtained by mapping the codes back through the categories.
        try:
            sorted_left = left_cats.sort_values()
            sorted_right = right_cats.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            sorted_left, sorted_right = left.categories, right.categories
        assert_index_equal(
            sorted_left, sorted_right, obj=f"{obj}.categories", exact=exact
        )
        assert_index_equal(
            left.categories.take(left.codes),
            right.categories.take(right.codes),
            obj=f"{obj}.values",
            exact=exact,
        )
    else:
        # Order-sensitive: categories and integer codes must both match.
        assert_index_equal(
            left_cats, right_cats, obj=f"{obj}.categories", exact=exact
        )
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )

    assert_attr_equal("ordered", left, right, obj=obj)
def assert_interval_array_equal(
    left, right, exact="equiv", obj="IntervalArray"
) -> None:
    """
    Test that two IntervalArrays are equivalent.

    The left endpoints, the right endpoints and the ``closed`` attribute are
    compared in that order.

    Parameters
    ----------
    left, right : IntervalArray
        The IntervalArrays to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Int64Index as well.
        NOTE(review): this argument is accepted but not referenced by the
        body of this function.
    obj : str, default 'IntervalArray'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If either input is not an IntervalArray or the arrays differ.
    """
    _check_isinstance(left, right, IntervalArray)

    kwargs = {}
    if left._left.dtype.kind in ["m", "M"]:
        # We have a DatetimeArray or TimedeltaArray
        kwargs["check_freq"] = False

    assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
    # Label the right endpoints as ".right" so that a mismatch there is not
    # reported as a difference in the left endpoints.
    assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)

    assert_attr_equal("closed", left, right, obj=obj)
def assert_period_array_equal(left, right, obj="PeriodArray") -> None:
    """
    Assert that two PeriodArrays are equal.

    Compares the ``_data`` arrays and the ``freq`` attribute.

    Parameters
    ----------
    left, right : PeriodArray
    obj : str, default 'PeriodArray'
        Name of the object being compared, used in assertion messages.
    """
    _check_isinstance(left, right, PeriodArray)

    data_label = f"{obj}._data"
    assert_numpy_array_equal(left._data, right._data, obj=data_label)
    assert_attr_equal("freq", left, right, obj=obj)
def assert_datetime_array_equal(
    left, right, obj="DatetimeArray", check_freq=True
) -> None:
    """
    Assert that two DatetimeArrays are equal.

    Compares the ``_data`` arrays, then ``freq`` (only when `check_freq`
    is true), then ``tz``.

    Parameters
    ----------
    left, right : DatetimeArray
    obj : str, default 'DatetimeArray'
        Name of the object being compared, used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    data_label = f"{obj}._data"
    assert_numpy_array_equal(left._data, right._data, obj=data_label)

    # ``freq`` is optional; ``tz`` is always compared, and always last.
    attrs_to_compare = ("freq", "tz") if check_freq else ("tz",)
    for attr_name in attrs_to_compare:
        assert_attr_equal(attr_name, left, right, obj=obj)
def assert_timedelta_array_equal(
    left, right, obj="TimedeltaArray", check_freq=True
) -> None:
    """
    Assert that two TimedeltaArrays are equal.

    Compares the ``_data`` arrays and, when `check_freq` is true, the
    ``freq`` attribute.

    Parameters
    ----------
    left, right : TimedeltaArray
    obj : str, default 'TimedeltaArray'
        Name of the object being compared, used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)

    data_label = f"{obj}._data"
    assert_numpy_array_equal(left._data, right._data, obj=data_label)

    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
def raise_assert_detail(obj, message, left, right, diff=None, index_values=None):
    """
    Build a detailed comparison message and raise it as an AssertionError.

    Parameters
    ----------
    obj : str
        Name of the object being compared; opens the message.
    message : str
        Description of the difference.
    left, right : object
        The two compared values. ndarrays are rendered with ``pprint_thing``;
        CategoricalDtype, PandasDtype and StringDtype values with ``repr``.
    diff : object, optional
        When not None, appended as a ``[diff]`` line.
    index_values : numpy.ndarray, optional
        When an ndarray, shown as an ``[index]`` line before the operands.

    Raises
    ------
    AssertionError
        Always.
    """
    __tracebackhide__ = True

    def _render(value):
        # Convert the operand into the form shown in the message.
        if isinstance(value, np.ndarray):
            return pprint_thing(value)
        if isinstance(value, (CategoricalDtype, PandasDtype, StringDtype)):
            return repr(value)
        return value

    pieces = [f"{obj} are different\n\n{message}"]

    if isinstance(index_values, np.ndarray):
        pieces.append(f"\n[index]: {pprint_thing(index_values)}")

    left = _render(left)
    right = _render(right)
    pieces.append(f"\n[left]: {left}\n[right]: {right}")

    if diff is not None:
        pieces.append(f"\n[diff]: {diff}")

    raise AssertionError("".join(pieces))
def assert_numpy_array_equal(
    left,
    right,
    strict_nan=False,
    check_dtype: bool | Literal["equiv"] = True,
    err_msg=None,
    check_same=None,
    obj="numpy array",
    index_values=None,
) -> None:
    """
    Assert that two 'np.ndarray' objects are equivalent.

    Parameters
    ----------
    left, right : numpy.ndarray or iterable
        The two arrays to be compared.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    check_dtype : bool, default True
        Check dtype if both a and b are np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'numpy array'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : numpy.ndarray, default None
        optional index (shared by both left and right), used in output.
    """
    __tracebackhide__ = True

    # instance validation
    # The class comparison comes first because it produces the more detailed
    # error message; afterwards both operands must be np.ndarray.
    assert_class_equal(left, right, obj=obj)
    _check_isinstance(left, right, np.ndarray)

    def _memory_owner(arr):
        # The array's ``base`` when it has one, otherwise the array itself.
        owner = getattr(arr, "base", None)
        return arr if owner is None else owner

    left_base = _memory_owner(left)
    right_base = _memory_owner(right)

    if check_same == "same" and left_base is not right_base:
        raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    elif check_same == "copy" and left_base is right_base:
        raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    # compare shape and values
    if not array_equivalent(left, right, strict_nan=strict_nan):
        # A caller-supplied message replaces the detailed report.
        if err_msg is not None:
            raise AssertionError(err_msg)

        if left.shape != right.shape:
            raise_assert_detail(
                obj, f"{obj} shapes are different", left.shape, right.shape
            )

        # count up differences along the first axis
        mismatched = sum(
            1
            for left_item, right_item in zip(left, right)
            if not array_equivalent(left_item, right_item, strict_nan=strict_nan)
        )
        percent = mismatched * 100.0 / left.size
        msg = f"{obj} values are different ({np.round(percent, 5)} %)"
        raise_assert_detail(obj, msg, left, right, index_values=index_values)

    if check_dtype and isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        assert_attr_equal("dtype", left, right, obj=obj)
def assert_extension_array_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    index_values=None,
    check_less_precise=no_default,
    check_exact=False,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
) -> None:
    """
    Assert that left and right ExtensionArrays are equal.

    Parameters
    ----------
    left, right : ExtensionArray
        The two arrays to compare.
    check_dtype : bool, default True
        Whether to check if the ExtensionArray dtypes are identical.
    index_values : numpy.ndarray, default None
        Optional index (shared by both left and right), used in output.
    check_less_precise : bool or int, default False
        Specify comparison precision. Only used when check_exact is False.
        5 digits (False) or 3 digits (True) after decimal points are compared.
        If int, then specify the digits to compare.

        .. deprecated:: 1.1.0
           Use `rtol` and `atol` instead to define relative/absolute
           tolerance, respectively. Similar to :func:`math.isclose`.
    check_exact : bool, default False
        Whether to compare number exactly.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0

    Notes
    -----
    Missing values are checked separately from valid values.
    A mask of missing values is computed for each and checked to match.
    The remaining all-valid values are cast to object dtype and checked.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Series([1, 2, 3, 4])
    >>> b, c = a.array, a.array
    >>> tm.assert_extension_array_equal(b, c)
    """
    if check_less_precise is not no_default:
        warnings.warn(
            "The 'check_less_precise' keyword in testing.assert_*_equal "
            "is deprecated and will be removed in a future version. "
            "You can stop passing 'check_less_precise' to silence this warning.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # The deprecated keyword overrides both tolerances with one value.
        rtol = atol = _get_tol_from_less_precise(check_less_precise)

    assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
    assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
    if check_dtype:
        assert_attr_equal("dtype", left, right, obj="ExtensionArray")

    same_datetimelike_class = (
        isinstance(left, DatetimeLikeArrayMixin)
        and isinstance(right, DatetimeLikeArrayMixin)
        and type(right) == type(left)
    )
    if same_datetimelike_class:
        # Avoid slow object-dtype comparisons
        # np.asarray for case where we have a np.MaskedArray
        left_i8 = np.asarray(left.asi8)
        right_i8 = np.asarray(right.asi8)
        assert_numpy_array_equal(left_i8, right_i8, index_values=index_values)
        return

    # The masks of missing values must agree before values are compared.
    left_missing = np.asarray(left.isna())
    right_missing = np.asarray(right.isna())
    assert_numpy_array_equal(
        left_missing,
        right_missing,
        obj="ExtensionArray NA mask",
        index_values=index_values,
    )

    # Only the non-missing entries are compared, as object-dtype arrays.
    left_present = left[~left_missing].to_numpy(dtype=object)
    right_present = right[~right_missing].to_numpy(dtype=object)

    if not check_exact:
        _testing.assert_almost_equal(
            left_present,
            right_present,
            check_dtype=bool(check_dtype),
            rtol=rtol,
            atol=atol,
            obj="ExtensionArray",
            index_values=index_values,
        )
        return

    assert_numpy_array_equal(
        left_present, right_present, obj="ExtensionArray", index_values=index_values
    )
866# This could be refactored to use the NDFrame.equals method
867def assert_series_equal(
868 left,
869 right,
870 check_dtype: bool | Literal["equiv"] = True,
871 check_index_type: bool | Literal["equiv"] = "equiv",
872 check_series_type=True,
873 check_less_precise: bool | int | NoDefault = no_default,
874 check_names=True,
875 check_exact=False,
876 check_datetimelike_compat=False,
877 check_categorical=True,
878 check_category_order=True,
879 check_freq=True,
880 check_flags=True,
881 rtol=1.0e-5,
882 atol=1.0e-8,
883 obj="Series",
884 *,
885 check_index=True,
886 check_like=False,
887) -> None:
888 """
889 Check that left and right Series are equal.
891 Parameters
892 ----------
893 left : Series
894 right : Series
895 check_dtype : bool, default True
896 Whether to check the Series dtype is identical.
897 check_index_type : bool or {'equiv'}, default 'equiv'
898 Whether to check the Index class, dtype and inferred_type
899 are identical.
900 check_series_type : bool, default True
901 Whether to check the Series class is identical.
902 check_less_precise : bool or int, default False
903 Specify comparison precision. Only used when check_exact is False.
904 5 digits (False) or 3 digits (True) after decimal points are compared.
905 If int, then specify the digits to compare.
907 When comparing two numbers, if the first number has magnitude less
908 than 1e-5, we compare the two numbers directly and check whether
909 they are equivalent within the specified precision. Otherwise, we
910 compare the **ratio** of the second number to the first number and
911 check whether it is equivalent to 1 within the specified precision.
913 .. deprecated:: 1.1.0
914 Use `rtol` and `atol` instead to define relative/absolute
915 tolerance, respectively. Similar to :func:`math.isclose`.
916 check_names : bool, default True
917 Whether to check the Series and Index names attribute.
918 check_exact : bool, default False
919 Whether to compare number exactly.
920 check_datetimelike_compat : bool, default False
921 Compare datetime-like which is comparable ignoring dtype.
922 check_categorical : bool, default True
923 Whether to compare internal Categorical exactly.
924 check_category_order : bool, default True
925 Whether to compare category order of internal Categoricals.
927 .. versionadded:: 1.0.2
928 check_freq : bool, default True
929 Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
931 .. versionadded:: 1.1.0
932 check_flags : bool, default True
933 Whether to check the `flags` attribute.
935 .. versionadded:: 1.2.0
937 rtol : float, default 1e-5
938 Relative tolerance. Only used when check_exact is False.
940 .. versionadded:: 1.1.0
941 atol : float, default 1e-8
942 Absolute tolerance. Only used when check_exact is False.
944 .. versionadded:: 1.1.0
945 obj : str, default 'Series'
946 Specify object name being compared, internally used to show appropriate
947 assertion message.
948 check_index : bool, default True
949 Whether to check index equivalence. If False, then compare only values.
951 .. versionadded:: 1.3.0
952 check_like : bool, default False
953 If True, ignore the order of the index. Must be False if check_index is False.
954 Note: same labels must be with the same data.
956 .. versionadded:: 1.5.0
958 Examples
959 --------
960 >>> from pandas import testing as tm
961 >>> a = pd.Series([1, 2, 3, 4])
962 >>> b = pd.Series([1, 2, 3, 4])
963 >>> tm.assert_series_equal(a, b)
964 """
965 __tracebackhide__ = True
967 if not check_index and check_like:
968 raise ValueError("check_like must be False if check_index is False")
970 if check_less_precise is not no_default:
971 warnings.warn(
972 "The 'check_less_precise' keyword in testing.assert_*_equal "
973 "is deprecated and will be removed in a future version. "
974 "You can stop passing 'check_less_precise' to silence this warning.",
975 FutureWarning,
976 stacklevel=find_stack_level(),
977 )
978 rtol = atol = _get_tol_from_less_precise(check_less_precise)
980 # instance validation
981 _check_isinstance(left, right, Series)
983 if check_series_type:
984 assert_class_equal(left, right, obj=obj)
986 # length comparison
987 if len(left) != len(right):
988 msg1 = f"{len(left)}, {left.index}"
989 msg2 = f"{len(right)}, {right.index}"
990 raise_assert_detail(obj, "Series length are different", msg1, msg2)
992 if check_flags:
993 assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
995 if check_index:
996 # GH #38183
997 assert_index_equal(
998 left.index,
999 right.index,
1000 exact=check_index_type,
1001 check_names=check_names,
1002 check_exact=check_exact,
1003 check_categorical=check_categorical,
1004 check_order=not check_like,
1005 rtol=rtol,
1006 atol=atol,
1007 obj=f"{obj}.index",
1008 )
1010 if check_like:
1011 left, right = left.reindex_like(right), right
1013 if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
1014 lidx = left.index
1015 ridx = right.index
1016 assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)
1018 if check_dtype:
1019 # We want to skip exact dtype checking when `check_categorical`
1020 # is False. We'll still raise if only one is a `Categorical`,
1021 # regardless of `check_categorical`
1022 if (
1023 isinstance(left.dtype, CategoricalDtype)
1024 and isinstance(right.dtype, CategoricalDtype)
1025 and not check_categorical
1026 ):
1027 pass
1028 else:
1029 assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
1031 if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype):
1032 left_values = left._values
1033 right_values = right._values
1034 # Only check exact if dtype is numeric
1035 if isinstance(left_values, ExtensionArray) and isinstance(
1036 right_values, ExtensionArray
1037 ):
1038 assert_extension_array_equal(
1039 left_values,
1040 right_values,
1041 check_dtype=check_dtype,
1042 index_values=np.asarray(left.index),
1043 )
1044 else:
1045 assert_numpy_array_equal(
1046 left_values,
1047 right_values,
1048 check_dtype=check_dtype,
1049 obj=str(obj),
1050 index_values=np.asarray(left.index),
1051 )
1052 elif check_datetimelike_compat and (
1053 needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
1054 ):
1055 # we want to check only if we have compat dtypes
1056 # e.g. integer and M|m are NOT compat, but we can simply check
1057 # the values in that case
1059 # datetimelike may have different objects (e.g. datetime.datetime
1060 # vs Timestamp) but will compare equal
1061 if not Index(left._values).equals(Index(right._values)):
1062 msg = (
1063 f"[datetimelike_compat=True] {left._values} "
1064 f"is not equal to {right._values}."
1065 )
1066 raise AssertionError(msg)
1067 elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype):
1068 assert_interval_array_equal(left.array, right.array)
1069 elif isinstance(left.dtype, CategoricalDtype) or isinstance(
1070 right.dtype, CategoricalDtype
1071 ):
1072 _testing.assert_almost_equal(
1073 left._values,
1074 right._values,
1075 rtol=rtol,
1076 atol=atol,
1077 check_dtype=bool(check_dtype),
1078 obj=str(obj),
1079 index_values=np.asarray(left.index),
1080 )
1081 elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype):
1082 assert_extension_array_equal(
1083 left._values,
1084 right._values,
1085 rtol=rtol,
1086 atol=atol,
1087 check_dtype=check_dtype,
1088 index_values=np.asarray(left.index),
1089 )
1090 elif is_extension_array_dtype_and_needs_i8_conversion(
1091 left.dtype, right.dtype
1092 ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
1093 assert_extension_array_equal(
1094 left._values,
1095 right._values,
1096 check_dtype=check_dtype,
1097 index_values=np.asarray(left.index),
1098 )
1099 elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
1100 # DatetimeArray or TimedeltaArray
1101 assert_extension_array_equal(
1102 left._values,
1103 right._values,
1104 check_dtype=check_dtype,
1105 index_values=np.asarray(left.index),
1106 )
1107 else:
1108 _testing.assert_almost_equal(
1109 left._values,
1110 right._values,
1111 rtol=rtol,
1112 atol=atol,
1113 check_dtype=bool(check_dtype),
1114 obj=str(obj),
1115 index_values=np.asarray(left.index),
1116 )
1118 # metadata comparison
1119 if check_names:
1120 assert_attr_equal("name", left, right, obj=obj)
1122 if check_categorical:
1123 if isinstance(left.dtype, CategoricalDtype) or isinstance(
1124 right.dtype, CategoricalDtype
1125 ):
1126 assert_categorical_equal(
1127 left._values,
1128 right._values,
1129 obj=f"{obj} category",
1130 check_category_order=check_category_order,
1131 )
1134# This could be refactored to use the NDFrame.equals method
def assert_frame_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_less_precise=no_default,
    check_names=True,
    by_blocks=False,
    check_exact=False,
    check_datetimelike_compat=False,
    check_categorical=True,
    check_like=False,
    check_freq=True,
    check_flags=True,
    rtol=1.0e-5,
    atol=1.0e-8,
    obj="DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal, raising on the first difference.

    The function is aimed at unit tests: it walks through shape, flags,
    row labels, column labels and finally the data itself, and raises an
    ``AssertionError`` describing the first mismatch. The keyword
    arguments relax or tighten individual checks.

    Parameters
    ----------
    left : DataFrame
        First DataFrame to compare.
    right : DataFrame
        Second DataFrame to compare.
    check_dtype : bool, default True
        Whether the dtypes of the data must be identical.
    check_index_type : bool or {'equiv'}, default 'equiv'
        Whether the class, dtype and inferred_type of the row index must
        be identical. Passed as the ``exact`` argument of
        :func:`assert_index_equal` for the row index.
    check_column_type : bool or {'equiv'}, default 'equiv'
        Whether the class, dtype and inferred_type of the columns must be
        identical. Passed as the ``exact`` argument of
        :func:`assert_index_equal` for the columns.
    check_frame_type : bool, default True
        Whether to assert that ``left`` is an instance of the class of
        ``right``.
    check_less_precise : bool or int, default False
        Comparison precision, only relevant when ``check_exact`` is False.
        5 digits (False) or 3 digits (True) after the decimal point are
        compared; an int gives the number of digits directly.

        When comparing two numbers, if the first number has magnitude
        less than 1e-5, the two numbers are compared directly within the
        given precision. Otherwise the **ratio** of the second number to
        the first is checked against 1 within the given precision.

        Passing this argument emits a ``FutureWarning`` and replaces both
        ``rtol`` and ``atol`` with a tolerance derived from it.

        .. deprecated:: 1.1.0
           Use `rtol` and `atol` instead to define relative/absolute
           tolerance, respectively. Similar to :func:`math.isclose`.
    check_names : bool, default True
        Whether the `names` attribute of both the `index` and the
        `columns` of the DataFrames must be identical.
    by_blocks : bool, default False
        How the data is compared. If False, compare column by column.
        If True, compare the internal blocks, one nested comparison per
        dtype; only ``check_dtype`` and ``obj`` are forwarded to those
        nested comparisons.
    check_exact : bool, default False
        Whether to compare numbers exactly.
    check_datetimelike_compat : bool, default False
        Compare datetime-like values that are comparable, ignoring dtype.
    check_categorical : bool, default True
        Whether to compare internal Categorical data exactly.
    check_like : bool, default False
        If True, ignore the order of index and columns.
        Note: index labels must still match their respective rows (and
        likewise for columns) - the same labels must carry the same data.
    check_freq : bool, default True
        Whether to check the `freq` attribute on a DatetimeIndex or
        TimedeltaIndex.

        .. versionadded:: 1.1.0
    check_flags : bool, default True
        Whether to check the `flags` attribute.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.

        .. versionadded:: 1.1.0
    obj : str, default 'DataFrame'
        Name of the object being compared; used internally to build the
        assertion message.

    See Also
    --------
    assert_series_equal : Equivalent method for asserting Series equality.
    DataFrame.equals : Check DataFrame equality.

    Examples
    --------
    Two DataFrames holding the same values, but with column 'b' stored
    under different dtypes.

    >>> from pandas.testing import assert_frame_equal
    >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})

    A DataFrame always equals itself.

    >>> assert_frame_equal(df1, df1)

    The dtype of column 'b' differs between df1 and df2.

    >>> assert_frame_equal(df1, df2)
    Traceback (most recent call last):
    ...
    AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different

    Attribute "dtype" are different
    [left]:  int64
    [right]: float64

    The dtype difference is tolerated with ``check_dtype=False``.

    >>> assert_frame_equal(df1, df2, check_dtype=False)
    """
    __tracebackhide__ = True

    if check_less_precise is not no_default:
        warnings.warn(
            "The 'check_less_precise' keyword in testing.assert_*_equal "
            "is deprecated and will be removed in a future version. "
            "You can stop passing 'check_less_precise' to silence this warning.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # The legacy precision setting overrides both explicit tolerances.
        rtol = atol = _get_tol_from_less_precise(check_less_precise)

    # Both operands have to be DataFrames before anything else is inspected.
    _check_isinstance(left, right, DataFrame)

    if check_frame_type:
        # An instance of a subclass of type(right) is accepted on the left.
        assert isinstance(left, type(right))

    if left.shape != right.shape:
        raise_assert_detail(
            obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}"
        )

    if check_flags:
        assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"

    # Options shared by the row-label and the column-label comparison.
    label_checks = {
        "check_names": check_names,
        "check_exact": check_exact,
        "check_categorical": check_categorical,
        "check_order": not check_like,
        "rtol": rtol,
        "atol": atol,
    }
    assert_index_equal(
        left.index,
        right.index,
        exact=check_index_type,
        obj=f"{obj}.index",
        **label_checks,
    )
    assert_index_equal(
        left.columns,
        right.columns,
        exact=check_column_type,
        obj=f"{obj}.columns",
        **label_checks,
    )

    if check_like:
        # Bring left into the label order of right before comparing data.
        left = left.reindex_like(right)

    if by_blocks:
        rblocks = right._to_dict_of_blocks()
        lblocks = left._to_dict_of_blocks()
        # Every dtype key seen on either side must be present on both sides.
        for dtype in set([*lblocks.keys(), *rblocks.keys()]):
            assert dtype in lblocks
            assert dtype in rblocks
            assert_frame_equal(
                lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj
            )
        return

    # Column-wise comparison. The column labels were already verified above,
    # so positional lookups are sufficient here.
    for i, col in enumerate(left.columns):
        # GH #38183
        # check_index=False: the index was compared once for the whole
        # frame, so it is not compared again for every single column.
        assert_series_equal(
            left._ixs(i, axis=1),
            right._ixs(i, axis=1),
            check_dtype=check_dtype,
            check_index_type=check_index_type,
            check_exact=check_exact,
            check_names=check_names,
            check_datetimelike_compat=check_datetimelike_compat,
            check_categorical=check_categorical,
            check_freq=check_freq,
            obj=f'{obj}.iloc[:, {i}] (column name="{col}")',
            rtol=rtol,
            atol=atol,
            check_index=False,
            check_flags=False,
        )
def assert_equal(left, right, **kwargs) -> None:
    """
    Dispatch to the tm.assert_*_equal function that matches ``type(left)``.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray
        The two items to be compared.
    **kwargs
        Forwarded unchanged to the selected assert function. Strings and
        objects of any other, unlisted type accept no keyword arguments.
    """
    __tracebackhide__ = True

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
        # For datetime/timedelta indexes the freq is compared in addition.
        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
            assert left.freq == right.freq, (left.freq, right.freq)
        return

    # Checked in order: the specialised array types have to come before
    # the generic ExtensionArray entry.
    checkers = (
        (Series, assert_series_equal),
        (DataFrame, assert_frame_equal),
        (IntervalArray, assert_interval_array_equal),
        (PeriodArray, assert_period_array_equal),
        (DatetimeArray, assert_datetime_array_equal),
        (TimedeltaArray, assert_timedelta_array_equal),
        (ExtensionArray, assert_extension_array_equal),
        (np.ndarray, assert_numpy_array_equal),
    )
    for klass, checker in checkers:
        if isinstance(left, klass):
            checker(left, right, **kwargs)
            return

    # Neither of the remaining comparisons takes options.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equal(left, right)
def assert_sp_array_equal(left, right) -> None:
    """
    Assert that two SparseArrays are equal.

    Compares, in this order: the stored sparse values, the sparse
    indexes, the ``fill_value`` and ``dtype`` attributes, and finally
    the dense representation of both arrays.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values)

    # SparseIndex comparison
    left_index, right_index = left.sp_index, right.sp_index
    assert isinstance(left_index, SparseIndex)
    assert isinstance(right_index, SparseIndex)

    if not left_index.equals(right_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_index, right_index
        )

    for attr in ("fill_value", "dtype"):
        assert_attr_equal(attr, left, right)
    assert_numpy_array_equal(left.to_dense(), right.to_dense())
def assert_contains_all(iterable, dic) -> None:
    """
    Assert that every element of ``iterable`` is contained in ``dic``.

    Membership is tested with ``in``; the first missing element triggers
    an ``AssertionError`` that names it.
    """
    for k in iterable:
        found = k in dic
        assert found, f"Did not contain item: {repr(k)}"
def assert_copy(iter1, iter2, **eql_kwargs) -> None:
    """
    Assert that paired elements are equal but are distinct objects.

    iter1, iter2: iterables that produce elements
    comparable with assert_almost_equal

    The iterables are walked pairwise with ``zip``. Each pair must
    compare equal under ``assert_almost_equal`` (``eql_kwargs`` are
    forwarded to it) and must not be the very same object. Only the
    elements themselves are identity-checked, not any items nested
    inside them.
    """
    for elem1, elem2 in zip(iter1, iter2):
        assert_almost_equal(elem1, elem2, **eql_kwargs)
        assert elem1 is not elem2, (
            f"Expected object {repr(type(elem1))} and object {repr(type(elem2))} to be "
            "different objects, but they were the same object."
        )
def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
    """
    Tell whether ``left_dtype`` is an extension array dtype while
    ``right_dtype`` is a dtype that should be converted to int64.

    The check is not symmetric: the two arguments play different roles.

    Returns
    -------
    bool

    Related to issue #37609
    """
    if not is_extension_array_dtype(left_dtype):
        return False
    return needs_i8_conversion(right_dtype)
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:
    """
    Assert that label-based and positional slicing select the same data.

    ``ser.loc[l_slc]`` must equal ``ser.iloc[i_slc]``. Unless the index
    of ``ser`` is an integer index, ``ser[l_slc]`` must equal it as well.
    """
    expected = ser.iloc[i_slc]
    assert_series_equal(ser.loc[l_slc], expected)

    if ser.index.is_integer():
        # Plain ``ser[...]`` is deliberately not compared for integer
        # indexes, where such slicing is treated as position-based.
        return

    assert_series_equal(ser[l_slc], expected)
def assert_metadata_equivalent(left, right) -> None:
    """
    Assert that the attributes named in ``left._metadata`` are equivalent.

    Each attribute is read from ``left`` with ``None`` as the fallback
    for a missing attribute. When ``right`` is None, every such value
    must itself be None; otherwise it must compare equal to the same
    attribute of ``right`` (again defaulting to None).
    """
    for attr in left._metadata:
        left_value = getattr(left, attr, None)
        if right is not None:
            assert left_value == getattr(right, attr, None)
        else:
            assert left_value is None