Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/_testing/__init__.py: 26%
390 statements
« prev ^ index » next — coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import collections
4from datetime import datetime
5from decimal import Decimal
6import operator
7import os
8import re
9import string
10from sys import byteorder
11from typing import (
12 TYPE_CHECKING,
13 Callable,
14 ContextManager,
15 Counter,
16 Iterable,
17)
18import warnings
20import numpy as np
22from pandas._config.localization import (
23 can_set_locale,
24 get_locales,
25 set_locale,
26)
28from pandas._typing import Dtype
29from pandas.compat import pa_version_under1p01
31from pandas.core.dtypes.common import (
32 is_float_dtype,
33 is_integer_dtype,
34 is_sequence,
35 is_unsigned_integer_dtype,
36 pandas_dtype,
37)
39import pandas as pd
40from pandas import (
41 Categorical,
42 CategoricalIndex,
43 DataFrame,
44 DatetimeIndex,
45 Index,
46 IntervalIndex,
47 MultiIndex,
48 RangeIndex,
49 Series,
50 bdate_range,
51)
52from pandas._testing._io import (
53 close,
54 network,
55 round_trip_localpath,
56 round_trip_pathlib,
57 round_trip_pickle,
58 write_to_compressed,
59)
60from pandas._testing._random import (
61 randbool,
62 rands,
63 rands_array,
64)
65from pandas._testing._warnings import (
66 assert_produces_warning,
67 maybe_produces_warning,
68)
69from pandas._testing.asserters import (
70 assert_almost_equal,
71 assert_attr_equal,
72 assert_categorical_equal,
73 assert_class_equal,
74 assert_contains_all,
75 assert_copy,
76 assert_datetime_array_equal,
77 assert_dict_equal,
78 assert_equal,
79 assert_extension_array_equal,
80 assert_frame_equal,
81 assert_index_equal,
82 assert_indexing_slices_equivalent,
83 assert_interval_array_equal,
84 assert_is_sorted,
85 assert_is_valid_plot_return_object,
86 assert_metadata_equivalent,
87 assert_numpy_array_equal,
88 assert_period_array_equal,
89 assert_series_equal,
90 assert_sp_array_equal,
91 assert_timedelta_array_equal,
92 raise_assert_detail,
93)
94from pandas._testing.compat import (
95 get_dtype,
96 get_obj,
97)
98from pandas._testing.contexts import (
99 RNGContext,
100 decompress_file,
101 ensure_clean,
102 ensure_clean_dir,
103 ensure_safe_environment_variables,
104 set_timezone,
105 use_numexpr,
106 with_csv_dialect,
107)
108from pandas.core.api import (
109 Float64Index,
110 Int64Index,
111 NumericIndex,
112 UInt64Index,
113)
114from pandas.core.arrays import (
115 BaseMaskedArray,
116 ExtensionArray,
117 PandasArray,
118)
119from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
120from pandas.core.construction import extract_array
if TYPE_CHECKING:
123 from pandas import (
124 PeriodIndex,
125 TimedeltaIndex,
126 )
# Default dimensions for the random Series/DataFrames built below:
# _N rows, _K columns.
_N = 30
_K = 4

# Dtype groupings used by test fixtures to parametrize over dtype families
# (numpy dtypes vs. their nullable ExtensionArray counterparts).
UNSIGNED_INT_NUMPY_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"]
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
SIGNED_INT_NUMPY_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"]
SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES

FLOAT_NUMPY_DTYPES: list[Dtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: list[Dtype] = [str, "str", "U"]

DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]

BOOL_DTYPES: list[Dtype] = [bool, "bool"]
BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
OBJECT_DTYPES: list[Dtype] = [object, "object"]

ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
ALL_NUMPY_DTYPES = (
    ALL_REAL_NUMPY_DTYPES
    + COMPLEX_DTYPES
    + STRING_DTYPES
    + DATETIME64_DTYPES
    + TIMEDELTA64_DTYPES
    + BOOL_DTYPES
    + OBJECT_DTYPES
    + BYTES_DTYPES
)

# Numpy dtypes narrower than the platform defaults (useful for casting tests).
NARROW_NP_DTYPES = [
    np.float16,
    np.float32,
    np.int8,
    np.int16,
    np.int32,
    np.uint8,
    np.uint16,
    np.uint32,
]

# Byte-order prefix ("<" or ">") matching the host machine's endianness.
ENDIAN = {"little": "<", "big": ">"}[byteorder]

# The missing-value singletons pandas recognizes.
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
# Every numpy NaT variant: datetime64/timedelta64 "NaT" at each supported
# resolution, from years ("Y") down to attoseconds ("as").
NP_NAT_OBJECTS = [
    cls("NaT", unit)
    for cls in [np.datetime64, np.timedelta64]
    for unit in [
        "Y",
        "M",
        "W",
        "D",
        "h",
        "m",
        "s",
        "ms",
        "us",
        "ns",
        "ps",
        "fs",
        "as",
    ]
]
# Optional pyarrow dtype groupings; only defined when pyarrow >= 1.0.1 is
# installed (pa_version_under1p01 is False). Callers must guard accordingly.
if not pa_version_under1p01:
    import pyarrow as pa

    UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
    SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
    ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES

    FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
    STRING_PYARROW_DTYPES = [pa.string(), pa.utf8()]

    TIME_PYARROW_DTYPES = [
        pa.time32("s"),
        pa.time32("ms"),
        pa.time64("us"),
        pa.time64("ns"),
    ]
    DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
    # every timestamp unit crossed with naive/UTC/US tz variants
    DATETIME_PYARROW_DTYPES = [
        pa.timestamp(unit=unit, tz=tz)
        for unit in ["s", "ms", "us", "ns"]
        for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
    ]
    TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]

    BOOL_PYARROW_DTYPES = [pa.bool_()]

    # TODO: Add container like pyarrow types:
    # https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
    ALL_PYARROW_DTYPES = (
        ALL_INT_PYARROW_DTYPES
        + FLOAT_PYARROW_DTYPES
        + TIME_PYARROW_DTYPES
        + DATE_PYARROW_DTYPES
        + DATETIME_PYARROW_DTYPES
        + TIMEDELTA_PYARROW_DTYPES
        + BOOL_PYARROW_DTYPES
    )
# Pattern matching only the empty string.
EMPTY_STRING_PATTERN = re.compile("^$")

# set testing mode
# Warning categories toggled on/off by the PANDAS_TESTING_MODE env var.
_testing_mode_warnings = (DeprecationWarning, ResourceWarning)
def set_testing_mode() -> None:
    """Enable the warning filters requested via PANDAS_TESTING_MODE."""
    mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" not in mode:
        return
    for category in _testing_mode_warnings:
        warnings.simplefilter("always", category)
def reset_testing_mode() -> None:
    """Undo set_testing_mode(): silence the PANDAS_TESTING_MODE warnings."""
    mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" not in mode:
        return
    for category in _testing_mode_warnings:
        warnings.simplefilter("ignore", category)
257set_testing_mode()
def reset_display_options() -> None:
    """Restore every "display.*" option to its default value."""
    pd.reset_option("^display.", silent=True)
267# -----------------------------------------------------------------------------
268# Comparators
def equalContents(arr1, arr2) -> bool:
    """Return True when arr1 and arr2 contain the same distinct elements."""
    return frozenset(arr2) == frozenset(arr1)
def box_expected(expected, box_cls, transpose=True):
    """
    Wrap the expected output of a test in the requested container class.

    Parameters
    ----------
    expected : np.ndarray, Index, Series
    box_cls : {Index, Series, DataFrame}
    transpose : bool, default True
        DataFrame only: lay the data out as two identical rows rather than
        one column, so it can operate against same-length vectors.

    Returns
    -------
    subclass of box_cls
    """
    if box_cls is pd.array:
        if isinstance(expected, RangeIndex):
            # pd.array would return an IntegerArray
            return PandasArray(np.asarray(expected._values))
        return pd.array(expected)
    if box_cls is Index:
        return Index._with_infer(expected)
    if box_cls is Series:
        return Series(expected)
    if box_cls is DataFrame:
        frame = Series(expected).to_frame()
        if transpose:
            # for vector operations, we need a DataFrame to be a single-row,
            # not a single-column, in order to operate against non-DataFrame
            # vectors of the same length. But convert to two rows to avoid
            # single-row special cases in datetime arithmetic
            frame = pd.concat([frame.T] * 2, ignore_index=True)
        return frame
    if box_cls is np.ndarray or box_cls is np.array:
        return np.array(expected)
    if box_cls is to_array:
        return to_array(expected)
    raise NotImplementedError(box_cls)
def to_array(obj):
    """
    Like pd.array, but keeps numpy dtypes instead of casting to nullable ones.
    """
    # temporary implementation until we get pd.array in place
    if getattr(obj, "dtype", None) is None:
        return np.asarray(obj)
    return extract_array(obj, extract_numpy=True)
332# -----------------------------------------------------------------------------
333# Others
def getCols(k) -> str:
    """Return the first k uppercase ASCII letters, used as column labels."""
    return string.ascii_uppercase[:k]
340# make index
def makeStringIndex(k=10, name=None) -> Index:
    """Index of k random 10-character strings."""
    values = rands_array(nchars=10, size=k)
    return Index(values, name=name)
def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex:
    """Make a length-k CategoricalIndex drawn from n distinct random categories."""
    categories = rands_array(nchars=4, size=n, replace=False)
    codes = np.arange(k) % n
    return CategoricalIndex(
        Categorical.from_codes(codes, categories=categories), name=name, **kwargs
    )
def makeIntervalIndex(k=10, name=None, **kwargs) -> IntervalIndex:
    """Make an IntervalIndex of k equal-width bins spanning [0, 100]."""
    breaks = np.linspace(0, 100, num=k + 1)
    return IntervalIndex.from_breaks(breaks, name=name, **kwargs)
def makeBoolIndex(k=10, name=None) -> Index:
    """Boolean Index of length k: element 1 is True, everything else False."""
    if k == 1:
        return Index([True], name=name)
    # the k == 2 case falls out of the general formula ([False, True] + [])
    return Index([False, True] + [False] * (k - 2), name=name)
def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex:
    """Make a length-k NumericIndex of the given numpy integer/float dtype."""
    dtype = pandas_dtype(dtype)
    assert isinstance(dtype, np.dtype)

    if is_integer_dtype(dtype):
        values = np.arange(k, dtype=dtype)
        if is_unsigned_integer_dtype(dtype):
            # shift into the upper half of the unsigned range
            values += 2 ** (dtype.itemsize * 8 - 1)
        return NumericIndex(values, dtype=dtype, name=name)
    if is_float_dtype(dtype):
        values = np.random.random_sample(k) - np.random.random_sample(1)
        values.sort()
        scaled = values * (10 ** np.random.randint(0, 9))
        return NumericIndex(scaled, dtype=dtype, name=name)
    raise NotImplementedError(f"wrong dtype {dtype}")
def makeIntIndex(k=10, name=None) -> Int64Index:
    """Int64Index of k consecutive integers starting at 0."""
    return Int64Index(makeNumericIndex(k, name=name, dtype="int64"))
def makeUIntIndex(k=10, name=None) -> UInt64Index:
    """UInt64Index of k consecutive integers offset into the unsigned range."""
    return UInt64Index(makeNumericIndex(k, name=name, dtype="uint64"))
def makeRangeIndex(k=10, name=None, **kwargs) -> RangeIndex:
    """RangeIndex covering 0..k-1 with step 1."""
    return RangeIndex(start=0, stop=k, step=1, name=name, **kwargs)
def makeFloatIndex(k=10, name=None) -> Float64Index:
    """Float64Index of k sorted random floats."""
    return Float64Index(makeNumericIndex(k, name=name, dtype="float64"))
def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex:
    """DatetimeIndex of k periods at the given frequency, from 2000-01-01."""
    start = datetime(2000, 1, 1)
    dr = bdate_range(start, periods=k, freq=freq, name=name)
    return DatetimeIndex(dr, name=name, **kwargs)
def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex:
    """TimedeltaIndex of k periods starting at "1 day"."""
    return pd.timedelta_range(
        start="1 day", periods=k, freq=freq, name=name, **kwargs
    )
def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex:
    """PeriodIndex of k business-day periods starting 2000-01-01."""
    start = datetime(2000, 1, 1)
    return pd.period_range(start=start, periods=k, freq="B", name=name, **kwargs)
def makeMultiIndex(k=10, names=None, **kwargs):
    """Two-level MultiIndex (("foo", "bar") x range) truncated to length k."""
    n = (k // 2) + 1
    mi = MultiIndex.from_product([("foo", "bar"), range(n)], names=names, **kwargs)
    assert len(mi) >= k  # GH#38795
    return mi[:k]
def index_subclass_makers_generator():
    """Yield the maker function for each Index subclass exercised in tests."""
    yield makeDateIndex
    yield makePeriodIndex
    yield makeTimedeltaIndex
    yield makeRangeIndex
    yield makeIntervalIndex
    yield makeCategoricalIndex
    yield makeMultiIndex
def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]:
    """
    Yield one instance of each index class that represents time-series.

    Parameters
    ----------
    k : length of each of the index instances
    """
    makers: list[Callable[..., Index]] = [
        makeDateIndex,
        makePeriodIndex,
        makeTimedeltaIndex,
    ]
    for maker in makers:
        yield maker(k=k)
458# make series
def make_rand_series(name=None, dtype=np.float64) -> Series:
    """Series of _N standard-normal draws over a random string index."""
    index = makeStringIndex(_N)
    with np.errstate(invalid="ignore"):
        values = np.random.randn(_N).astype(dtype, copy=False)
    return Series(values, index=index, name=name)
def makeFloatSeries(name=None) -> Series:
    """Random float64 Series of length _N with a string index."""
    return make_rand_series(name=name)
def makeStringSeries(name=None) -> Series:
    """Random float Series with a string index (the values are floats;
    despite the name, only the index is made of strings)."""
    return make_rand_series(name=name)
def makeObjectSeries(name=None) -> Series:
    """Series of random strings with object dtype over a string index."""
    values = Index(makeStringIndex(_N), dtype=object)
    return Series(values, index=makeStringIndex(_N), name=name)
def getSeriesData() -> dict[str, Series]:
    """Map each of the first _K column letters to a random float Series
    sharing one random string index."""
    index = makeStringIndex(_N)
    return {col: Series(np.random.randn(_N), index=index) for col in getCols(_K)}
def makeTimeSeries(nper=None, freq="B", name=None) -> Series:
    """Random Series over a DatetimeIndex of nper periods (default _N)."""
    nper = _N if nper is None else nper
    index = makeDateIndex(nper, freq=freq)
    return Series(np.random.randn(nper), index=index, name=name)
def makePeriodSeries(nper=None, name=None) -> Series:
    """Random Series over a PeriodIndex of nper periods (default _N)."""
    nper = _N if nper is None else nper
    return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)
def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]:
    """Map the first _K column letters to independent random time series."""
    return {col: makeTimeSeries(nper, freq) for col in getCols(_K)}
def getPeriodData(nper=None) -> dict[str, Series]:
    """Map the first _K column letters to independent random period series."""
    return {col: makePeriodSeries(nper) for col in getCols(_K)}
509# make frame
def makeTimeDataFrame(nper=None, freq="B") -> DataFrame:
    """DataFrame of random values over a DatetimeIndex."""
    return DataFrame(getTimeSeriesData(nper, freq))
def makeDataFrame() -> DataFrame:
    """_N x _K DataFrame of random floats over a random string index."""
    return DataFrame(getSeriesData())
def getMixedTypeDict():
    """Return (index, data) for a small 5-row frame with mixed column types:
    floats (A, B), strings (C) and business dates (D)."""
    index = Index(["a", "b", "c", "d", "e"])
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": bdate_range("1/1/2009", periods=5),
    }
    return index, data
def makeMixedDataFrame() -> DataFrame:
    """DataFrame from getMixedTypeDict()'s data (with a default RangeIndex)."""
    _, data = getMixedTypeDict()
    return DataFrame(data)
def makePeriodFrame(nper=None) -> DataFrame:
    """DataFrame of random values over a PeriodIndex."""
    return DataFrame(getPeriodData(nper))
def makeCustomIndex(
    nentries,
    nlevels,
    prefix="#",
    names: bool | str | list[str] | None = False,
    ndupe_l=None,
    idx_type=None,
) -> Index:
    """
    Create an index/multindex with given dimensions, levels, names, etc'

    Parameters
    ----------
    nentries : number of entries in index
    nlevels : number of levels (> 1 produces multindex)
    prefix : a string prefix for labels
    names : bool or list of strings
        If True, use default names; if False, no names; if a list is given,
        the name of each level in the index will be taken from the list.
    ndupe_l : list of ints, optional
        Number of rows for which the label will be repeated at the
        corresponding level; you can specify just the first few, the rest
        default to 1. len(ndupe_l) <= nlevels.
    idx_type : "i"/"f"/"s"/"dt"/"p"/"td", optional
        If not None, `nlevels` must be 1. "i"/"f" creates an integer/float
        index, "s" a string index, "dt" a datetime index, "p" a period
        index and "td" a timedelta index. If unspecified, string labels
        are generated.
    """
    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
    # `==`, not `is`: identity comparison of ints relies on CPython interning
    assert names is None or names is False or names is True or len(names) == nlevels
    assert idx_type is None or (
        idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
    )

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, str) and nlevels == 1:
        names = [names]

    # specific 1D index type requested? Build the dispatch table lazily so
    # the default (string-label) path never touches the maker functions.
    if idx_type is not None:
        idx_func_dict: dict[str, Callable[..., Index]] = {
            "i": makeIntIndex,
            "f": makeFloatIndex,
            "s": makeStringIndex,
            "dt": makeDateIndex,
            "td": makeTimedeltaIndex,
            "p": makePeriodIndex,
        }
        idx_func = idx_func_dict.get(idx_type)
        if idx_func is None:
            raise ValueError(
                f"{repr(idx_type)} is not a legal value for `idx_type`, "
                "use 'i'/'f'/'s'/'dt'/'p'/'td'."
            )
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx

    if len(ndupe_l) < nlevels:
        ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    def keyfunc(x):
        # sort labels numerically by the ints embedded in "{prefix}_l{i}_g{j}"
        # (hoisted out of the loop below; it was re-created per level)
        numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
        return [int(num) for num in numeric_tuple]

    # build a list of lists to create the index from
    list_of_lists = []
    for i in range(nlevels):
        div_factor = nentries // ndupe_l[i] + 1

        # Deprecated since version 3.9: collections.Counter now supports [].
        # See PEP 585 and Generic Alias Type.
        cnt: Counter[str] = collections.Counter()
        for j in range(div_factor):
            label = f"{prefix}_l{i}_g{j}"
            cnt[label] = ndupe_l[i]
        # cute Counter trick
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        list_of_lists.append(result)

    tuples = list(zip(*list_of_lists))

    # convert tuples to index
    if nentries == 1:
        # we have a single level of tuples, i.e. a regular Index
        name = None if names is None else names[0]
        index = Index(tuples[0], name=name)
    elif nlevels == 1:
        name = None if names is None else names[0]
        index = Index((x[0] for x in tuples), name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index
def makeCustomDataframe(
    nrows,
    ncols,
    c_idx_names=True,
    r_idx_names=True,
    c_idx_nlevels=1,
    r_idx_nlevels=1,
    data_gen_f=None,
    c_ndupe_l=None,
    r_ndupe_l=None,
    dtype=None,
    c_idx_type=None,
    r_idx_type=None,
) -> DataFrame:
    """
    Create a DataFrame whose row/column indexes come from makeCustomIndex.

    Parameters
    ----------
    nrows, ncols : number of data rows/cols
    c_idx_names, r_idx_names : False/True/list of strings
        Yields no names, default names, or the provided names for the
        levels of the corresponding index. A single string is allowed when
        the corresponding nlevels == 1.
    c_idx_nlevels, r_idx_nlevels : number of levels in the columns/rows
        index; > 1 yields a MultiIndex.
    data_gen_f : function f(row, col) returning the value at that position;
        the default yields positional values of the form "RxCy".
    c_ndupe_l, r_ndupe_l : list of ints
        Label multiplicity per level of the corresponding index; a partial
        list covers just the first levels, the rest default to 1. If the
        multiplicity doesn't divide nrows/ncols, the last label may occur
        fewer times.
    dtype : passed through to the DataFrame constructor as-is.
    r_idx_type, c_idx_type : "i"/"f"/"s"/"dt"/"p"/"td"
        See makeCustomIndex; requires the corresponding nlevels == 1.

    Examples
    --------
    >> makeCustomDataframe(5, 3)  # 5x3, default names, single index both axes
    >> makeCustomDataframe(5, 3, data_gen_f=lambda r, c: randint(1, 100))
    >> makeCustomDataframe(5, 3, r_idx_nlevels=2, r_ndupe_l=[2])
    >> makeCustomDataframe(5, 3, r_idx_nlevels=4,
                           r_idx_names=["FEE", "FIH", "FOH", "FUM"],
                           c_idx_nlevels=2)
    """
    assert c_idx_nlevels > 0
    assert r_idx_nlevels > 0
    assert r_idx_type is None or (
        r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
    )
    assert c_idx_type is None or (
        c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
    )

    index = makeCustomIndex(
        nrows,
        nlevels=r_idx_nlevels,
        prefix="R",
        names=r_idx_names,
        ndupe_l=r_ndupe_l,
        idx_type=r_idx_type,
    )
    columns = makeCustomIndex(
        ncols,
        nlevels=c_idx_nlevels,
        prefix="C",
        names=c_idx_names,
        ndupe_l=c_ndupe_l,
        idx_type=c_idx_type,
    )

    # by default, generate data based on location
    if data_gen_f is None:
        data_gen_f = lambda r, c: f"R{r}C{c}"

    data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)]

    return DataFrame(data, index, columns, dtype=dtype)
763def _create_missing_idx(nrows, ncols, density, random_state=None):
764 if random_state is None:
765 random_state = np.random
766 else:
767 random_state = np.random.RandomState(random_state)
769 # below is cribbed from scipy.sparse
770 size = round((1 - density) * nrows * ncols)
771 # generate a few more to ensure unique values
772 min_rows = 5
773 fac = 1.02
774 extra_size = min(size + min_rows, fac * size)
776 def _gen_unique_rand(rng, _extra_size):
777 ind = rng.rand(int(_extra_size))
778 return np.unique(np.floor(ind * nrows * ncols))[:size]
780 ind = _gen_unique_rand(random_state, extra_size)
781 while ind.size < size:
782 extra_size *= 1.05
783 ind = _gen_unique_rand(random_state, extra_size)
785 j = np.floor(ind * 1.0 / nrows).astype(int)
786 i = (ind - j * nrows).astype(int)
787 return i.tolist(), j.tolist()
def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame:
    """Random DataFrame with ~(1 - density) of its cells set to NaN."""
    df = makeDataFrame()
    rows, cols = _create_missing_idx(
        *df.shape, density=density, random_state=random_state
    )
    df.values[rows, cols] = np.nan
    return df
class SubclassedSeries(Series):
    """Series subclass used to test metadata propagation."""

    _metadata = ["testattr", "name"]

    @property
    def _constructor(self):
        # For testing, these properties return a generic callable, and not
        # the actual class, to ensure we don't rely on the property
        # returning a class.
        # See https://github.com/pandas-dev/pandas/pull/46018 and
        # https://github.com/pandas-dev/pandas/issues/32638 and linked issues
        def _new(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return _new

    @property
    def _constructor_expanddim(self):
        def _new(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return _new
class SubclassedDataFrame(DataFrame):
    """DataFrame subclass used to test metadata propagation."""

    _metadata = ["testattr"]

    @property
    def _constructor(self):
        # generic callable rather than the class itself, to ensure callers
        # don't rely on the property returning a class
        def _new(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return _new

    @property
    def _constructor_sliced(self):
        def _new(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return _new
class SubclassedCategorical(Categorical):
    """Categorical subclass used to test that _constructor is respected."""

    @property
    def _constructor(self):
        return SubclassedCategorical
832def _make_skipna_wrapper(alternative, skipna_alternative=None):
833 """
834 Create a function for calling on an array.
836 Parameters
837 ----------
838 alternative : function
839 The function to be called on the array with no NaNs.
840 Only used when 'skipna_alternative' is None.
841 skipna_alternative : function
842 The function to be called on the original array
844 Returns
845 -------
846 function
847 """
848 if skipna_alternative:
850 def skipna_wrapper(x):
851 return skipna_alternative(x.values)
853 else:
855 def skipna_wrapper(x):
856 nona = x.dropna()
857 if len(nona) == 0:
858 return np.nan
859 return alternative(nona)
861 return skipna_wrapper
def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
    """
    Join CSV rows with the current OS line separator (with a trailing one).

    Used to build expected values for to_csv() tests.

    Parameters
    ----------
    rows_list : list[str]
        Each element represents one row of the csv.

    Returns
    -------
    str
        Expected output of to_csv() on the current OS.
    """
    terminator = os.linesep
    return terminator.join(rows_list) + terminator
def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
    """
    pytest.raises wrapper for errors whose message we do not control.

    Parameters
    ----------
    expected_exception : Exception
        Expected error to raise.

    Returns
    -------
    Callable
        Regular `pytest.raises` function with `match` equal to `None`.
    """
    import pytest

    return pytest.raises(expected_exception, match=None)  # noqa: PDF010
903cython_table = pd.core.common._cython_table.items()
def get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine a frame with functions from com._cython_table and expected results.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is the name of an NDFrame method ('sum', 'prod', ...);
        the second item is the expected return value.

    Returns
    -------
    list
        List of (DataFrame, function, expected result) triples.
    """
    results = []
    for func_name, expected in func_names_and_expected:
        results.append((ndframe, func_name, expected))
        # also emit every cython-table callable registered under this name
        results.extend(
            (ndframe, func, expected)
            for func, name in cython_table
            if name == func_name
        )
    return results
def get_op_from_name(op_name: str) -> Callable:
    """
    Return the operator function for an op name like "add" or "__add__".

    Reverse names ("radd", ...) fall back to the forward operator with the
    arguments swapped.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__".

    Returns
    -------
    function
        A function performing the operation.
    """
    stripped = op_name.strip("_")
    try:
        return getattr(operator, stripped)
    except AttributeError:
        # Assume it is the reverse operator
        forward = getattr(operator, stripped[1:])

        def reversed_op(x, y):
            return forward(y, x)

        return reversed_op
959# -----------------------------------------------------------------------------
960# Indexing test helpers
# Thin helpers used to parametrize indexing tests over the different access
# styles (obj[...], obj.loc[...], obj.iloc[...], obj.at[...], obj.iat[...]).


def getitem(x):
    # plain obj[...] access: return the object itself
    return x


def setitem(x):
    # plain obj[...] = ... access: return the object itself
    return x


def loc(x):
    # label-based indexer
    return x.loc


def iloc(x):
    # positional indexer
    return x.iloc


def at(x):
    # scalar label-based indexer
    return x.at


def iat(x):
    # scalar positional indexer
    return x.iat
987# -----------------------------------------------------------------------------
def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    Recursively unwraps pandas containers down to their backing arrays and
    reports whether ``left`` and ``right`` view the same memory. The
    isinstance chain is order-dependent: plain ndarrays first, then
    Index/Series wrappers, then ExtensionArray internals.
    """
    if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    elif isinstance(left, np.ndarray):
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    if isinstance(left, RangeIndex):
        # treated as never sharing memory with anything
        return False
    if isinstance(left, MultiIndex):
        return shares_memory(left._codes, right)
    if isinstance(left, (Index, Series)):
        # unwrap to the underlying array and recurse
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    if isinstance(left, pd.core.arrays.SparseArray):
        return shares_memory(left.sp_values, right)
    if isinstance(left, pd.core.arrays.IntervalArray):
        # shared if either endpoint array is shared
        return shares_memory(left._left, right) or shares_memory(left._right, right)

    if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
            # error: "ExtensionArray" has no attribute "_data"
            left_pa_data = left._data  # type: ignore[attr-defined]
            # error: "ExtensionArray" has no attribute "_data"
            right_pa_data = right._data  # type: ignore[attr-defined]
            # compare the data buffers (index 1) of the first chunk
            left_buf1 = left_pa_data.chunk(0).buffers()[1]
            right_buf1 = right_pa_data.chunk(0).buffers()[1]
            return left_buf1 == right_buf1

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        # the _data or the _mask
        return np.shares_memory(left._data, right._data) or np.shares_memory(
            left._mask, right._mask
        )

    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        # single-block frame: compare its lone backing array
        arr = left._mgr.arrays[0]
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))
# Explicit public API of pandas._testing.
__all__ = [
    "ALL_INT_EA_DTYPES",
    "ALL_INT_NUMPY_DTYPES",
    "ALL_NUMPY_DTYPES",
    "ALL_REAL_NUMPY_DTYPES",
    "all_timeseries_index_generator",
    "assert_almost_equal",
    "assert_attr_equal",
    "assert_categorical_equal",
    "assert_class_equal",
    "assert_contains_all",
    "assert_copy",
    "assert_datetime_array_equal",
    "assert_dict_equal",
    "assert_equal",
    "assert_extension_array_equal",
    "assert_frame_equal",
    "assert_index_equal",
    "assert_indexing_slices_equivalent",
    "assert_interval_array_equal",
    "assert_is_sorted",
    "assert_is_valid_plot_return_object",
    "assert_metadata_equivalent",
    "assert_numpy_array_equal",
    "assert_period_array_equal",
    "assert_produces_warning",
    "assert_series_equal",
    "assert_sp_array_equal",
    "assert_timedelta_array_equal",
    "at",
    "BOOL_DTYPES",
    "box_expected",
    "BYTES_DTYPES",
    "can_set_locale",
    "close",
    "COMPLEX_DTYPES",
    "convert_rows_list_to_csv_str",
    "DATETIME64_DTYPES",
    "decompress_file",
    "EMPTY_STRING_PATTERN",
    "ENDIAN",
    "ensure_clean",
    "ensure_clean_dir",
    "ensure_safe_environment_variables",
    "equalContents",
    "external_error_raised",
    "FLOAT_EA_DTYPES",
    "FLOAT_NUMPY_DTYPES",
    "getCols",
    "get_cython_table_params",
    "get_dtype",
    "getitem",
    "get_locales",
    "getMixedTypeDict",
    "get_obj",
    "get_op_from_name",
    "getPeriodData",
    "getSeriesData",
    "getTimeSeriesData",
    "iat",
    "iloc",
    "index_subclass_makers_generator",
    "loc",
    "makeBoolIndex",
    "makeCategoricalIndex",
    "makeCustomDataframe",
    "makeCustomIndex",
    "makeDataFrame",
    "makeDateIndex",
    "makeFloatIndex",
    "makeFloatSeries",
    "makeIntervalIndex",
    "makeIntIndex",
    "makeMissingDataframe",
    "makeMixedDataFrame",
    "makeMultiIndex",
    "makeNumericIndex",
    "makeObjectSeries",
    "makePeriodFrame",
    "makePeriodIndex",
    "makePeriodSeries",
    "make_rand_series",
    "makeRangeIndex",
    "makeStringIndex",
    "makeStringSeries",
    "makeTimeDataFrame",
    "makeTimedeltaIndex",
    "makeTimeSeries",
    "makeUIntIndex",
    "maybe_produces_warning",
    "NARROW_NP_DTYPES",
    "network",
    "NP_NAT_OBJECTS",
    "NULL_OBJECTS",
    "OBJECT_DTYPES",
    "raise_assert_detail",
    "randbool",
    "rands",
    "reset_display_options",
    "reset_testing_mode",
    "RNGContext",
    "round_trip_localpath",
    "round_trip_pathlib",
    "round_trip_pickle",
    "setitem",
    "set_locale",
    "set_testing_mode",
    "set_timezone",
    "shares_memory",
    "SIGNED_INT_EA_DTYPES",
    "SIGNED_INT_NUMPY_DTYPES",
    "STRING_DTYPES",
    "SubclassedCategorical",
    "SubclassedDataFrame",
    "SubclassedSeries",
    "TIMEDELTA64_DTYPES",
    "to_array",
    "UNSIGNED_INT_EA_DTYPES",
    "UNSIGNED_INT_NUMPY_DTYPES",
    "use_numexpr",
    "with_csv_dialect",
    "write_to_compressed",
]