Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/_testing/__init__.py: 26%

390 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import collections 

4from datetime import datetime 

5from decimal import Decimal 

6import operator 

7import os 

8import re 

9import string 

10from sys import byteorder 

11from typing import ( 

12 TYPE_CHECKING, 

13 Callable, 

14 ContextManager, 

15 Counter, 

16 Iterable, 

17) 

18import warnings 

19 

20import numpy as np 

21 

22from pandas._config.localization import ( 

23 can_set_locale, 

24 get_locales, 

25 set_locale, 

26) 

27 

28from pandas._typing import Dtype 

29from pandas.compat import pa_version_under1p01 

30 

31from pandas.core.dtypes.common import ( 

32 is_float_dtype, 

33 is_integer_dtype, 

34 is_sequence, 

35 is_unsigned_integer_dtype, 

36 pandas_dtype, 

37) 

38 

39import pandas as pd 

40from pandas import ( 

41 Categorical, 

42 CategoricalIndex, 

43 DataFrame, 

44 DatetimeIndex, 

45 Index, 

46 IntervalIndex, 

47 MultiIndex, 

48 RangeIndex, 

49 Series, 

50 bdate_range, 

51) 

52from pandas._testing._io import ( 

53 close, 

54 network, 

55 round_trip_localpath, 

56 round_trip_pathlib, 

57 round_trip_pickle, 

58 write_to_compressed, 

59) 

60from pandas._testing._random import ( 

61 randbool, 

62 rands, 

63 rands_array, 

64) 

65from pandas._testing._warnings import ( 

66 assert_produces_warning, 

67 maybe_produces_warning, 

68) 

69from pandas._testing.asserters import ( 

70 assert_almost_equal, 

71 assert_attr_equal, 

72 assert_categorical_equal, 

73 assert_class_equal, 

74 assert_contains_all, 

75 assert_copy, 

76 assert_datetime_array_equal, 

77 assert_dict_equal, 

78 assert_equal, 

79 assert_extension_array_equal, 

80 assert_frame_equal, 

81 assert_index_equal, 

82 assert_indexing_slices_equivalent, 

83 assert_interval_array_equal, 

84 assert_is_sorted, 

85 assert_is_valid_plot_return_object, 

86 assert_metadata_equivalent, 

87 assert_numpy_array_equal, 

88 assert_period_array_equal, 

89 assert_series_equal, 

90 assert_sp_array_equal, 

91 assert_timedelta_array_equal, 

92 raise_assert_detail, 

93) 

94from pandas._testing.compat import ( 

95 get_dtype, 

96 get_obj, 

97) 

98from pandas._testing.contexts import ( 

99 RNGContext, 

100 decompress_file, 

101 ensure_clean, 

102 ensure_clean_dir, 

103 ensure_safe_environment_variables, 

104 set_timezone, 

105 use_numexpr, 

106 with_csv_dialect, 

107) 

108from pandas.core.api import ( 

109 Float64Index, 

110 Int64Index, 

111 NumericIndex, 

112 UInt64Index, 

113) 

114from pandas.core.arrays import ( 

115 BaseMaskedArray, 

116 ExtensionArray, 

117 PandasArray, 

118) 

119from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

120from pandas.core.construction import extract_array 

121 

122if TYPE_CHECKING: 122 ↛ 123line 122 didn't jump to line 123, because the condition on line 122 was never true

123 from pandas import ( 

124 PeriodIndex, 

125 TimedeltaIndex, 

126 ) 

127 

# Default number of rows (_N) and columns (_K) for the random test
# Series/DataFrames built further down in this module.
_N = 30
_K = 4

# Dtype groups used to parametrize tests across numpy and pandas
# extension ("EA") dtypes.
UNSIGNED_INT_NUMPY_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"]
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
SIGNED_INT_NUMPY_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"]
SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES

FLOAT_NUMPY_DTYPES: list[Dtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: list[Dtype] = [str, "str", "U"]

DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]

BOOL_DTYPES: list[Dtype] = [bool, "bool"]
BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
OBJECT_DTYPES: list[Dtype] = [object, "object"]

ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
ALL_NUMPY_DTYPES = (
    ALL_REAL_NUMPY_DTYPES
    + COMPLEX_DTYPES
    + STRING_DTYPES
    + DATETIME64_DTYPES
    + TIMEDELTA64_DTYPES
    + BOOL_DTYPES
    + OBJECT_DTYPES
    + BYTES_DTYPES
)

# Numpy dtypes narrower than the platform defaults (used e.g. for
# upcasting tests).
NARROW_NP_DTYPES = [
    np.float16,
    np.float32,
    np.int8,
    np.int16,
    np.int32,
    np.uint8,
    np.uint16,
    np.uint32,
]

# "<" or ">" depending on the native byte order of this machine.
ENDIAN = {"little": "<", "big": ">"}[byteorder]

# All scalar missing-value sentinels pandas recognizes.
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
# numpy NaT scalars for every datetime64/timedelta64 resolution.
NP_NAT_OBJECTS = [
    cls("NaT", unit)
    for cls in [np.datetime64, np.timedelta64]
    for unit in [
        "Y",
        "M",
        "W",
        "D",
        "h",
        "m",
        "s",
        "ms",
        "us",
        "ns",
        "ps",
        "fs",
        "as",
    ]
]

195 

# pyarrow dtype groups are only defined when a usable pyarrow (>= 1.0.1)
# is installed; tests that need them must guard on the same condition.
if not pa_version_under1p01:
    import pyarrow as pa

    UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
    SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
    ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES

    FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
    # pa.utf8() is an alias of pa.string(); both spellings are exercised.
    STRING_PYARROW_DTYPES = [pa.string(), pa.utf8()]

    TIME_PYARROW_DTYPES = [
        pa.time32("s"),
        pa.time32("ms"),
        pa.time64("us"),
        pa.time64("ns"),
    ]
    DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
    DATETIME_PYARROW_DTYPES = [
        pa.timestamp(unit=unit, tz=tz)
        for unit in ["s", "ms", "us", "ns"]
        for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
    ]
    TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]

    BOOL_PYARROW_DTYPES = [pa.bool_()]

    # TODO: Add container like pyarrow types:
    # https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
    ALL_PYARROW_DTYPES = (
        ALL_INT_PYARROW_DTYPES
        + FLOAT_PYARROW_DTYPES
        + TIME_PYARROW_DTYPES
        + DATE_PYARROW_DTYPES
        + DATETIME_PYARROW_DTYPES
        + TIMEDELTA_PYARROW_DTYPES
        + BOOL_PYARROW_DTYPES
    )

233 

234 

# Pattern that matches only the empty string.
EMPTY_STRING_PATTERN = re.compile("^$")

# set testing_mode
# Warning categories toggled by the PANDAS_TESTING_MODE environment
# variable (see set_testing_mode / reset_testing_mode below).
_testing_mode_warnings = (DeprecationWarning, ResourceWarning)

239 

240 

def set_testing_mode() -> None:
    """
    Install warning filters requested via the PANDAS_TESTING_MODE env var.

    If the value contains "deprecate", always-show filters are added for
    each category in ``_testing_mode_warnings``; otherwise this is a no-op.
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        for category in _testing_mode_warnings:
            warnings.simplefilter("always", category)


def reset_testing_mode() -> None:
    """
    Undo the filters installed by ``set_testing_mode`` by switching the
    same warning categories to "ignore". No-op unless PANDAS_TESTING_MODE
    contains "deprecate".
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        for category in _testing_mode_warnings:
            warnings.simplefilter("ignore", category)


# Apply the environment-requested filters at import time.
set_testing_mode()

258 

259 

def reset_display_options() -> None:
    """
    Reset all ``display.*`` pandas options to their defaults.

    ``silent=True`` suppresses warnings for options that are deprecated.
    """
    pd.reset_option("^display.", silent=True)

265 

266 

267# ----------------------------------------------------------------------------- 

268# Comparators 

269 

270 

def equalContents(arr1, arr2) -> bool:
    """
    Check whether the sets of unique elements of arr1 and arr2 are equal.

    Order and multiplicity are ignored; elements must be hashable.
    """
    return frozenset(arr1) == frozenset(arr2)

276 

277 

def box_expected(expected, box_cls, transpose=True):
    """
    Helper function to wrap the expected output of a test in a given box_class.

    Parameters
    ----------
    expected : np.ndarray, Index, Series
    box_cls : {pd.array, Index, Series, DataFrame, np.ndarray, to_array}
    transpose : bool, default True
        For DataFrame only: transpose to a single-row frame and duplicate
        it to two rows.

    Returns
    -------
    subclass of box_cls

    Raises
    ------
    NotImplementedError
        If ``box_cls`` is not one of the supported boxes.
    """
    if box_cls is pd.array:
        if isinstance(expected, RangeIndex):
            # pd.array would return an IntegerArray
            expected = PandasArray(np.asarray(expected._values))
        else:
            expected = pd.array(expected)
    elif box_cls is Index:
        expected = Index._with_infer(expected)
    elif box_cls is Series:
        expected = Series(expected)
    elif box_cls is DataFrame:
        expected = Series(expected).to_frame()
        if transpose:
            # for vector operations, we need a DataFrame to be a single-row,
            # not a single-column, in order to operate against non-DataFrame
            # vectors of the same length. But convert to two rows to avoid
            # single-row special cases in datetime arithmetic
            expected = expected.T
            expected = pd.concat([expected] * 2, ignore_index=True)
    elif box_cls is np.ndarray or box_cls is np.array:
        expected = np.array(expected)
    elif box_cls is to_array:
        expected = to_array(expected)
    else:
        raise NotImplementedError(box_cls)
    return expected

317 

318 

def to_array(obj):
    """
    Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.

    Objects without a ``dtype`` attribute are coerced via ``np.asarray``;
    everything else goes through ``extract_array(..., extract_numpy=True)``.
    """
    # temporary implementation until we get pd.array in place
    dtype = getattr(obj, "dtype", None)

    if dtype is None:
        return np.asarray(obj)

    return extract_array(obj, extract_numpy=True)

330 

331 

332# ----------------------------------------------------------------------------- 

333# Others 

334 

335 

336def getCols(k) -> str: 

337 return string.ascii_uppercase[:k] 

338 

339 

340# make index 

341def makeStringIndex(k=10, name=None) -> Index: 

342 return Index(rands_array(nchars=10, size=k), name=name) 

343 

344 

345def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex: 

346 """make a length k index or n categories""" 

347 x = rands_array(nchars=4, size=n, replace=False) 

348 return CategoricalIndex( 

349 Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs 

350 ) 

351 

352 

353def makeIntervalIndex(k=10, name=None, **kwargs) -> IntervalIndex: 

354 """make a length k IntervalIndex""" 

355 x = np.linspace(0, 100, num=(k + 1)) 

356 return IntervalIndex.from_breaks(x, name=name, **kwargs) 

357 

358 

def makeBoolIndex(k=10, name=None) -> Index:
    """Return a length-k boolean Index containing both values when k >= 2."""
    if k == 1:
        return Index([True], name=name)
    elif k == 2:
        return Index([False, True], name=name)
    return Index([False, True] + [False] * (k - 2), name=name)

365 

366 

def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex:
    """
    Return a length-k monotonically increasing NumericIndex of ``dtype``.

    Integer dtypes count from 0 (shifted into range for unsigned dtypes);
    float dtypes use sorted random values scaled by a random power of 10.

    Raises
    ------
    NotImplementedError
        For non-integer, non-float numpy dtypes.
    """
    dtype = pandas_dtype(dtype)
    assert isinstance(dtype, np.dtype)

    if is_integer_dtype(dtype):
        values = np.arange(k, dtype=dtype)
        if is_unsigned_integer_dtype(dtype):
            # shift into the upper half of the unsigned range so values
            # are not representable as the signed counterpart
            values += 2 ** (dtype.itemsize * 8 - 1)
    elif is_float_dtype(dtype):
        values = np.random.random_sample(k) - np.random.random_sample(1)
        values.sort()
        values = values * (10 ** np.random.randint(0, 9))
    else:
        raise NotImplementedError(f"wrong dtype {dtype}")

    return NumericIndex(values, dtype=dtype, name=name)

383 

384 

385def makeIntIndex(k=10, name=None) -> Int64Index: 

386 base_idx = makeNumericIndex(k, name=name, dtype="int64") 

387 return Int64Index(base_idx) 

388 

389 

390def makeUIntIndex(k=10, name=None) -> UInt64Index: 

391 base_idx = makeNumericIndex(k, name=name, dtype="uint64") 

392 return UInt64Index(base_idx) 

393 

394 

395def makeRangeIndex(k=10, name=None, **kwargs) -> RangeIndex: 

396 return RangeIndex(0, k, 1, name=name, **kwargs) 

397 

398 

399def makeFloatIndex(k=10, name=None) -> Float64Index: 

400 base_idx = makeNumericIndex(k, name=name, dtype="float64") 

401 return Float64Index(base_idx) 

402 

403 

404def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex: 

405 dt = datetime(2000, 1, 1) 

406 dr = bdate_range(dt, periods=k, freq=freq, name=name) 

407 return DatetimeIndex(dr, name=name, **kwargs) 

408 

409 

410def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex: 

411 return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) 

412 

413 

414def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: 

415 dt = datetime(2000, 1, 1) 

416 return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) 

417 

418 

def makeMultiIndex(k=10, names=None, **kwargs):
    """Return a length-k two-level MultiIndex of ("foo"/"bar", int) pairs."""
    # Build at least k entries from the product of 2 labels x N integers,
    # then trim to exactly k.
    N = (k // 2) + 1
    rng = range(N)
    mi = MultiIndex.from_product([("foo", "bar"), rng], names=names, **kwargs)
    assert len(mi) >= k  # GH#38795
    return mi[:k]

425 

426 

def index_subclass_makers_generator():
    """
    Yield the maker functions for every Index subclass defined in this
    module (Datetime, Period, Timedelta, Range, Interval, Categorical,
    Multi).
    """
    make_index_funcs = [
        makeDateIndex,
        makePeriodIndex,
        makeTimedeltaIndex,
        makeRangeIndex,
        makeIntervalIndex,
        makeCategoricalIndex,
        makeMultiIndex,
    ]
    yield from make_index_funcs

438 

439 

def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]:
    """
    Generator which can be iterated over to get instances of all the classes
    which represent time-series.

    Parameters
    ----------
    k: length of each of the index instances
    """
    make_index_funcs: list[Callable[..., Index]] = [
        makeDateIndex,
        makePeriodIndex,
        makeTimedeltaIndex,
    ]
    for make_index_func in make_index_funcs:
        yield make_index_func(k=k)

456 

457 

458# make series 

# make series
def make_rand_series(name=None, dtype=np.float64) -> Series:
    """Return a length-_N Series of random data over a random string index."""
    index = makeStringIndex(_N)
    data = np.random.randn(_N)
    with np.errstate(invalid="ignore"):
        # casting float noise to e.g. an int dtype may warn; suppress it
        data = data.astype(dtype, copy=False)
    return Series(data, index=index, name=name)


def makeFloatSeries(name=None) -> Series:
    """Return a random float64 Series (alias of make_rand_series)."""
    return make_rand_series(name=name)


def makeStringSeries(name=None) -> Series:
    """Return a random float64 Series with a string index."""
    return make_rand_series(name=name)


def makeObjectSeries(name=None) -> Series:
    """Return a Series of random strings with object dtype."""
    data = makeStringIndex(_N)
    data = Index(data, dtype=object)
    index = makeStringIndex(_N)
    return Series(data, index=index, name=name)

480 

481 

def getSeriesData() -> dict[str, Series]:
    """Return _K random Series, keyed "A".."D", sharing one string index."""
    index = makeStringIndex(_N)
    return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)}


def makeTimeSeries(nper=None, freq="B", name=None) -> Series:
    """Return a random Series over a DatetimeIndex of nper periods."""
    if nper is None:
        nper = _N
    return Series(
        np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name
    )


def makePeriodSeries(nper=None, name=None) -> Series:
    """Return a random Series over a PeriodIndex of nper periods."""
    if nper is None:
        nper = _N
    return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)


def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]:
    """Return _K random time series keyed by column letter."""
    return {c: makeTimeSeries(nper, freq) for c in getCols(_K)}


def getPeriodData(nper=None) -> dict[str, Series]:
    """Return _K random period-indexed series keyed by column letter."""
    return {c: makePeriodSeries(nper) for c in getCols(_K)}

507 

508 

509# make frame 

# make frame
def makeTimeDataFrame(nper=None, freq="B") -> DataFrame:
    """Return a DataFrame of _K random time series columns."""
    data = getTimeSeriesData(nper, freq)
    return DataFrame(data)


def makeDataFrame() -> DataFrame:
    """Return a DataFrame of _K random columns over a string index."""
    data = getSeriesData()
    return DataFrame(data)


def getMixedTypeDict():
    """
    Return an (index, data) pair with float, string and datetime columns,
    suitable for building a small mixed-dtype DataFrame.
    """
    index = Index(["a", "b", "c", "d", "e"])

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": bdate_range("1/1/2009", periods=5),
    }

    return index, data


def makeMixedDataFrame() -> DataFrame:
    """Return a small mixed-dtype DataFrame (default RangeIndex)."""
    return DataFrame(getMixedTypeDict()[1])


def makePeriodFrame(nper=None) -> DataFrame:
    """Return a DataFrame of _K random period-indexed columns."""
    data = getPeriodData(nper)
    return DataFrame(data)

540 

541 

def makeCustomIndex(
    nentries,
    nlevels,
    prefix="#",
    names: bool | str | list[str] | None = False,
    ndupe_l=None,
    idx_type=None,
) -> Index:
    """
    Create an index/multindex with given dimensions, levels, names, etc'

    nentries - number of entries in index
    nlevels - number of levels (> 1 produces multindex)
    prefix - a string prefix for labels
    names - (Optional), bool or list of strings. if True will use default
       names, if false will use no names, if a list is given, the name of
       each level in the index will be taken from the list.
    ndupe_l - (Optional), list of ints, the number of rows for which the
       label will repeated at the corresponding level, you can specify just
       the first few, the rest will use the default ndupe_l of 1.
       len(ndupe_l) <= nlevels.
    idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
       If idx_type is not None, `idx_nlevels` must be 1.
       "i"/"f" creates an integer/float index,
       "s" creates a string index
       "dt" create a datetime index.
       "td" create a timedelta index.

       if unspecified, string labels will be generated.
    """
    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
    assert names is None or names is False or names is True or len(names) is nlevels
    # NOTE(review): "u" is accepted here but has no entry in idx_func_dict
    # below, so it falls through to the ValueError branch — confirm intent.
    assert idx_type is None or (
        idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
    )

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, str) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    idx_func_dict: dict[str, Callable[..., Index]] = {
        "i": makeIntIndex,
        "f": makeFloatIndex,
        "s": makeStringIndex,
        "dt": makeDateIndex,
        "td": makeTimedeltaIndex,
        "p": makePeriodIndex,
    }
    idx_func = idx_func_dict.get(idx_type)
    if idx_func:
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx
    elif idx_type is not None:
        raise ValueError(
            f"{repr(idx_type)} is not a legal value for `idx_type`, "
            "use 'i'/'f'/'s'/'dt'/'p'/'td'."
        )

    if len(ndupe_l) < nlevels:
        ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    list_of_lists = []
    for i in range(nlevels):

        def keyfunc(x):
            # sort labels numerically by the ints embedded in "#_l0_g12"
            import re

            numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
            return [int(num) for num in numeric_tuple]

        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1

        # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585
        # and Generic Alias Type.
        cnt: Counter[str] = collections.Counter()
        for j in range(div_factor):
            label = f"{prefix}_l{i}_g{j}"
            cnt[label] = ndupe_l[i]
        # cute Counter trick: elements() repeats each label ndupe_l[i] times
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        list_of_lists.append(result)

    tuples = list(zip(*list_of_lists))

    # convert tuples to index
    if nentries == 1:
        # we have a single level of tuples, i.e. a regular Index
        name = None if names is None else names[0]
        index = Index(tuples[0], name=name)
    elif nlevels == 1:
        name = None if names is None else names[0]
        index = Index((x[0] for x in tuples), name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index

654 

655 

def makeCustomDataframe(
    nrows,
    ncols,
    c_idx_names=True,
    r_idx_names=True,
    c_idx_nlevels=1,
    r_idx_nlevels=1,
    data_gen_f=None,
    c_ndupe_l=None,
    r_ndupe_l=None,
    dtype=None,
    c_idx_type=None,
    r_idx_type=None,
) -> DataFrame:
    """
    Create a DataFrame using supplied parameters.

    Parameters
    ----------
    nrows, ncols - number of data rows/cols
    c_idx_names, r_idx_names - False/True/list of strings, yields No names,
        default names or uses the provided names for the levels of the
        corresponding index. You can provide a single string when
        c_idx_nlevels ==1.
    c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex
    r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex
    data_gen_f - a function f(row,col) which return the data value
        at that position, the default generator used yields values of the form
        "RxCy" based on position.
    c_ndupe_l, r_ndupe_l - list of integers, determines the number
        of duplicates for each label at a given level of the corresponding
        index. The default `None` value produces a multiplicity of 1 across
        all levels, i.e. a unique index. Will accept a partial list of length
        N < idx_nlevels, for just the first N levels. If ndupe doesn't divide
        nrows/ncol, the last label might have lower multiplicity.
    dtype - passed to the DataFrame constructor as is, in case you wish to
        have more control in conjunction with a custom `data_gen_f`
    r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td".
        If idx_type is not None, `idx_nlevels` must be 1.
        "i"/"f" creates an integer/float index,
        "s" creates a string index
        "dt" create a datetime index.
        "td" create a timedelta index.

        if unspecified, string labels will be generated.

    Examples
    --------
    # 5 row, 3 columns, default names on both, single index on both axis
    >> makeCustomDataframe(5,3)

    # make the data a random int between 1 and 100
    >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100))

    # 2-level multiindex on rows with each label duplicated
    # twice on first level, default names on both axis, single
    # index on both axis
    >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2])

    # DatetimeIndex on row, index with unicode labels on columns
    # no names on either axis
    >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False,
                             r_idx_type="dt",c_idx_type="u")

    # 4-level multindex on rows with names provided, 2-level multindex
    # on columns with default labels and default names.
    >> a=makeCustomDataframe(5,3,r_idx_nlevels=4,
                             r_idx_names=["FEE","FIH","FOH","FUM"],
                             c_idx_nlevels=2)

    >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
    """
    assert c_idx_nlevels > 0
    assert r_idx_nlevels > 0
    assert r_idx_type is None or (
        r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
    )
    assert c_idx_type is None or (
        c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
    )

    columns = makeCustomIndex(
        ncols,
        nlevels=c_idx_nlevels,
        prefix="C",
        names=c_idx_names,
        ndupe_l=c_ndupe_l,
        idx_type=c_idx_type,
    )
    index = makeCustomIndex(
        nrows,
        nlevels=r_idx_nlevels,
        prefix="R",
        names=r_idx_names,
        ndupe_l=r_ndupe_l,
        idx_type=r_idx_type,
    )

    # by default, generate data based on location
    if data_gen_f is None:
        data_gen_f = lambda r, c: f"R{r}C{c}"

    data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)]

    return DataFrame(data, index, columns, dtype=dtype)

761 

762 

def _create_missing_idx(nrows, ncols, density, random_state=None):
    """
    Return (row_indices, col_indices) marking ~(1 - density) of an
    nrows x ncols grid as missing, with no duplicate positions.

    random_state may be None (global numpy RNG) or a seed for
    np.random.RandomState, making the result reproducible.
    """
    if random_state is None:
        random_state = np.random
    else:
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = round((1 - density) * nrows * ncols)
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        # draw extra candidates, dedupe, then trim to the requested size
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        # not enough unique draws; enlarge the candidate pool and retry
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    # unravel flat positions into (row, col) pairs
    j = np.floor(ind * 1.0 / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()

788 

789 

def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame:
    """
    Return a random DataFrame with ~(1 - density) of its cells set to NaN.

    random_state seeds the missing-position generator for reproducibility.
    """
    df = makeDataFrame()
    i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
    df.values[i, j] = np.nan
    return df

795 

796 

class SubclassedSeries(Series):
    """Series subclass used to test that pandas ops preserve subclasses."""

    # attributes propagated through pandas operations via __finalize__
    _metadata = ["testattr", "name"]

    @property
    def _constructor(self):
        # For testing, those properties return a generic callable, and not
        # the actual class. In this case that is equivalent, but it is to
        # ensure we don't rely on the property returning a class
        # See https://github.com/pandas-dev/pandas/pull/46018 and
        # https://github.com/pandas-dev/pandas/issues/32638 and linked issues
        return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)

    @property
    def _constructor_expanddim(self):
        return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)

812 

813 

814class SubclassedDataFrame(DataFrame): 

815 _metadata = ["testattr"] 

816 

817 @property 

818 def _constructor(self): 

819 return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) 

820 

821 @property 

822 def _constructor_sliced(self): 

823 return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) 

824 

825 

class SubclassedCategorical(Categorical):
    """Categorical subclass used to test subclass preservation."""

    @property
    def _constructor(self):
        return SubclassedCategorical

830 

831 

def _make_skipna_wrapper(alternative, skipna_alternative=None):
    """
    Create a function for calling on an array.

    Parameters
    ----------
    alternative : function
        The function to be called on the array with no NaNs.
        Only used when 'skipna_alternative' is None.
    skipna_alternative : function
        The function to be called on the original array

    Returns
    -------
    function
    """
    if skipna_alternative:
        # the alternative already knows how to skip NaNs; hand it raw values
        def skipna_wrapper(x):
            return skipna_alternative(x.values)

    else:
        # drop NaNs ourselves; an all-NaN input reduces to NaN
        def skipna_wrapper(x):
            nona = x.dropna()
            if len(nona) == 0:
                return np.nan
            return alternative(nona)

    return skipna_wrapper

862 

863 

def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
    """
    Convert list of CSV rows to single CSV-formatted string for current OS.

    This method is used for creating expected value of to_csv() method.

    Parameters
    ----------
    rows_list : List[str]
        Each element represents the row of csv.

    Returns
    -------
    str
        Expected output of to_csv() in current OS.
    """
    # join with the platform line separator and terminate the final row too
    sep = os.linesep
    return sep.join(rows_list) + sep

882 

883 

def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
    """
    Helper function to mark pytest.raises that have an external error message.

    Parameters
    ----------
    expected_exception : Exception
        Expected error to raise.

    Returns
    -------
    Callable
        Regular `pytest.raises` function with `match` equal to `None`.
    """
    # imported lazily so this module does not require pytest at import time
    import pytest

    return pytest.raises(expected_exception, match=None)  # noqa: PDF010

901 

902 

# (function, name) pairs from pandas' internal cython aggregation table;
# consumed by get_cython_table_params below.
cython_table = pd.core.common._cython_table.items()

904 

905 

def get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine frame, functions from com._cython_table
    keys and expected result.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three items (DataFrame, function, expected result)
    """
    results = []
    for func_name, expected in func_names_and_expected:
        results.append((ndframe, func_name, expected))
        # also emit every cython-table callable registered under this name
        results += [
            (ndframe, func, expected)
            for func, name in cython_table
            if name == func_name
        ]
    return results

932 

933 

def get_op_from_name(op_name: str) -> Callable:
    """
    The operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__".

    Returns
    -------
    function
        A function performing the operation.
    """
    short_opname = op_name.strip("_")
    try:
        op = getattr(operator, short_opname)
    except AttributeError:
        # Assume it is the reverse operator (e.g. "radd" -> swapped add)
        rop = getattr(operator, short_opname[1:])
        op = lambda x, y: rop(y, x)

    return op

957 

958 

959# ----------------------------------------------------------------------------- 

960# Indexing test helpers 

961 

962 

def getitem(x):
    """Identity; parametrizes tests over plain __getitem__ access."""
    return x


def setitem(x):
    """Identity; parametrizes tests over plain __setitem__ access."""
    return x


def loc(x):
    """Return the .loc indexer of x."""
    return x.loc


def iloc(x):
    """Return the .iloc indexer of x."""
    return x.iloc


def at(x):
    """Return the .at indexer of x."""
    return x.at


def iat(x):
    """Return the .iat indexer of x."""
    return x.iat

985 

986 

987# ----------------------------------------------------------------------------- 

988 

989 

def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    Unwraps pandas containers (Index/Series/DataFrame and the various
    ExtensionArrays) down to their backing numpy/pyarrow buffers and
    compares those.

    Raises
    ------
    NotImplementedError
        For combinations of types this helper does not know how to unwrap.
    """
    if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    elif isinstance(left, np.ndarray):
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    if isinstance(left, RangeIndex):
        # RangeIndex is lazily materialized; it owns no buffer to share.
        return False
    if isinstance(left, MultiIndex):
        return shares_memory(left._codes, right)
    if isinstance(left, (Index, Series)):
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    if isinstance(left, pd.core.arrays.SparseArray):
        return shares_memory(left.sp_values, right)
    if isinstance(left, pd.core.arrays.IntervalArray):
        return shares_memory(left._left, right) or shares_memory(left._right, right)

    if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
            # error: "ExtensionArray" has no attribute "_data"
            left_pa_data = left._data  # type: ignore[attr-defined]
            # error: "ExtensionArray" has no attribute "_data"
            right_pa_data = right._data  # type: ignore[attr-defined]
            left_buf1 = left_pa_data.chunk(0).buffers()[1]
            right_buf1 = right_pa_data.chunk(0).buffers()[1]
            return left_buf1 == right_buf1

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        # the _data or the _mask
        return np.shares_memory(left._data, right._data) or np.shares_memory(
            left._mask, right._mask
        )

    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        arr = left._mgr.arrays[0]
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))

1037 

1038 

# Public API of pandas._testing, kept sorted case-insensitively.
__all__ = [
    "ALL_INT_EA_DTYPES",
    "ALL_INT_NUMPY_DTYPES",
    "ALL_NUMPY_DTYPES",
    "ALL_REAL_NUMPY_DTYPES",
    "all_timeseries_index_generator",
    "assert_almost_equal",
    "assert_attr_equal",
    "assert_categorical_equal",
    "assert_class_equal",
    "assert_contains_all",
    "assert_copy",
    "assert_datetime_array_equal",
    "assert_dict_equal",
    "assert_equal",
    "assert_extension_array_equal",
    "assert_frame_equal",
    "assert_index_equal",
    "assert_indexing_slices_equivalent",
    "assert_interval_array_equal",
    "assert_is_sorted",
    "assert_is_valid_plot_return_object",
    "assert_metadata_equivalent",
    "assert_numpy_array_equal",
    "assert_period_array_equal",
    "assert_produces_warning",
    "assert_series_equal",
    "assert_sp_array_equal",
    "assert_timedelta_array_equal",
    "at",
    "BOOL_DTYPES",
    "box_expected",
    "BYTES_DTYPES",
    "can_set_locale",
    "close",
    "COMPLEX_DTYPES",
    "convert_rows_list_to_csv_str",
    "DATETIME64_DTYPES",
    "decompress_file",
    "EMPTY_STRING_PATTERN",
    "ENDIAN",
    "ensure_clean",
    "ensure_clean_dir",
    "ensure_safe_environment_variables",
    "equalContents",
    "external_error_raised",
    "FLOAT_EA_DTYPES",
    "FLOAT_NUMPY_DTYPES",
    "getCols",
    "get_cython_table_params",
    "get_dtype",
    "getitem",
    "get_locales",
    "getMixedTypeDict",
    "get_obj",
    "get_op_from_name",
    "getPeriodData",
    "getSeriesData",
    "getTimeSeriesData",
    "iat",
    "iloc",
    "index_subclass_makers_generator",
    "loc",
    "makeBoolIndex",
    "makeCategoricalIndex",
    "makeCustomDataframe",
    "makeCustomIndex",
    "makeDataFrame",
    "makeDateIndex",
    "makeFloatIndex",
    "makeFloatSeries",
    "makeIntervalIndex",
    "makeIntIndex",
    "makeMissingDataframe",
    "makeMixedDataFrame",
    "makeMultiIndex",
    "makeNumericIndex",
    "makeObjectSeries",
    "makePeriodFrame",
    "makePeriodIndex",
    "makePeriodSeries",
    "make_rand_series",
    "makeRangeIndex",
    "makeStringIndex",
    "makeStringSeries",
    "makeTimeDataFrame",
    "makeTimedeltaIndex",
    "makeTimeSeries",
    "makeUIntIndex",
    "maybe_produces_warning",
    "NARROW_NP_DTYPES",
    "network",
    "NP_NAT_OBJECTS",
    "NULL_OBJECTS",
    "OBJECT_DTYPES",
    "raise_assert_detail",
    "randbool",
    "rands",
    "reset_display_options",
    "reset_testing_mode",
    "RNGContext",
    "round_trip_localpath",
    "round_trip_pathlib",
    "round_trip_pickle",
    "setitem",
    "set_locale",
    "set_testing_mode",
    "set_timezone",
    "shares_memory",
    "SIGNED_INT_EA_DTYPES",
    "SIGNED_INT_NUMPY_DTYPES",
    "STRING_DTYPES",
    "SubclassedCategorical",
    "SubclassedDataFrame",
    "SubclassedSeries",
    "TIMEDELTA64_DTYPES",
    "to_array",
    "UNSIGNED_INT_EA_DTYPES",
    "UNSIGNED_INT_NUMPY_DTYPES",
    "use_numexpr",
    "with_csv_dialect",
    "write_to_compressed",
]