Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/_testing/__init__.py: 26%

390 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import collections 

4from datetime import datetime 

5from decimal import Decimal 

6import operator 

7import os 

8import re 

9import string 

10from sys import byteorder 

11from typing import ( 

12 TYPE_CHECKING, 

13 Callable, 

14 ContextManager, 

15 Counter, 

16 Iterable, 

17) 

18import warnings 

19 

20import numpy as np 

21 

22from pandas._config.localization import ( 

23 can_set_locale, 

24 get_locales, 

25 set_locale, 

26) 

27 

28from pandas._typing import Dtype 

29from pandas.compat import pa_version_under1p01 

30 

31from pandas.core.dtypes.common import ( 

32 is_float_dtype, 

33 is_integer_dtype, 

34 is_sequence, 

35 is_unsigned_integer_dtype, 

36 pandas_dtype, 

37) 

38 

39import pandas as pd 

40from pandas import ( 

41 Categorical, 

42 CategoricalIndex, 

43 DataFrame, 

44 DatetimeIndex, 

45 Index, 

46 IntervalIndex, 

47 MultiIndex, 

48 RangeIndex, 

49 Series, 

50 bdate_range, 

51) 

52from pandas._testing._io import ( 

53 close, 

54 network, 

55 round_trip_localpath, 

56 round_trip_pathlib, 

57 round_trip_pickle, 

58 write_to_compressed, 

59) 

60from pandas._testing._random import ( 

61 randbool, 

62 rands, 

63 rands_array, 

64) 

65from pandas._testing._warnings import ( 

66 assert_produces_warning, 

67 maybe_produces_warning, 

68) 

69from pandas._testing.asserters import ( 

70 assert_almost_equal, 

71 assert_attr_equal, 

72 assert_categorical_equal, 

73 assert_class_equal, 

74 assert_contains_all, 

75 assert_copy, 

76 assert_datetime_array_equal, 

77 assert_dict_equal, 

78 assert_equal, 

79 assert_extension_array_equal, 

80 assert_frame_equal, 

81 assert_index_equal, 

82 assert_indexing_slices_equivalent, 

83 assert_interval_array_equal, 

84 assert_is_sorted, 

85 assert_is_valid_plot_return_object, 

86 assert_metadata_equivalent, 

87 assert_numpy_array_equal, 

88 assert_period_array_equal, 

89 assert_series_equal, 

90 assert_sp_array_equal, 

91 assert_timedelta_array_equal, 

92 raise_assert_detail, 

93) 

94from pandas._testing.compat import ( 

95 get_dtype, 

96 get_obj, 

97) 

98from pandas._testing.contexts import ( 

99 RNGContext, 

100 decompress_file, 

101 ensure_clean, 

102 ensure_clean_dir, 

103 ensure_safe_environment_variables, 

104 set_timezone, 

105 use_numexpr, 

106 with_csv_dialect, 

107) 

108from pandas.core.api import ( 

109 Float64Index, 

110 Int64Index, 

111 NumericIndex, 

112 UInt64Index, 

113) 

114from pandas.core.arrays import ( 

115 BaseMaskedArray, 

116 ExtensionArray, 

117 PandasArray, 

118) 

119from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

120from pandas.core.construction import extract_array 

121 

122if TYPE_CHECKING: 122 ↛ 123line 122 didn't jump to line 123, because the condition on line 122 was never true

123 from pandas import ( 

124 PeriodIndex, 

125 TimedeltaIndex, 

126 ) 

127 

# Default number of rows (_N) and columns (_K) for the random test
# Series/DataFrames built further down in this module.
_N = 30
_K = 4

# Dtype groups used to parametrize tests across numpy and pandas
# extension ("EA") dtypes.
UNSIGNED_INT_NUMPY_DTYPES: list[Dtype] = ["uint8", "uint16", "uint32", "uint64"]
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
SIGNED_INT_NUMPY_DTYPES: list[Dtype] = [int, "int8", "int16", "int32", "int64"]
SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES

FLOAT_NUMPY_DTYPES: list[Dtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: list[Dtype] = [str, "str", "U"]

DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]

BOOL_DTYPES: list[Dtype] = [bool, "bool"]
BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
OBJECT_DTYPES: list[Dtype] = [object, "object"]

ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
ALL_NUMPY_DTYPES = (
    ALL_REAL_NUMPY_DTYPES
    + COMPLEX_DTYPES
    + STRING_DTYPES
    + DATETIME64_DTYPES
    + TIMEDELTA64_DTYPES
    + BOOL_DTYPES
    + OBJECT_DTYPES
    + BYTES_DTYPES
)

# Numpy dtypes narrower than the platform defaults (used e.g. for
# upcasting tests).
NARROW_NP_DTYPES = [
    np.float16,
    np.float32,
    np.int8,
    np.int16,
    np.int32,
    np.uint8,
    np.uint16,
    np.uint32,
]

# "<" or ">" depending on the native byte order of this machine.
ENDIAN = {"little": "<", "big": ">"}[byteorder]

# All scalar missing-value sentinels pandas recognizes.
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
# numpy NaT scalars for every datetime64/timedelta64 resolution.
NP_NAT_OBJECTS = [
    cls("NaT", unit)
    for cls in [np.datetime64, np.timedelta64]
    for unit in [
        "Y",
        "M",
        "W",
        "D",
        "h",
        "m",
        "s",
        "ms",
        "us",
        "ns",
        "ps",
        "fs",
        "as",
    ]
]

195 

# pyarrow dtype groups are only defined when a usable pyarrow (>= 1.0.1)
# is installed; tests that need them must guard on the same condition.
if not pa_version_under1p01:
    import pyarrow as pa

    UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
    SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
    ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES

    FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
    # pa.utf8() is an alias of pa.string(); both spellings are exercised.
    STRING_PYARROW_DTYPES = [pa.string(), pa.utf8()]

    TIME_PYARROW_DTYPES = [
        pa.time32("s"),
        pa.time32("ms"),
        pa.time64("us"),
        pa.time64("ns"),
    ]
    DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
    DATETIME_PYARROW_DTYPES = [
        pa.timestamp(unit=unit, tz=tz)
        for unit in ["s", "ms", "us", "ns"]
        for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
    ]
    TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]

    BOOL_PYARROW_DTYPES = [pa.bool_()]

    # TODO: Add container like pyarrow types:
    # https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
    ALL_PYARROW_DTYPES = (
        ALL_INT_PYARROW_DTYPES
        + FLOAT_PYARROW_DTYPES
        + TIME_PYARROW_DTYPES
        + DATE_PYARROW_DTYPES
        + DATETIME_PYARROW_DTYPES
        + TIMEDELTA_PYARROW_DTYPES
        + BOOL_PYARROW_DTYPES
    )

233 

234 

# Pattern that matches only the empty string.
EMPTY_STRING_PATTERN = re.compile("^$")

# set testing_mode
# Warning categories toggled by the PANDAS_TESTING_MODE environment
# variable (see set_testing_mode / reset_testing_mode below).
_testing_mode_warnings = (DeprecationWarning, ResourceWarning)

239 

240 

def set_testing_mode() -> None:
    """
    Install warning filters requested via the PANDAS_TESTING_MODE env var.

    If the value contains "deprecate", always-show filters are added for
    each category in ``_testing_mode_warnings``; otherwise this is a no-op.
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        for category in _testing_mode_warnings:
            warnings.simplefilter("always", category)


def reset_testing_mode() -> None:
    """
    Undo the filters installed by ``set_testing_mode`` by switching the
    same warning categories to "ignore". No-op unless PANDAS_TESTING_MODE
    contains "deprecate".
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        for category in _testing_mode_warnings:
            warnings.simplefilter("ignore", category)


# Apply the environment-requested filters at import time.
set_testing_mode()

258 

259 

def reset_display_options() -> None:
    """
    Reset all ``display.*`` pandas options to their defaults.

    ``silent=True`` suppresses warnings for options that are deprecated.
    """
    pd.reset_option("^display.", silent=True)

265 

266 

267# ----------------------------------------------------------------------------- 

268# Comparators 

269 

270 

def equalContents(arr1, arr2) -> bool:
    """
    Check whether the sets of unique elements of arr1 and arr2 are equal.

    Order and multiplicity are ignored; elements must be hashable.
    """
    return frozenset(arr1) == frozenset(arr2)

276 

277 

def box_expected(expected, box_cls, transpose=True):
    """
    Helper function to wrap the expected output of a test in a given box_class.

    Parameters
    ----------
    expected : np.ndarray, Index, Series
    box_cls : {pd.array, Index, Series, DataFrame, np.ndarray, to_array}
    transpose : bool, default True
        For DataFrame only: transpose to a single-row frame and duplicate
        it to two rows.

    Returns
    -------
    subclass of box_cls

    Raises
    ------
    NotImplementedError
        If ``box_cls`` is not one of the supported boxes.
    """
    if box_cls is pd.array:
        if isinstance(expected, RangeIndex):
            # pd.array would return an IntegerArray
            expected = PandasArray(np.asarray(expected._values))
        else:
            expected = pd.array(expected)
    elif box_cls is Index:
        expected = Index._with_infer(expected)
    elif box_cls is Series:
        expected = Series(expected)
    elif box_cls is DataFrame:
        expected = Series(expected).to_frame()
        if transpose:
            # for vector operations, we need a DataFrame to be a single-row,
            # not a single-column, in order to operate against non-DataFrame
            # vectors of the same length. But convert to two rows to avoid
            # single-row special cases in datetime arithmetic
            expected = expected.T
            expected = pd.concat([expected] * 2, ignore_index=True)
    elif box_cls is np.ndarray or box_cls is np.array:
        expected = np.array(expected)
    elif box_cls is to_array:
        expected = to_array(expected)
    else:
        raise NotImplementedError(box_cls)
    return expected

317 

318 

def to_array(obj):
    """
    Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.

    Objects without a ``dtype`` attribute are coerced via ``np.asarray``;
    everything else goes through ``extract_array(..., extract_numpy=True)``.
    """
    # temporary implementation until we get pd.array in place
    dtype = getattr(obj, "dtype", None)

    if dtype is None:
        return np.asarray(obj)

    return extract_array(obj, extract_numpy=True)

330 

331 

332# ----------------------------------------------------------------------------- 

333# Others 

334 

335 

336def getCols(k) -> str: 

337 return string.ascii_uppercase[:k] 

338 

339 

340# make index 

341def makeStringIndex(k=10, name=None) -> Index: 

342 return Index(rands_array(nchars=10, size=k), name=name) 

343 

344 

345def makeCategoricalIndex(k=10, n=3, name=None, **kwargs) -> CategoricalIndex: 

346 """make a length k index or n categories""" 

347 x = rands_array(nchars=4, size=n, replace=False) 

348 return CategoricalIndex( 

349 Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs 

350 ) 

351 

352 

353def makeIntervalIndex(k=10, name=None, **kwargs) -> IntervalIndex: 

354 """make a length k IntervalIndex""" 

355 x = np.linspace(0, 100, num=(k + 1)) 

356 return IntervalIndex.from_breaks(x, name=name, **kwargs) 

357 

358 

def makeBoolIndex(k=10, name=None) -> Index:
    """Return a length-k boolean Index containing both values when k >= 2."""
    if k == 1:
        return Index([True], name=name)
    elif k == 2:
        return Index([False, True], name=name)
    return Index([False, True] + [False] * (k - 2), name=name)

365 

366 

def makeNumericIndex(k=10, name=None, *, dtype) -> NumericIndex:
    """
    Return a length-k monotonically increasing NumericIndex of ``dtype``.

    Integer dtypes count from 0 (shifted into range for unsigned dtypes);
    float dtypes use sorted random values scaled by a random power of 10.

    Raises
    ------
    NotImplementedError
        For non-integer, non-float numpy dtypes.
    """
    dtype = pandas_dtype(dtype)
    assert isinstance(dtype, np.dtype)

    if is_integer_dtype(dtype):
        values = np.arange(k, dtype=dtype)
        if is_unsigned_integer_dtype(dtype):
            # shift into the upper half of the unsigned range so values
            # are not representable as the signed counterpart
            values += 2 ** (dtype.itemsize * 8 - 1)
    elif is_float_dtype(dtype):
        values = np.random.random_sample(k) - np.random.random_sample(1)
        values.sort()
        values = values * (10 ** np.random.randint(0, 9))
    else:
        raise NotImplementedError(f"wrong dtype {dtype}")

    return NumericIndex(values, dtype=dtype, name=name)

383 

384 

385def makeIntIndex(k=10, name=None) -> Int64Index: 

386 base_idx = makeNumericIndex(k, name=name, dtype="int64") 

387 return Int64Index(base_idx) 

388 

389 

390def makeUIntIndex(k=10, name=None) -> UInt64Index: 

391 base_idx = makeNumericIndex(k, name=name, dtype="uint64") 

392 return UInt64Index(base_idx) 

393 

394 

395def makeRangeIndex(k=10, name=None, **kwargs) -> RangeIndex: 

396 return RangeIndex(0, k, 1, name=name, **kwargs) 

397 

398 

399def makeFloatIndex(k=10, name=None) -> Float64Index: 

400 base_idx = makeNumericIndex(k, name=name, dtype="float64") 

401 return Float64Index(base_idx) 

402 

403 

404def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex: 

405 dt = datetime(2000, 1, 1) 

406 dr = bdate_range(dt, periods=k, freq=freq, name=name) 

407 return DatetimeIndex(dr, name=name, **kwargs) 

408 

409 

410def makeTimedeltaIndex(k: int = 10, freq="D", name=None, **kwargs) -> TimedeltaIndex: 

411 return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) 

412 

413 

414def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: 

415 dt = datetime(2000, 1, 1) 

416 return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) 

417 

418 

def makeMultiIndex(k=10, names=None, **kwargs):
    """Return a length-k two-level MultiIndex of ("foo"/"bar", int) pairs."""
    # Build at least k entries from the product of 2 labels x N integers,
    # then trim to exactly k.
    N = (k // 2) + 1
    rng = range(N)
    mi = MultiIndex.from_product([("foo", "bar"), rng], names=names, **kwargs)
    assert len(mi) >= k  # GH#38795
    return mi[:k]

425 

426 

def index_subclass_makers_generator():
    """
    Yield the maker functions for every Index subclass defined in this
    module (Datetime, Period, Timedelta, Range, Interval, Categorical,
    Multi).
    """
    make_index_funcs = [
        makeDateIndex,
        makePeriodIndex,
        makeTimedeltaIndex,
        makeRangeIndex,
        makeIntervalIndex,
        makeCategoricalIndex,
        makeMultiIndex,
    ]
    yield from make_index_funcs

438 

439 

def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]:
    """
    Generator which can be iterated over to get instances of all the classes
    which represent time-series.

    Parameters
    ----------
    k: length of each of the index instances
    """
    make_index_funcs: list[Callable[..., Index]] = [
        makeDateIndex,
        makePeriodIndex,
        makeTimedeltaIndex,
    ]
    for make_index_func in make_index_funcs:
        yield make_index_func(k=k)

456 

457 

458# make series 

# make series
def make_rand_series(name=None, dtype=np.float64) -> Series:
    """Return a length-_N Series of random data over a random string index."""
    index = makeStringIndex(_N)
    data = np.random.randn(_N)
    with np.errstate(invalid="ignore"):
        # casting float noise to e.g. an int dtype may warn; suppress it
        data = data.astype(dtype, copy=False)
    return Series(data, index=index, name=name)


def makeFloatSeries(name=None) -> Series:
    """Return a random float64 Series (alias of make_rand_series)."""
    return make_rand_series(name=name)


def makeStringSeries(name=None) -> Series:
    """Return a random float64 Series with a string index."""
    return make_rand_series(name=name)


def makeObjectSeries(name=None) -> Series:
    """Return a Series of random strings with object dtype."""
    data = makeStringIndex(_N)
    data = Index(data, dtype=object)
    index = makeStringIndex(_N)
    return Series(data, index=index, name=name)

480 

481 

def getSeriesData() -> dict[str, Series]:
    """Return _K random Series, keyed "A".."D", sharing one string index."""
    index = makeStringIndex(_N)
    return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)}


def makeTimeSeries(nper=None, freq="B", name=None) -> Series:
    """Return a random Series over a DatetimeIndex of nper periods."""
    if nper is None:
        nper = _N
    return Series(
        np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name
    )


def makePeriodSeries(nper=None, name=None) -> Series:
    """Return a random Series over a PeriodIndex of nper periods."""
    if nper is None:
        nper = _N
    return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)


def getTimeSeriesData(nper=None, freq="B") -> dict[str, Series]:
    """Return _K random time series keyed by column letter."""
    return {c: makeTimeSeries(nper, freq) for c in getCols(_K)}


def getPeriodData(nper=None) -> dict[str, Series]:
    """Return _K random period-indexed series keyed by column letter."""
    return {c: makePeriodSeries(nper) for c in getCols(_K)}

507 

508 

509# make frame 

# make frame
def makeTimeDataFrame(nper=None, freq="B") -> DataFrame:
    """Return a DataFrame of _K random time series columns."""
    data = getTimeSeriesData(nper, freq)
    return DataFrame(data)


def makeDataFrame() -> DataFrame:
    """Return a DataFrame of _K random columns over a string index."""
    data = getSeriesData()
    return DataFrame(data)


def getMixedTypeDict():
    """
    Return an (index, data) pair with float, string and datetime columns,
    suitable for building a small mixed-dtype DataFrame.
    """
    index = Index(["a", "b", "c", "d", "e"])

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": bdate_range("1/1/2009", periods=5),
    }

    return index, data


def makeMixedDataFrame() -> DataFrame:
    """Return a small mixed-dtype DataFrame (default RangeIndex)."""
    return DataFrame(getMixedTypeDict()[1])


def makePeriodFrame(nper=None) -> DataFrame:
    """Return a DataFrame of _K random period-indexed columns."""
    data = getPeriodData(nper)
    return DataFrame(data)

540 

541 

def makeCustomIndex(
    nentries,
    nlevels,
    prefix="#",
    names: bool | str | list[str] | None = False,
    ndupe_l=None,
    idx_type=None,
) -> Index:
    """
    Create an index/multindex with given dimensions, levels, names, etc'

    nentries - number of entries in index
    nlevels - number of levels (> 1 produces multindex)
    prefix - a string prefix for labels
    names - (Optional), bool or list of strings. if True will use default
       names, if false will use no names, if a list is given, the name of
       each level in the index will be taken from the list.
    ndupe_l - (Optional), list of ints, the number of rows for which the
       label will repeated at the corresponding level, you can specify just
       the first few, the rest will use the default ndupe_l of 1.
       len(ndupe_l) <= nlevels.
    idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
       If idx_type is not None, `idx_nlevels` must be 1.
       "i"/"f" creates an integer/float index,
       "s" creates a string index
       "dt" create a datetime index.
       "td" create a timedelta index.

       if unspecified, string labels will be generated.
    """
    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
    assert names is None or names is False or names is True or len(names) is nlevels
    # NOTE(review): "u" is accepted here but has no entry in idx_func_dict
    # below, so it falls through to the ValueError branch — confirm intent.
    assert idx_type is None or (
        idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
    )

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, str) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    idx_func_dict: dict[str, Callable[..., Index]] = {
        "i": makeIntIndex,
        "f": makeFloatIndex,
        "s": makeStringIndex,
        "dt": makeDateIndex,
        "td": makeTimedeltaIndex,
        "p": makePeriodIndex,
    }
    idx_func = idx_func_dict.get(idx_type)
    if idx_func:
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx
    elif idx_type is not None:
        raise ValueError(
            f"{repr(idx_type)} is not a legal value for `idx_type`, "
            "use 'i'/'f'/'s'/'dt'/'p'/'td'."
        )

    if len(ndupe_l) < nlevels:
        ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    list_of_lists = []
    for i in range(nlevels):

        def keyfunc(x):
            # sort labels numerically by the ints embedded in "#_l0_g12"
            import re

            numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
            return [int(num) for num in numeric_tuple]

        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1

        # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585
        # and Generic Alias Type.
        cnt: Counter[str] = collections.Counter()
        for j in range(div_factor):
            label = f"{prefix}_l{i}_g{j}"
            cnt[label] = ndupe_l[i]
        # cute Counter trick: elements() repeats each label ndupe_l[i] times
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        list_of_lists.append(result)

    tuples = list(zip(*list_of_lists))

    # convert tuples to index
    if nentries == 1:
        # we have a single level of tuples, i.e. a regular Index
        name = None if names is None else names[0]
        index = Index(tuples[0], name=name)
    elif nlevels == 1:
        name = None if names is None else names[0]
        index = Index((x[0] for x in tuples), name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index

654 

655 

def makeCustomDataframe(
    nrows,
    ncols,
    c_idx_names=True,
    r_idx_names=True,
    c_idx_nlevels=1,
    r_idx_nlevels=1,
    data_gen_f=None,
    c_ndupe_l=None,
    r_ndupe_l=None,
    dtype=None,
    c_idx_type=None,
    r_idx_type=None,
) -> DataFrame:
    """
    Create a DataFrame using supplied parameters.

    Parameters
    ----------
    nrows, ncols - number of data rows/cols
    c_idx_names, r_idx_names - False/True/list of strings, yields No names,
        default names or uses the provided names for the levels of the
        corresponding index. You can provide a single string when
        c_idx_nlevels ==1.
    c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex
    r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex
    data_gen_f - a function f(row,col) which return the data value
        at that position, the default generator used yields values of the form
        "RxCy" based on position.
    c_ndupe_l, r_ndupe_l - list of integers, determines the number
        of duplicates for each label at a given level of the corresponding
        index. The default `None` value produces a multiplicity of 1 across
        all levels, i.e. a unique index. Will accept a partial list of length
        N < idx_nlevels, for just the first N levels. If ndupe doesn't divide
        nrows/ncol, the last label might have lower multiplicity.
    dtype - passed to the DataFrame constructor as is, in case you wish to
        have more control in conjunction with a custom `data_gen_f`
    r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td".
        If idx_type is not None, `idx_nlevels` must be 1.
        "i"/"f" creates an integer/float index,
        "s" creates a string index
        "dt" create a datetime index.
        "td" create a timedelta index.

        if unspecified, string labels will be generated.

    Examples
    --------
    # 5 row, 3 columns, default names on both, single index on both axis
    >> makeCustomDataframe(5,3)

    # make the data a random int between 1 and 100
    >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100))

    # 2-level multiindex on rows with each label duplicated
    # twice on first level, default names on both axis, single
    # index on both axis
    >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2])

    # DatetimeIndex on row, index with unicode labels on columns
    # no names on either axis
    >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False,
                             r_idx_type="dt",c_idx_type="u")

    # 4-level multindex on rows with names provided, 2-level multindex
    # on columns with default labels and default names.
    >> a=makeCustomDataframe(5,3,r_idx_nlevels=4,
                             r_idx_names=["FEE","FIH","FOH","FUM"],
                             c_idx_nlevels=2)

    >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
    """
    assert c_idx_nlevels > 0
    assert r_idx_nlevels > 0
    assert r_idx_type is None or (
        r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
    )
    assert c_idx_type is None or (
        c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
    )

    columns = makeCustomIndex(
        ncols,
        nlevels=c_idx_nlevels,
        prefix="C",
        names=c_idx_names,
        ndupe_l=c_ndupe_l,
        idx_type=c_idx_type,
    )
    index = makeCustomIndex(
        nrows,
        nlevels=r_idx_nlevels,
        prefix="R",
        names=r_idx_names,
        ndupe_l=r_ndupe_l,
        idx_type=r_idx_type,
    )

    # by default, generate data based on location
    if data_gen_f is None:
        data_gen_f = lambda r, c: f"R{r}C{c}"

    data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)]

    return DataFrame(data, index, columns, dtype=dtype)

761 

762 

def _create_missing_idx(nrows, ncols, density, random_state=None):
    """
    Return (row_indices, col_indices) marking ~(1 - density) of an
    nrows x ncols grid as missing, with no duplicate positions.

    random_state may be None (global numpy RNG) or a seed for
    np.random.RandomState, making the result reproducible.
    """
    if random_state is None:
        random_state = np.random
    else:
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = round((1 - density) * nrows * ncols)
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        # draw extra candidates, dedupe, then trim to the requested size
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        # not enough unique draws; enlarge the candidate pool and retry
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    # unravel flat positions into (row, col) pairs
    j = np.floor(ind * 1.0 / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()

788 

789 

def makeMissingDataframe(density=0.9, random_state=None) -> DataFrame:
    """
    Return a random DataFrame with ~(1 - density) of its cells set to NaN.

    random_state seeds the missing-position generator for reproducibility.
    """
    df = makeDataFrame()
    i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
    df.values[i, j] = np.nan
    return df

795 

796 

class SubclassedSeries(Series):
    """Series subclass used to test that pandas ops preserve subclasses."""

    # attributes propagated through pandas operations via __finalize__
    _metadata = ["testattr", "name"]

    @property
    def _constructor(self):
        # For testing, those properties return a generic callable, and not
        # the actual class. In this case that is equivalent, but it is to
        # ensure we don't rely on the property returning a class
        # See https://github.com/pandas-dev/pandas/pull/46018 and
        # https://github.com/pandas-dev/pandas/issues/32638 and linked issues
        return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)

    @property
    def _constructor_expanddim(self):
        return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)

812 

813 

814class SubclassedDataFrame(DataFrame): 

815 _metadata = ["testattr"] 

816 

817 @property 

818 def _constructor(self): 

819 return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs) 

820 

821 @property 

822 def _constructor_sliced(self): 

823 return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs) 

824 

825 

class SubclassedCategorical(Categorical):
    """Categorical subclass used to test subclass preservation."""

    @property
    def _constructor(self):
        return SubclassedCategorical

830 

831 

def _make_skipna_wrapper(alternative, skipna_alternative=None):
    """
    Create a function for calling on an array.

    Parameters
    ----------
    alternative : function
        The function to be called on the array with no NaNs.
        Only used when 'skipna_alternative' is None.
    skipna_alternative : function
        The function to be called on the original array

    Returns
    -------
    function
    """
    if skipna_alternative:
        # the alternative already knows how to skip NaNs; hand it raw values
        def skipna_wrapper(x):
            return skipna_alternative(x.values)

    else:
        # drop NaNs ourselves; an all-NaN input reduces to NaN
        def skipna_wrapper(x):
            nona = x.dropna()
            if len(nona) == 0:
                return np.nan
            return alternative(nona)

    return skipna_wrapper

862 

863 

def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
    """
    Convert list of CSV rows to single CSV-formatted string for current OS.

    This method is used for creating expected value of to_csv() method.

    Parameters
    ----------
    rows_list : List[str]
        Each element represents the row of csv.

    Returns
    -------
    str
        Expected output of to_csv() in current OS.
    """
    # join with the platform line separator and terminate the final row too
    sep = os.linesep
    return sep.join(rows_list) + sep

882 

883 

def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
    """
    Helper function to mark pytest.raises that have an external error message.

    Parameters
    ----------
    expected_exception : Exception
        Expected error to raise.

    Returns
    -------
    Callable
        Regular `pytest.raises` function with `match` equal to `None`.
    """
    # imported lazily so this module does not require pytest at import time
    import pytest

    return pytest.raises(expected_exception, match=None)  # noqa: PDF010

901 

902 

# (function, name) pairs from pandas' internal cython aggregation table;
# consumed by get_cython_table_params below.
cython_table = pd.core.common._cython_table.items()

904 

905 

def get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine frame, functions from com._cython_table
    keys and expected result.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three items (DataFrame, function, expected result)
    """
    results = []
    for func_name, expected in func_names_and_expected:
        results.append((ndframe, func_name, expected))
        # also emit every cython-table callable registered under this name
        results += [
            (ndframe, func, expected)
            for func, name in cython_table
            if name == func_name
        ]
    return results

932 

933 

def get_op_from_name(op_name: str) -> Callable:
    """
    The operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__".

    Returns
    -------
    function
        A function performing the operation.
    """
    short_opname = op_name.strip("_")
    try:
        op = getattr(operator, short_opname)
    except AttributeError:
        # Assume it is the reverse operator (e.g. "radd" -> swapped add)
        rop = getattr(operator, short_opname[1:])
        op = lambda x, y: rop(y, x)

    return op

957 

958 

959# ----------------------------------------------------------------------------- 

960# Indexing test helpers 

961 

962 

def getitem(x):
    """Identity; parametrizes tests over plain __getitem__ access."""
    return x


def setitem(x):
    """Identity; parametrizes tests over plain __setitem__ access."""
    return x


def loc(x):
    """Return the .loc indexer of x."""
    return x.loc


def iloc(x):
    """Return the .iloc indexer of x."""
    return x.iloc


def at(x):
    """Return the .at indexer of x."""
    return x.at


def iat(x):
    """Return the .iat indexer of x."""
    return x.iat

985 

986 

987# ----------------------------------------------------------------------------- 

988 

989 

def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    Unwraps pandas containers (Index/Series/DataFrame and the various
    ExtensionArrays) down to their backing numpy/pyarrow buffers and
    compares those.

    Raises
    ------
    NotImplementedError
        For combinations of types this helper does not know how to unwrap.
    """
    if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    elif isinstance(left, np.ndarray):
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    if isinstance(left, RangeIndex):
        # RangeIndex is lazily materialized; it owns no buffer to share.
        return False
    if isinstance(left, MultiIndex):
        return shares_memory(left._codes, right)
    if isinstance(left, (Index, Series)):
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    if isinstance(left, pd.core.arrays.SparseArray):
        return shares_memory(left.sp_values, right)
    if isinstance(left, pd.core.arrays.IntervalArray):
        return shares_memory(left._left, right) or shares_memory(left._right, right)

    if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
            # error: "ExtensionArray" has no attribute "_data"
            left_pa_data = left._data  # type: ignore[attr-defined]
            # error: "ExtensionArray" has no attribute "_data"
            right_pa_data = right._data  # type: ignore[attr-defined]
            left_buf1 = left_pa_data.chunk(0).buffers()[1]
            right_buf1 = right_pa_data.chunk(0).buffers()[1]
            return left_buf1 == right_buf1

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        # the _data or the _mask
        return np.shares_memory(left._data, right._data) or np.shares_memory(
            left._mask, right._mask
        )

    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        arr = left._mgr.arrays[0]
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))

1037 

1038 

# Public API of pandas._testing, kept sorted case-insensitively.
__all__ = [
    "ALL_INT_EA_DTYPES",
    "ALL_INT_NUMPY_DTYPES",
    "ALL_NUMPY_DTYPES",
    "ALL_REAL_NUMPY_DTYPES",
    "all_timeseries_index_generator",
    "assert_almost_equal",
    "assert_attr_equal",
    "assert_categorical_equal",
    "assert_class_equal",
    "assert_contains_all",
    "assert_copy",
    "assert_datetime_array_equal",
    "assert_dict_equal",
    "assert_equal",
    "assert_extension_array_equal",
    "assert_frame_equal",
    "assert_index_equal",
    "assert_indexing_slices_equivalent",
    "assert_interval_array_equal",
    "assert_is_sorted",
    "assert_is_valid_plot_return_object",
    "assert_metadata_equivalent",
    "assert_numpy_array_equal",
    "assert_period_array_equal",
    "assert_produces_warning",
    "assert_series_equal",
    "assert_sp_array_equal",
    "assert_timedelta_array_equal",
    "at",
    "BOOL_DTYPES",
    "box_expected",
    "BYTES_DTYPES",
    "can_set_locale",
    "close",
    "COMPLEX_DTYPES",
    "convert_rows_list_to_csv_str",
    "DATETIME64_DTYPES",
    "decompress_file",
    "EMPTY_STRING_PATTERN",
    "ENDIAN",
    "ensure_clean",
    "ensure_clean_dir",
    "ensure_safe_environment_variables",
    "equalContents",
    "external_error_raised",
    "FLOAT_EA_DTYPES",
    "FLOAT_NUMPY_DTYPES",
    "getCols",
    "get_cython_table_params",
    "get_dtype",
    "getitem",
    "get_locales",
    "getMixedTypeDict",
    "get_obj",
    "get_op_from_name",
    "getPeriodData",
    "getSeriesData",
    "getTimeSeriesData",
    "iat",
    "iloc",
    "index_subclass_makers_generator",
    "loc",
    "makeBoolIndex",
    "makeCategoricalIndex",
    "makeCustomDataframe",
    "makeCustomIndex",
    "makeDataFrame",
    "makeDateIndex",
    "makeFloatIndex",
    "makeFloatSeries",
    "makeIntervalIndex",
    "makeIntIndex",
    "makeMissingDataframe",
    "makeMixedDataFrame",
    "makeMultiIndex",
    "makeNumericIndex",
    "makeObjectSeries",
    "makePeriodFrame",
    "makePeriodIndex",
    "makePeriodSeries",
    "make_rand_series",
    "makeRangeIndex",
    "makeStringIndex",
    "makeStringSeries",
    "makeTimeDataFrame",
    "makeTimedeltaIndex",
    "makeTimeSeries",
    "makeUIntIndex",
    "maybe_produces_warning",
    "NARROW_NP_DTYPES",
    "network",
    "NP_NAT_OBJECTS",
    "NULL_OBJECTS",
    "OBJECT_DTYPES",
    "raise_assert_detail",
    "randbool",
    "rands",
    "reset_display_options",
    "reset_testing_mode",
    "RNGContext",
    "round_trip_localpath",
    "round_trip_pathlib",
    "round_trip_pickle",
    "setitem",
    "set_locale",
    "set_testing_mode",
    "set_timezone",
    "shares_memory",
    "SIGNED_INT_EA_DTYPES",
    "SIGNED_INT_NUMPY_DTYPES",
    "STRING_DTYPES",
    "SubclassedCategorical",
    "SubclassedDataFrame",
    "SubclassedSeries",
    "TIMEDELTA64_DTYPES",
    "to_array",
    "UNSIGNED_INT_EA_DTYPES",
    "UNSIGNED_INT_NUMPY_DTYPES",
    "use_numexpr",
    "with_csv_dialect",
    "write_to_compressed",
]