Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/array_manager.py: 17%
601 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Experimental manager based on storing a collection of 1D arrays
3"""
4from __future__ import annotations
6from typing import (
7 TYPE_CHECKING,
8 Any,
9 Callable,
10 Hashable,
11 Literal,
12 TypeVar,
13)
15import numpy as np
17from pandas._libs import (
18 NaT,
19 algos as libalgos,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 DtypeObj,
25 npt,
26)
27from pandas.util._validators import validate_bool_kwarg
29from pandas.core.dtypes.astype import astype_array_safe
30from pandas.core.dtypes.cast import (
31 ensure_dtype_can_hold_na,
32 infer_dtype_from_scalar,
33 soft_convert_objects,
34)
35from pandas.core.dtypes.common import (
36 ensure_platform_int,
37 is_datetime64_ns_dtype,
38 is_dtype_equal,
39 is_extension_array_dtype,
40 is_integer,
41 is_numeric_dtype,
42 is_object_dtype,
43 is_timedelta64_ns_dtype,
44)
45from pandas.core.dtypes.dtypes import (
46 ExtensionDtype,
47 PandasDtype,
48)
49from pandas.core.dtypes.generic import (
50 ABCDataFrame,
51 ABCSeries,
52)
53from pandas.core.dtypes.inference import is_inferred_bool_dtype
54from pandas.core.dtypes.missing import (
55 array_equals,
56 isna,
57 na_value_for_dtype,
58)
60import pandas.core.algorithms as algos
61from pandas.core.array_algos.quantile import quantile_compat
62from pandas.core.array_algos.take import take_1d
63from pandas.core.arrays import (
64 DatetimeArray,
65 ExtensionArray,
66 PandasArray,
67 TimedeltaArray,
68)
69from pandas.core.arrays.sparse import SparseDtype
70from pandas.core.construction import (
71 ensure_wrapped_if_datetimelike,
72 extract_array,
73 sanitize_array,
74)
75from pandas.core.indexers import (
76 maybe_convert_indices,
77 validate_indices,
78)
79from pandas.core.indexes.api import (
80 Index,
81 ensure_index,
82)
83from pandas.core.internals.base import (
84 DataManager,
85 SingleDataManager,
86 interleaved_dtype,
87)
88from pandas.core.internals.blocks import (
89 ensure_block_shape,
90 external_values,
91 extract_pandas_array,
92 maybe_coerce_values,
93 new_block,
94 to_native_types,
95)
if TYPE_CHECKING:
    # Import only for static type checking to avoid a runtime circular import.
    from pandas import Float64Index


# TypeVar so manager methods can return the concrete subclass type (Self-style).
T = TypeVar("T", bound="BaseArrayManager")
class BaseArrayManager(DataManager):
    """
    Core internal data structure to implement DataFrame and Series.

    Alternative to the BlockManager, storing a list of 1D arrays instead of
    Blocks.

    This is *not* a public API class

    Parameters
    ----------
    arrays : Sequence of arrays
    axes : Sequence of Index
    verify_integrity : bool, default True
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Abstract base class: concrete constructors live on the subclasses
        # (e.g. ArrayManager below).
        raise NotImplementedError

    def make_empty(self: T, axes=None) -> T:
        """Return an empty ArrayManager with the items axis of len 0 (no columns)"""
        if axes is None:
            # NOTE(review): ``self.axes[1:]`` is a *list* slice, not an Index;
            # presumably only exercised for the 1D manager where the slice is
            # empty — confirm against callers.
            axes = [self.axes[1:], Index([])]

        arrays: list[np.ndarray | ExtensionArray] = []
        return type(self)(arrays, axes)

    @property
    def items(self) -> Index:
        # The column Index is stored last in the internal (rows, columns) order.
        return self._axes[-1]

    @property
    # error: Signature of "axes" incompatible with supertype "DataManager"
    def axes(self) -> list[Index]:  # type: ignore[override]
        # mypy doesn't work to override attribute with property
        # see https://github.com/python/mypy/issues/4125
        """Axes is BlockManager-compatible order (columns, rows)"""
        return [self._axes[1], self._axes[0]]

    @property
    def shape_proper(self) -> tuple[int, ...]:
        # this returns (n_rows, n_columns)
        return tuple(len(ax) for ax in self._axes)

    @staticmethod
    def _normalize_axis(axis: int) -> int:
        # switch axis: translate between BlockManager axis numbering and the
        # internal (rows, columns) numbering of ``_axes``
        axis = 1 if axis == 0 else 0
        return axis

    def set_axis(self, axis: int, new_labels: Index) -> None:
        """Replace the labels of one axis (``axis`` in BlockManager order)."""
        # Caller is responsible for ensuring we have an Index object.
        self._validate_set_axis(axis, new_labels)
        axis = self._normalize_axis(axis)
        self._axes[axis] = new_labels

    def get_dtypes(self) -> np.ndarray:
        """Return an object-dtype ndarray holding each column's dtype."""
        return np.array([arr.dtype for arr in self.arrays], dtype="object")

    def __getstate__(self):
        # Pickle support: the manager is fully described by its arrays + axes.
        return self.arrays, self._axes

    def __setstate__(self, state) -> None:
        self.arrays = state[0]
        self._axes = state[1]

    def __repr__(self) -> str:
        # Summarize the manager: index, columns (2D only) and per-array dtypes.
        output = type(self).__name__
        output += f"\nIndex: {self._axes[0]}"
        if self.ndim == 2:
            output += f"\nColumns: {self._axes[1]}"
        output += f"\n{len(self.arrays)} arrays:"
        for arr in self.arrays:
            output += f"\n{arr.dtype}"
        return output

    def apply(
        self: T,
        f,
        align_keys: list[str] | None = None,
        ignore_failures: bool = False,
        **kwargs,
    ) -> T:
        """
        Iterate over the arrays, collect and create a new ArrayManager.

        Parameters
        ----------
        f : str or callable
            Name of the Array method to apply, or a callable taking the array
            as first argument.
        align_keys: List[str] or None, default None
            Names of keyword arguments that hold Series/DataFrame/array-like
            values which must be aligned per-column before calling ``f``.
        ignore_failures: bool, default False
            If True, columns where ``f`` raises TypeError/NotImplementedError
            are dropped instead of propagating the error.
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        ArrayManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        if f == "apply":
            f = kwargs.pop("func")

        for i, arr in enumerate(self.arrays):

            if aligned_args:

                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[i]
                        else:
                            kwargs[k] = obj.iloc[:, i]._values
                    else:
                        # otherwise we have an array-like
                        kwargs[k] = obj[i]

            try:
                if callable(f):
                    applied = f(arr, **kwargs)
                else:
                    applied = getattr(arr, f)(**kwargs)
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue
            # if not isinstance(applied, ExtensionArray):
            #     # TODO not all EA operations return new EAs (eg astype)
            #     applied = array(applied)
            result_arrays.append(applied)
            result_indices.append(i)

        new_axes: list[Index]
        if ignore_failures:
            # TODO copy?
            # keep only the columns that succeeded
            new_axes = [self._axes[0], self._axes[1][result_indices]]
        else:
            new_axes = self._axes

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays, new_axes)  # type: ignore[arg-type]

    def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
        """
        Apply a Block method column-by-column by wrapping each array in a
        temporary single-column Block.

        Parameters
        ----------
        f : str
            Name of the Block method to call.
        align_keys : list of str, optional
            Keyword names whose Series/DataFrame/ndarray values are aligned
            per-column before each call.
        swap_axis : bool, default True
            Translate an ``axis`` keyword into BlockManager convention.
        **kwargs
            Keywords forwarded to the Block method.
        """
        # switch axis to follow BlockManager logic
        if swap_axis and "axis" in kwargs and self.ndim == 2:
            kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):

            if aligned_args:
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray):
                # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to
                # convert for the Block constructors.
                arr = np.asarray(arr)

            if self.ndim == 2:
                arr = ensure_block_shape(arr, 2)
                block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
            else:
                block = new_block(arr, placement=slice(0, len(self), 1), ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2 and arr.ndim == 2:
                # 2D for np.ndarray or DatetimeArray/TimedeltaArray
                assert len(arr) == 1
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                arr = arr[0, :]  # type: ignore[call-overload]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)

    def where(self: T, other, cond, align: bool) -> T:
        """Blockwise ``where``: replace values where ``cond`` is False."""
        if align:
            align_keys = ["other", "cond"]
        else:
            align_keys = ["cond"]
        other = extract_array(other, extract_numpy=True)

        return self.apply_with_block(
            "where",
            align_keys=align_keys,
            other=other,
            cond=cond,
        )

    def setitem(self: T, indexer, value) -> T:
        """Set ``value`` into the arrays via the Block ``setitem`` machinery."""
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def putmask(self: T, mask, new, align: bool = True) -> T:
        """Blockwise ``putmask``: set ``new`` where ``mask`` is True."""
        if align:
            align_keys = ["new", "mask"]
        else:
            align_keys = ["mask"]
        new = extract_array(new, extract_numpy=True)

        return self.apply_with_block(
            "putmask",
            align_keys=align_keys,
            mask=mask,
            new=new,
        )

    def diff(self: T, n: int, axis: int) -> T:
        """First discrete difference over ``n`` periods, column-wise."""
        if axis == 1:
            # DataFrame only calls this for n=0, in which case performing it
            # with axis=0 is equivalent
            assert n == 0
            axis = 0
        return self.apply(algos.diff, n=n, axis=axis)

    def interpolate(self: T, **kwargs) -> T:
        """Interpolate missing values via the Block implementation."""
        return self.apply_with_block("interpolate", swap_axis=False, **kwargs)

    def shift(self: T, periods: int, axis: int, fill_value) -> T:
        """Shift values by ``periods`` along ``axis``."""
        if fill_value is lib.no_default:
            fill_value = None

        if axis == 1 and self.ndim == 2:
            # TODO column-wise shift
            raise NotImplementedError

        return self.apply_with_block(
            "shift", periods=periods, axis=axis, fill_value=fill_value
        )

    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
        """Fill missing values via the Block ``fillna`` implementation."""
        if limit is not None:
            # Do this validation even if we go through one of the no-op paths
            limit = libalgos.validate_limit(None, limit=limit)

        return self.apply_with_block(
            "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
        )

    def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
        """Cast all arrays to ``dtype`` (with pandas' validating astype)."""
        return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)

    def convert(
        self: T,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> T:
        """Soft-convert object-dtype columns to better dtypes where possible."""
        def _convert(arr):
            if is_object_dtype(arr.dtype):
                # extract PandasArray for tests that patch PandasArray._typ
                arr = np.asarray(arr)
                return soft_convert_objects(
                    arr,
                    datetime=datetime,
                    numeric=numeric,
                    timedelta=timedelta,
                    copy=copy,
                )
            else:
                # non-object columns are returned unchanged (copied if asked)
                return arr.copy() if copy else arr

        return self.apply(_convert)

    def replace_regex(self: T, **kwargs) -> T:
        """Regex-based replace via the Block ``_replace_regex`` implementation."""
        return self.apply_with_block("_replace_regex", **kwargs)

    def replace(self: T, to_replace, value, inplace: bool) -> T:
        """Replace a single scalar ``to_replace`` with scalar ``value``."""
        inplace = validate_bool_kwarg(inplace, "inplace")
        assert np.ndim(value) == 0, value
        # TODO "replace" is right now implemented on the blocks, we should move
        # it to general array algos so it can be reused here
        return self.apply_with_block(
            "replace", value=value, to_replace=to_replace, inplace=inplace
        )

    def replace_list(
        self: T,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> T:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        return self.apply_with_block(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
        )

    def to_native_types(self: T, **kwargs) -> T:
        """Convert arrays to their native-types representation (for output)."""
        return self.apply(to_native_types, **kwargs)

    @property
    def is_mixed_type(self) -> bool:
        # each column is its own array, so treat as always "mixed"
        return True

    @property
    def is_numeric_mixed_type(self) -> bool:
        # True when every column has a numeric dtype
        return all(is_numeric_dtype(t) for t in self.get_dtypes())

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        return False  # any(block.is_extension for block in self.blocks)

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        # TODO what is this used for?
        return False

    @property
    def is_single_block(self) -> bool:
        # single-column manager is the ArrayManager analogue of a single block
        return len(self.arrays) == 1

    def _get_data_subset(self: T, predicate: Callable) -> T:
        """Return a new manager with only the columns for which ``predicate(arr)`` is True."""
        indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
        arrays = [self.arrays[i] for i in indices]
        # TODO copy?
        # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
        # see test_describe_datetime_columns
        taker = np.array(indices, dtype="intp")
        new_cols = self._axes[1].take(taker)
        new_axes = [self._axes[0], new_cols]
        return type(self)(arrays, new_axes, verify_integrity=False)

    def get_bool_data(self: T, copy: bool = False) -> T:
        """
        Select columns that are bool-dtype and object-dtype columns that are all-bool.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(is_inferred_bool_dtype)

    def get_numeric_data(self: T, copy: bool = False) -> T:
        """
        Select columns that have a numeric dtype.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(
            lambda arr: is_numeric_dtype(arr.dtype)
            or getattr(arr.dtype, "_is_numeric", False)
        )

    def copy(self: T, deep=True) -> T:
        """
        Make deep or shallow copy of ArrayManager

        Parameters
        ----------
        deep : bool or string, default True
            If False, return shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        if deep is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            deep = True

        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                return ax.copy(deep=True) if deep == "all" else ax.view()

            new_axes = [copy_func(ax) for ax in self._axes]
        else:
            new_axes = list(self._axes)

        if deep:
            new_arrays = [arr.copy() for arr in self.arrays]
        else:
            new_arrays = list(self.arrays)
        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def reindex_indexer(
        self: T,
        new_axis,
        indexer,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        # ignored keywords
        only_slice: bool = False,
        # ArrayManager specific keywords
        use_na_proxy: bool = False,
    ) -> T:
        """Public reindex entry point; translates ``axis`` then delegates."""
        axis = self._normalize_axis(axis)
        return self._reindex_indexer(
            new_axis,
            indexer,
            axis,
            fill_value,
            allow_dups,
            copy,
            use_na_proxy,
        )

    def _reindex_indexer(
        self: T,
        new_axis,
        indexer: npt.NDArray[np.intp] | None,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        use_na_proxy: bool = False,
    ) -> T:
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool, default True


        pandas-indexer with -1's only.
        """
        if copy is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            copy = True

        if indexer is None:
            # no take needed: just (optionally copy and) relabel the axis
            if new_axis is self._axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result._axes = list(self._axes)
            result._axes[axis] = new_axis
            return result

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self._axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 1:
            # column axis: select/insert whole arrays; -1 means a new NA column
            new_arrays = []
            for i in indexer:
                if i == -1:
                    arr = self._make_na_array(
                        fill_value=fill_value, use_na_proxy=use_na_proxy
                    )
                else:
                    arr = self.arrays[i]
                    if copy:
                        arr = arr.copy()
                new_arrays.append(arr)

        else:
            # row axis: take within each array
            validate_indices(indexer, len(self._axes[0]))
            indexer = ensure_platform_int(indexer)
            mask = indexer == -1
            needs_masking = mask.any()
            new_arrays = [
                take_1d(
                    arr,
                    indexer,
                    allow_fill=needs_masking,
                    fill_value=fill_value,
                    mask=mask,
                    # if fill_value is not None else blk.fill_value
                )
                for arr in self.arrays
            ]

        new_axes = list(self._axes)
        new_axes[axis] = new_axis

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def take(
        self: T,
        indexer,
        axis: int = 1,
        verify: bool = True,
        convert_indices: bool = True,
    ) -> T:
        """
        Take items along any axis.
        """
        axis = self._normalize_axis(axis)

        # normalize slice/array-like input to an int64 ndarray
        indexer = (
            np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
            if isinstance(indexer, slice)
            else np.asanyarray(indexer, dtype="int64")
        )

        if not indexer.ndim == 1:
            raise ValueError("indexer should be 1-dimensional")

        n = self.shape_proper[axis]
        if convert_indices:
            # wrap negative indices and bounds-check
            indexer = maybe_convert_indices(indexer, n, verify=verify)

        new_labels = self._axes[axis].take(indexer)
        return self._reindex_indexer(
            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
        )

    def _make_na_array(self, fill_value=None, use_na_proxy=False):
        """Build a length-n_rows all-NA column (or a lazy NullArrayProxy)."""
        if use_na_proxy:
            assert fill_value is None
            return NullArrayProxy(self.shape_proper[0])

        if fill_value is None:
            fill_value = np.nan

        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any],
        # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
        # _DTypeDict, Tuple[Any, Any]]]"
        values = np.empty(self.shape_proper[0], dtype=dtype)  # type: ignore[arg-type]
        values.fill(fill_value)
        return values

    def _equal_values(self, other) -> bool:
        """
        Used in .equals defined in base class. Only check the column values
        assuming shape and indexes have already been checked.
        """
        for left, right in zip(self.arrays, other.arrays):
            if not array_equals(left, right):
                return False
        else:
            return True

    # TODO
    # to_dict
class ArrayManager(BaseArrayManager):
    """2D manager: one 1D array per column of a DataFrame."""

    @property
    def ndim(self) -> Literal[2]:
        return 2

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Note: we are storing the axes in "_axes" in the (row, columns) order
        # which contrasts the order how it is stored in BlockManager
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            # normalize inputs: ensure Index objects and coerced 1D arrays
            self._axes = [ensure_index(ax) for ax in axes]
            arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays]
            self.arrays = [maybe_coerce_values(arr) for arr in arrays]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        """Validate that arrays match the axes: count, length, type and ndim."""
        n_rows, n_columns = self.shape_proper
        if not len(self.arrays) == n_columns:
            raise ValueError(
                "Number of passed arrays must equal the size of the column Index: "
                f"{len(self.arrays)} arrays vs {n_columns} columns."
            )
        for arr in self.arrays:
            if not len(arr) == n_rows:
                raise ValueError(
                    "Passed arrays should have the same length as the rows Index: "
                    f"{len(arr)} vs {n_rows} rows"
                )
            if not isinstance(arr, (np.ndarray, ExtensionArray)):
                raise ValueError(
                    "Passed arrays should be np.ndarray or ExtensionArray instances, "
                    f"got {type(arr)} instead"
                )
            if not arr.ndim == 1:
                raise ValueError(
                    "Passed arrays should be 1-dimensional, got array with "
                    f"{arr.ndim} dimensions instead."
                )

    # --------------------------------------------------------------------
    # Indexing

    def fast_xs(self, loc: int) -> SingleArrayManager:
        """
        Return the array corresponding to `frame.iloc[loc]`.

        Parameters
        ----------
        loc : int

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # find a common dtype for the row across all column dtypes
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        values = [arr[loc] for arr in self.arrays]
        if isinstance(dtype, ExtensionDtype):
            result = dtype.construct_array_type()._from_sequence(values, dtype=dtype)
        # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
        elif is_datetime64_ns_dtype(dtype):
            result = DatetimeArray._from_sequence(values, dtype=dtype)._data
        elif is_timedelta64_ns_dtype(dtype):
            result = TimedeltaArray._from_sequence(values, dtype=dtype)._data
        else:
            result = np.array(values, dtype=dtype)
        return SingleArrayManager([result], [self._axes[1]])

    def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
        """Slice along one axis (``axis`` in BlockManager order), returning views."""
        axis = self._normalize_axis(axis)

        if axis == 0:
            # row slice: slice each column array
            arrays = [arr[slobj] for arr in self.arrays]
        elif axis == 1:
            # column slice: slice the list of arrays
            arrays = self.arrays[slobj]

        new_axes = list(self._axes)
        new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

        return type(self)(arrays, new_axes, verify_integrity=False)

    def iget(self, i: int) -> SingleArrayManager:
        """
        Return the data as a SingleArrayManager.
        """
        values = self.arrays[i]
        return SingleArrayManager([values], [self._axes[0]])

    def iget_values(self, i: int) -> ArrayLike:
        """
        Return the data for column i as the values (ndarray or ExtensionArray).
        """
        return self.arrays[i]

    @property
    def column_arrays(self) -> list[ArrayLike]:
        """
        Used in the JSON C code to access column arrays.
        """
        # note: materializes each column as an ndarray
        return [np.asarray(arr) for arr in self.arrays]

    def iset(
        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
    ) -> None:
        """
        Set new column(s).

        This changes the ArrayManager in-place, but replaces (an) existing
        column(s), not changing column values in-place).

        Parameters
        ----------
        loc : integer, slice or boolean mask
            Positional location (already bounds checked)
        value : np.ndarray or ExtensionArray
        inplace : bool, default False
            Whether overwrite existing array as opposed to replacing it.
        """
        # single column -> single integer index
        if lib.is_integer(loc):

            # TODO can we avoid needing to unpack this here? That means converting
            # DataFrame into 1D array when loc is an integer
            if isinstance(value, np.ndarray) and value.ndim == 2:
                assert value.shape[1] == 1
                value = value[:, 0]

            # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
            # but we should avoid that and pass directly the proper array
            value = maybe_coerce_values(value)

            assert isinstance(value, (np.ndarray, ExtensionArray))
            assert value.ndim == 1
            assert len(value) == len(self._axes[0])
            self.arrays[loc] = value
            return

        # multiple columns -> convert slice or array to integer indices
        elif isinstance(loc, slice):
            indices = range(
                loc.start if loc.start is not None else 0,
                loc.stop if loc.stop is not None else self.shape_proper[1],
                loc.step if loc.step is not None else 1,
            )
        else:
            assert isinstance(loc, np.ndarray)
            assert loc.dtype == "bool"
            # error: Incompatible types in assignment (expression has type "ndarray",
            # variable has type "range")
            indices = np.nonzero(loc)[0]  # type: ignore[assignment]

        assert value.ndim == 2
        assert value.shape[0] == len(self._axes[0])

        for value_idx, mgr_idx in enumerate(indices):
            # error: No overload variant of "__getitem__" of "ExtensionArray" matches
            # argument type "Tuple[slice, int]"
            value_arr = value[:, value_idx]  # type: ignore[call-overload]
            self.arrays[mgr_idx] = value_arr
        return

    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
        """
        Set values ("setitem") into a single column (not setting the full column).

        This is a method on the ArrayManager level, to avoid creating an
        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
        """
        if not is_integer(loc):
            raise TypeError("The column index should be an integer")
        arr = self.arrays[loc]
        mgr = SingleArrayManager([arr], [self._axes[0]])
        new_mgr = mgr.setitem((idx,), value)
        # update existing ArrayManager in-place
        self.arrays[loc] = new_mgr.arrays[0]

    def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : np.ndarray or ExtensionArray
        """
        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        value = extract_array(value, extract_numpy=True)
        if value.ndim == 2:
            if value.shape[0] == 1:
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                value = value[0, :]  # type: ignore[call-overload]
            else:
                raise ValueError(
                    f"Expected a 1D array, got an array with shape {value.shape}"
                )
        value = maybe_coerce_values(value)

        # TODO self.arrays can be empty
        # assert len(value) == len(self.arrays[0])

        # TODO is this copy needed?
        arrays = self.arrays.copy()
        arrays.insert(loc, value)

        self.arrays = arrays
        self._axes[1] = new_axis

    def idelete(self, indexer) -> ArrayManager:
        """
        Delete selected locations in-place (new block and array, same BlockManager)
        """
        # boolean keep-mask over the columns; indexer marks what to drop
        to_keep = np.ones(self.shape[0], dtype=np.bool_)
        to_keep[indexer] = False

        self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
        self._axes = [self._axes[0], self._axes[1][to_keep]]
        return self

    # --------------------------------------------------------------------
    # Array-wise Operation

    def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
        """
        Apply grouped reduction function columnwise, returning a new ArrayManager.

        Parameters
        ----------
        func : grouped reduction function
        ignore_failures : bool, default False
            Whether to drop columns where func raises TypeError.

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []

        for i, arr in enumerate(self.arrays):
            # grouped_reduce functions all expect 2D arrays
            arr = ensure_block_shape(arr, ndim=2)
            try:
                res = func(arr)
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue

            if res.ndim == 2:
                # reverse of ensure_block_shape
                assert res.shape[0] == 1
                res = res[0]

            result_arrays.append(res)
            result_indices.append(i)

        if len(result_arrays) == 0:
            index = Index([None])  # placeholder
        else:
            index = Index(range(result_arrays[0].shape[0]))

        if ignore_failures:
            columns = self.items[np.array(result_indices, dtype="int64")]
        else:
            columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]

    def reduce(
        self: T, func: Callable, ignore_failures: bool = False
    ) -> tuple[T, np.ndarray]:
        """
        Apply reduction function column-wise, returning a single-row ArrayManager.

        Parameters
        ----------
        func : reduction function
        ignore_failures : bool, default False
            Whether to drop columns where func raises TypeError.

        Returns
        -------
        ArrayManager
        np.ndarray
            Indexer of column indices that are retained.
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []
        for i, arr in enumerate(self.arrays):
            try:
                res = func(arr, axis=0)
            except TypeError:
                if not ignore_failures:
                    raise
            else:
                # TODO NaT doesn't preserve dtype, so we need to ensure to create
                # a timedelta result array if original was timedelta
                # what if datetime results in timedelta? (eg std)
                if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
                    result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
                else:
                    # error: Argument 1 to "append" of "list" has incompatible type
                    # "ExtensionArray"; expected "ndarray"
                    result_arrays.append(
                        sanitize_array([res], None)  # type: ignore[arg-type]
                    )
                result_indices.append(i)

        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
        if ignore_failures:
            indexer = np.array(result_indices)
            columns = self.items[result_indices]
        else:
            indexer = np.arange(self.shape[0])
            columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        new_mgr = type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]
        return new_mgr, indexer

    def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
        """
        Apply array_op blockwise with another (aligned) BlockManager.
        """
        # TODO what if `other` is BlockManager ?
        left_arrays = self.arrays
        right_arrays = other.arrays
        result_arrays = [
            array_op(left, right) for left, right in zip(left_arrays, right_arrays)
        ]
        return type(self)(result_arrays, self._axes)

    def quantile(
        self,
        *,
        qs: Float64Index,
        axis: int = 0,
        transposed: bool = False,
        interpolation="linear",
    ) -> ArrayManager:
        """Compute the quantiles ``qs`` per column; result index is ``qs``."""
        arrs = [ensure_block_shape(x, 2) for x in self.arrays]
        assert axis == 1
        new_arrs = [
            quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs
        ]
        for i, arr in enumerate(new_arrs):
            if arr.ndim == 2:
                assert arr.shape[0] == 1, arr.shape
                new_arrs[i] = arr[0]

        axes = [qs, self._axes[1]]
        return type(self)(new_arrs, axes)

    # ----------------------------------------------------------------

    def unstack(self, unstacker, fill_value) -> ArrayManager:
        """
        Return a BlockManager with all blocks unstacked.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        indexer, _ = unstacker._indexer_and_to_sort
        if unstacker.mask.all():
            # no missing entries introduced by the unstack
            new_indexer = indexer
            allow_fill = False
            new_mask2D = None
            needs_masking = None
        else:
            new_indexer = np.full(unstacker.mask.shape, -1)
            new_indexer[unstacker.mask] = indexer
            allow_fill = True
            # calculating the full mask once and passing it to take_1d is faster
            # than letting take_1d calculate it in each repeated call
            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
            needs_masking = new_mask2D.any(axis=0)
        new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
        new_indexer2D = ensure_platform_int(new_indexer2D)

        new_arrays = []
        for arr in self.arrays:
            for i in range(unstacker.full_shape[1]):
                if allow_fill:
                    # error: Value of type "Optional[Any]" is not indexable  [index]
                    new_arr = take_1d(
                        arr,
                        new_indexer2D[:, i],
                        allow_fill=needs_masking[i],  # type: ignore[index]
                        fill_value=fill_value,
                        mask=new_mask2D[:, i],  # type: ignore[index]
                    )
                else:
                    new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False)
                new_arrays.append(new_arr)

        new_index = unstacker.new_index
        new_columns = unstacker.get_new_columns(self._axes[1])
        new_axes = [new_index, new_columns]

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def as_array(
        self,
        dtype=None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        dtype : object, default None
            Data type of the return array.
        copy : bool, default False
            If True then guarantee that a copy is returned. A value of
            False does not guarantee that the underlying data is not
            copied.
        na_value : object, default lib.no_default
            Value to be used as the missing value sentinel.

        Returns
        -------
        arr : ndarray
        """
        if len(self.arrays) == 0:
            empty_arr = np.empty(self.shape, dtype=float)
            return empty_arr.transpose()

        # We want to copy when na_value is provided to avoid
        # mutating the original object
        copy = copy or na_value is not lib.no_default

        if not dtype:
            dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        # map pandas-specific dtypes to a plain numpy dtype for the output
        if isinstance(dtype, SparseDtype):
            dtype = dtype.subtype
        elif isinstance(dtype, PandasDtype):
            dtype = dtype.numpy_dtype
        elif is_extension_array_dtype(dtype):
            dtype = "object"
        elif is_dtype_equal(dtype, str):
            dtype = "object"

        result = np.empty(self.shape_proper, dtype=dtype)

        for i, arr in enumerate(self.arrays):
            arr = arr.astype(dtype, copy=copy)
            result[:, i] = arr

        if na_value is not lib.no_default:
            result[isna(result)] = na_value

        return result
class SingleArrayManager(BaseArrayManager, SingleDataManager):
    """
    Manager holding a single 1D array; the ArrayManager counterpart of a
    Series' backing store.
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    @property
    def ndim(self) -> Literal[1]:
        # a SingleArrayManager always wraps exactly one 1D array
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            assert len(axes) == 1
            assert len(arrays) == 1
            self._axes = [ensure_index(ax) for ax in self._axes]
            # normalize the stored values (coerce dtype-specific wrappers,
            # unwrap PandasArray) before keeping them
            values = arrays[0]
            values = maybe_coerce_values(values)
            values = extract_pandas_array(values, None, 1)[0]
            self.arrays = [values]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        # check that the single stored array is 1D and matches the index length
        (n_rows,) = self.shape
        assert len(self.arrays) == 1
        values = self.arrays[0]
        assert len(values) == n_rows
        if values.ndim != 1:
            raise ValueError(
                "Passed array should be 1-dimensional, got array with "
                f"{values.ndim} dimensions instead."
            )

    @staticmethod
    def _normalize_axis(axis):
        # only one axis exists, so there is nothing to translate
        return axis

    def make_empty(self, axes=None) -> SingleArrayManager:
        """Return an empty ArrayManager with index/array of length 0"""
        if axes is None:
            axes = [Index([], dtype=object)]
        empty = np.array([], dtype=self.dtype)
        return type(self)([empty], axes)

    @classmethod
    def from_array(cls, array, index) -> SingleArrayManager:
        """Alternate constructor from a bare array and its index."""
        return cls([array], [index])

    @property
    def axes(self):
        return self._axes

    @property
    def index(self) -> Index:
        return self._axes[0]

    @property
    def dtype(self):
        return self.array.dtype

    def external_values(self):
        """The array that Series.values returns"""
        return external_values(self.array)

    def internal_values(self):
        """The array that Series._values returns"""
        return self.array

    def array_values(self):
        """The array that Series.array returns"""
        values = self.array
        if isinstance(values, np.ndarray):
            # plain ndarrays are exposed through the PandasArray wrapper
            values = PandasArray(values)
        return values

    @property
    def _can_hold_na(self) -> bool:
        values = self.array
        if isinstance(values, np.ndarray):
            # bool and (un)signed integer ndarrays cannot represent NA
            return values.dtype.kind not in ["b", "i", "u"]
        # ExtensionArray: the array type declares this itself
        return values._can_hold_na

    @property
    def is_single_block(self) -> bool:
        return True

    def fast_xs(self, loc: int) -> SingleArrayManager:
        raise NotImplementedError("Use series._values[loc] instead")

    def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager:
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        sliced_values = self.array[slobj]
        sliced_index = self.index._getitem_slice(slobj)
        return type(self)([sliced_values], [sliced_index], verify_integrity=False)

    def getitem_mgr(self, indexer) -> SingleArrayManager:
        # take along both the values and the index with the same indexer
        return type(self)([self.array[indexer]], [self.index[indexer]])

    def apply(self, func, **kwargs):
        # func may be a callable or the name of a method on the array
        values = self.array
        if callable(func):
            result = func(values, **kwargs)
        else:
            result = getattr(values, func)(**kwargs)
        return type(self)([result], self._axes)

    def setitem(self, indexer, value) -> SingleArrayManager:
        """
        Set values with indexer.

        For SingleArrayManager, this backs s[indexer] = value

        See `setitem_inplace` for a version that works inplace and doesn't
        return a new Manager.
        """
        if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def idelete(self, indexer) -> SingleArrayManager:
        """
        Delete selected locations in-place (new array, same ArrayManager)
        """
        keep = np.ones(self.shape[0], dtype=np.bool_)
        keep[indexer] = False

        self.arrays = [self.arrays[0][keep]]
        self._axes = [self._axes[0][keep]]
        return self

    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
        # used in get_numeric_data / get_bool_data
        if not predicate(self.array):
            return self.make_empty()
        return type(self)(self.arrays, self._axes, verify_integrity=False)

    def set_values(self, values: ArrayLike) -> None:
        """
        Set (replace) the values of the SingleArrayManager in place.

        Use at your own risk! This does not check if the passed values are
        valid for the current SingleArrayManager (length, dtype, etc).
        """
        self.arrays[0] = values

    def to_2d_mgr(self, columns: Index) -> ArrayManager:
        """
        Manager analogue of Series.to_frame
        """
        return ArrayManager(
            [self.arrays[0]], [self.axes[0], columns], verify_integrity=False
        )
class NullArrayProxy:
    """
    Proxy object for an all-NA array.

    Only stores the length of the array, and not the dtype. The dtype
    will only be known when actually concatenating (after determining the
    common dtype, for which this proxy is ignored).
    Using this object avoids that the internals/concat.py needs to determine
    the proper dtype and array type.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        # only the length is recorded; the dtype is decided later
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Helper function to create the actual all-NA array from the NullArrayProxy
        object.

        Parameters
        ----------
        arr : NullArrayProxy
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if isinstance(dtype, ExtensionDtype):
            # taking with -1 and allow_fill=True from an empty EA yields an
            # array whose every position is the EA's NA value
            empty = dtype.construct_array_type()._from_sequence([], dtype=dtype)
            fill_indexer = -np.ones(self.n, dtype=np.intp)
            return empty.take(fill_indexer, allow_fill=True)

        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        na = na_value_for_dtype(dtype)
        out = np.empty(self.n, dtype=dtype)
        out.fill(na)
        # wrap datetime64/timedelta64 results in their array classes
        return ensure_wrapped_if_datetimelike(out)