Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/array_manager.py: 17%

601 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Experimental manager based on storing a collection of 1D arrays 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 TYPE_CHECKING, 

8 Any, 

9 Callable, 

10 Hashable, 

11 Literal, 

12 TypeVar, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import ( 

18 NaT, 

19 algos as libalgos, 

20 lib, 

21) 

22from pandas._typing import ( 

23 ArrayLike, 

24 DtypeObj, 

25 npt, 

26) 

27from pandas.util._validators import validate_bool_kwarg 

28 

29from pandas.core.dtypes.astype import astype_array_safe 

30from pandas.core.dtypes.cast import ( 

31 ensure_dtype_can_hold_na, 

32 infer_dtype_from_scalar, 

33 soft_convert_objects, 

34) 

35from pandas.core.dtypes.common import ( 

36 ensure_platform_int, 

37 is_datetime64_ns_dtype, 

38 is_dtype_equal, 

39 is_extension_array_dtype, 

40 is_integer, 

41 is_numeric_dtype, 

42 is_object_dtype, 

43 is_timedelta64_ns_dtype, 

44) 

45from pandas.core.dtypes.dtypes import ( 

46 ExtensionDtype, 

47 PandasDtype, 

48) 

49from pandas.core.dtypes.generic import ( 

50 ABCDataFrame, 

51 ABCSeries, 

52) 

53from pandas.core.dtypes.inference import is_inferred_bool_dtype 

54from pandas.core.dtypes.missing import ( 

55 array_equals, 

56 isna, 

57 na_value_for_dtype, 

58) 

59 

60import pandas.core.algorithms as algos 

61from pandas.core.array_algos.quantile import quantile_compat 

62from pandas.core.array_algos.take import take_1d 

63from pandas.core.arrays import ( 

64 DatetimeArray, 

65 ExtensionArray, 

66 PandasArray, 

67 TimedeltaArray, 

68) 

69from pandas.core.arrays.sparse import SparseDtype 

70from pandas.core.construction import ( 

71 ensure_wrapped_if_datetimelike, 

72 extract_array, 

73 sanitize_array, 

74) 

75from pandas.core.indexers import ( 

76 maybe_convert_indices, 

77 validate_indices, 

78) 

79from pandas.core.indexes.api import ( 

80 Index, 

81 ensure_index, 

82) 

83from pandas.core.internals.base import ( 

84 DataManager, 

85 SingleDataManager, 

86 interleaved_dtype, 

87) 

88from pandas.core.internals.blocks import ( 

89 ensure_block_shape, 

90 external_values, 

91 extract_pandas_array, 

92 maybe_coerce_values, 

93 new_block, 

94 to_native_types, 

95) 

96 

97if TYPE_CHECKING: 97 ↛ 98line 97 didn't jump to line 98, because the condition on line 97 was never true

98 from pandas import Float64Index 

99 

100 

101T = TypeVar("T", bound="BaseArrayManager") 

102 

103 

104class BaseArrayManager(DataManager): 

105 """ 

106 Core internal data structure to implement DataFrame and Series. 

107 

108 Alternative to the BlockManager, storing a list of 1D arrays instead of 

109 Blocks. 

110 

111 This is *not* a public API class 

112 

113 Parameters 

114 ---------- 

115 arrays : Sequence of arrays 

116 axes : Sequence of Index 

117 verify_integrity : bool, default True 

118 

119 """ 

120 

121 __slots__ = [ 

122 "_axes", # private attribute, because 'axes' has different order, see below 

123 "arrays", 

124 ] 

125 

126 arrays: list[np.ndarray | ExtensionArray] 

127 _axes: list[Index] 

128 

129 def __init__( 

130 self, 

131 arrays: list[np.ndarray | ExtensionArray], 

132 axes: list[Index], 

133 verify_integrity: bool = True, 

134 ) -> None: 

135 raise NotImplementedError 

136 

def make_empty(self: T, axes=None) -> T:
    """
    Return an empty manager of the same type, holding no column arrays.

    Parameters
    ----------
    axes : list of Index, optional
        Axes handed to the constructor as-is. When omitted, ``self.axes[1:]``
        is kept and an empty ``Index`` is appended as the items axis.

    Returns
    -------
    Same type as ``self``, constructed with an empty list of arrays.
    """
    if axes is None:
        # ``self.axes[1:]`` is already a list of Index objects: concatenate
        # instead of nesting it, otherwise the first "axis" handed to the
        # constructor would be a list wrapping an Index.
        axes = self.axes[1:] + [Index([])]

    arrays: list[np.ndarray | ExtensionArray] = []
    return type(self)(arrays, axes)

144 

@property
def items(self) -> Index:
    """Return the last entry of the internally stored axes."""
    last_axis = self._axes[-1]
    return last_axis

148 

@property
# error: Signature of "axes" incompatible with supertype "DataManager"
def axes(self) -> list[Index]:  # type: ignore[override]
    """Axes is BlockManager-compatible order (columns, rows)"""
    # mypy doesn't work to override attribute with property
    # see https://github.com/python/mypy/issues/4125
    # ``_axes`` is stored the other way round, so swap the two entries.
    second, first = self._axes[1], self._axes[0]
    return [second, first]

156 

@property
def shape_proper(self) -> tuple[int, ...]:
    """Return the length of every stored axis, in ``_axes`` order."""
    # this returns (n_rows, n_columns)
    return tuple(map(len, self._axes))

161 

162 @staticmethod 

163 def _normalize_axis(axis: int) -> int: 

164 # switch axis 

165 axis = 1 if axis == 0 else 0 

166 return axis 

167 

def set_axis(self, axis: int, new_labels: Index) -> None:
    """
    Replace the labels of one axis in place.

    ``axis`` is validated via ``_validate_set_axis`` first and is then
    translated with ``_normalize_axis`` to find the slot in ``_axes``.
    """
    # Caller is responsible for ensuring we have an Index object.
    self._validate_set_axis(axis, new_labels)
    slot = self._normalize_axis(axis)
    self._axes[slot] = new_labels

173 

def get_dtypes(self) -> np.ndarray:
    """Return an object-dtype ndarray with the dtype of every stored array."""
    dtypes = [column.dtype for column in self.arrays]
    return np.array(dtypes, dtype="object")

176 

177 def __getstate__(self): 

178 return self.arrays, self._axes 

179 

180 def __setstate__(self, state) -> None: 

181 self.arrays = state[0] 

182 self._axes = state[1] 

183 

184 def __repr__(self) -> str: 

185 output = type(self).__name__ 

186 output += f"\nIndex: {self._axes[0]}" 

187 if self.ndim == 2: 

188 output += f"\nColumns: {self._axes[1]}" 

189 output += f"\n{len(self.arrays)} arrays:" 

190 for arr in self.arrays: 

191 output += f"\n{arr.dtype}" 

192 return output 

193 

def apply(
    self: T,
    f,
    align_keys: list[str] | None = None,
    ignore_failures: bool = False,
    **kwargs,
) -> T:
    """
    Call ``f`` on every stored array and wrap the results in a new manager.

    Parameters
    ----------
    f : str or callable
        A callable invoked as ``f(arr, **kwargs)``, or the name of a method
        looked up on each array. The special value ``"apply"`` means the
        callable is taken from ``kwargs["func"]``.
    align_keys : list of str, optional
        Names of keyword arguments that are sliced per column before each
        call (position ``i`` for the ``i``-th array).
    ignore_failures : bool, default False
        If True, arrays for which the call raises ``TypeError`` or
        ``NotImplementedError`` are dropped instead of propagating the error.
    **kwargs
        Keywords to pass to `f`

    Returns
    -------
    Same type as ``self``.
    """
    assert "filter" not in kwargs

    keys_to_align = align_keys or []
    kept_arrays: list[np.ndarray] = []
    kept_positions: list[int] = []
    # fillna: Series/DataFrame is responsible for making sure value is aligned

    per_column = {key: kwargs[key] for key in keys_to_align}

    if f == "apply":
        f = kwargs.pop("func")

    for pos, arr in enumerate(self.arrays):

        for key, obj in per_column.items():
            if not isinstance(obj, (ABCSeries, ABCDataFrame)):
                # otherwise we have an array-like
                kwargs[key] = obj[pos]
            elif obj.ndim == 1:
                # The caller is responsible for ensuring that
                # obj.axes[-1].equals(self.items)
                kwargs[key] = obj.iloc[pos]
            else:
                kwargs[key] = obj.iloc[:, pos]._values

        try:
            applied = f(arr, **kwargs) if callable(f) else getattr(arr, f)(**kwargs)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                continue
            raise
        # if not isinstance(applied, ExtensionArray):
        #     # TODO not all EA operations return new EAs (eg astype)
        #     applied = array(applied)
        kept_arrays.append(applied)
        kept_positions.append(pos)

    new_axes: list[Index]
    if not ignore_failures:
        new_axes = self._axes
    else:
        # TODO copy?
        new_axes = [self._axes[0], self._axes[1][kept_positions]]

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(kept_arrays, new_axes)  # type: ignore[arg-type]

270 

def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
    """
    Wrap each array in a temporary block, call block method ``f`` on it and
    collect the resulting values into a new manager with the same axes.

    Parameters
    ----------
    f : str
        Name of the method looked up on each block.
    align_keys : list of str, optional
        Names of keyword arguments that are sliced per column before each call.
    swap_axis : bool, default True
        For a 2D manager, flip an ``axis`` keyword between 0 and 1.
    **kwargs
        Keywords forwarded to the block method.
    """
    # switch axis to follow BlockManager logic
    if swap_axis and "axis" in kwargs and self.ndim == 2:
        kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

    keys_to_align = align_keys or []
    per_column = {key: kwargs[key] for key in keys_to_align}

    collected = []

    for pos, arr in enumerate(self.arrays):

        for key, obj in per_column.items():
            if isinstance(obj, (ABCSeries, ABCDataFrame)):
                # The caller is responsible for ensuring that
                # obj.axes[-1].equals(self.items)
                if obj.ndim != 1:
                    kwargs[key] = obj.iloc[:, [pos]]._values
                elif self.ndim == 2:
                    kwargs[key] = obj.iloc[slice(pos, pos + 1)]._values
                else:
                    kwargs[key] = obj.iloc[:]._values
            elif obj.ndim == 2:
                # otherwise we have an ndarray
                kwargs[key] = obj[[pos]]

        if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray):
            # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to
            #  convert for the Block constructors.
            arr = np.asarray(arr)

        if self.ndim == 2:
            arr = ensure_block_shape(arr, 2)
            block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
        else:
            block = new_block(arr, placement=slice(0, len(self), 1), ndim=1)

        applied = getattr(block, f)(**kwargs)
        if isinstance(applied, list):
            # only the first returned block is used
            applied = applied[0]
        values = applied.values
        if self.ndim == 2 and values.ndim == 2:
            # 2D for np.ndarray or DatetimeArray/TimedeltaArray
            assert len(values) == 1
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[int, slice]"
            values = values[0, :]  # type: ignore[call-overload]
        collected.append(values)

    return type(self)(collected, self._axes)

324 

def where(self: T, other, cond, align: bool) -> T:
    """
    Delegate to the block-level ``where`` for every array.

    When ``align`` is true both ``other`` and ``cond`` are aligned per
    column; otherwise only ``cond`` is, and ``other`` is unwrapped with
    ``extract_array`` first.
    """
    if not align:
        other = extract_array(other, extract_numpy=True)
        keys = ["cond"]
    else:
        keys = ["other", "cond"]

    return self.apply_with_block("where", align_keys=keys, other=other, cond=cond)

338 

def setitem(self: T, indexer, value) -> T:
    """Delegate to the block-level ``setitem`` via ``apply_with_block``."""
    block_kwargs = {"indexer": indexer, "value": value}
    return self.apply_with_block("setitem", **block_kwargs)

341 

def putmask(self: T, mask, new, align: bool = True) -> T:
    """
    Delegate to the block-level ``putmask`` for every array.

    When ``align`` is true both ``new`` and ``mask`` are aligned per column;
    otherwise only ``mask`` is, and ``new`` is unwrapped with
    ``extract_array`` first.
    """
    if not align:
        new = extract_array(new, extract_numpy=True)
        keys = ["mask"]
    else:
        keys = ["new", "mask"]

    return self.apply_with_block("putmask", align_keys=keys, mask=mask, new=new)

355 

def diff(self: T, n: int, axis: int) -> T:
    """
    Apply ``algos.diff`` to every array.

    ``axis=1`` is only accepted together with ``n == 0`` (asserted) and is
    then carried out as ``axis=0``.
    """
    if axis != 1:
        effective_axis = axis
    else:
        # DataFrame only calls this for n=0, in which case performing it
        # with axis=0 is equivalent
        assert n == 0
        effective_axis = 0
    return self.apply(algos.diff, n=n, axis=effective_axis)

363 

def interpolate(self: T, **kwargs) -> T:
    """Delegate to the block-level ``interpolate`` without swapping ``axis``."""
    method_name = "interpolate"
    return self.apply_with_block(method_name, swap_axis=False, **kwargs)

366 

def shift(self: T, periods: int, axis: int, fill_value) -> T:
    """
    Delegate to the block-level ``shift``.

    ``lib.no_default`` as ``fill_value`` is replaced by ``None``.

    Raises
    ------
    NotImplementedError
        For ``axis == 1`` on a 2D manager.
    """
    if axis == 1 and self.ndim == 2:
        # TODO column-wise shift
        raise NotImplementedError

    fill = None if fill_value is lib.no_default else fill_value

    return self.apply_with_block("shift", periods=periods, axis=axis, fill_value=fill)

378 

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
    """
    Delegate to the block-level ``fillna``.

    A non-None ``limit`` is first passed through ``libalgos.validate_limit``.
    """
    checked_limit = limit
    if checked_limit is not None:
        # Do this validation even if we go through one of the no-op paths
        checked_limit = libalgos.validate_limit(None, limit=checked_limit)

    block_kwargs = {
        "value": value,
        "limit": checked_limit,
        "inplace": inplace,
        "downcast": downcast,
    }
    return self.apply_with_block("fillna", **block_kwargs)

388 

def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
    """Apply ``astype_array_safe`` to every array with the given options."""
    options = {"dtype": dtype, "copy": copy, "errors": errors}
    return self.apply(astype_array_safe, **options)

391 

def convert(
    self: T,
    copy: bool = True,
    datetime: bool = True,
    numeric: bool = True,
    timedelta: bool = True,
) -> T:
    """
    Soft-convert object-dtype arrays; other arrays are returned as-is, or
    copied when ``copy`` is true.
    """

    def _convert(arr):
        if not is_object_dtype(arr.dtype):
            return arr.copy() if copy else arr
        # extract PandasArray for tests that patch PandasArray._typ
        as_ndarray = np.asarray(arr)
        return soft_convert_objects(
            as_ndarray,
            datetime=datetime,
            numeric=numeric,
            timedelta=timedelta,
            copy=copy,
        )

    return self.apply(_convert)

414 

def replace_regex(self: T, **kwargs) -> T:
    """Delegate to the block-level ``_replace_regex`` via ``apply_with_block``."""
    method_name = "_replace_regex"
    return self.apply_with_block(method_name, **kwargs)

417 

def replace(self: T, to_replace, value, inplace: bool) -> T:
    """
    Delegate to the block-level ``replace``.

    ``inplace`` is validated as a bool and ``value`` is asserted to be
    zero-dimensional before delegating.
    """
    checked_inplace = validate_bool_kwarg(inplace, "inplace")
    assert np.ndim(value) == 0, value
    # TODO "replace" is right now implemented on the blocks, we should move
    # it to general array algos so it can be reused here
    block_kwargs = {
        "value": value,
        "to_replace": to_replace,
        "inplace": checked_inplace,
    }
    return self.apply_with_block("replace", **block_kwargs)

426 

def replace_list(
    self: T,
    src_list: list[Any],
    dest_list: list[Any],
    inplace: bool = False,
    regex: bool = False,
) -> T:
    """do a list replace"""
    checked_inplace = validate_bool_kwarg(inplace, "inplace")

    block_kwargs = {
        "src_list": src_list,
        "dest_list": dest_list,
        "inplace": checked_inplace,
        "regex": regex,
    }
    return self.apply_with_block("replace_list", **block_kwargs)

444 

def to_native_types(self: T, **kwargs) -> T:
    """Apply the ``to_native_types`` function in scope to every array."""
    converter = to_native_types
    return self.apply(converter, **kwargs)

447 

@property
def is_mixed_type(self) -> bool:
    """Always ``True`` for this manager."""
    return True

451 

@property
def is_numeric_mixed_type(self) -> bool:
    """Whether every dtype reported by ``get_dtypes`` is numeric."""
    for dtype in self.get_dtypes():
        if not is_numeric_dtype(dtype):
            return False
    return True

455 

@property
def any_extension_types(self) -> bool:
    """Whether any of the blocks in this manager are extension blocks"""
    # Hard-coded for now; block-based check kept for reference:
    # any(block.is_extension for block in self.blocks)
    return False

460 

@property
def is_view(self) -> bool:
    """return a boolean if we are a single block and are a view"""
    # TODO what is this used for?
    # Currently hard-coded: this manager never reports itself as a view.
    return False

466 

@property
def is_single_block(self) -> bool:
    """Whether the manager holds exactly one array."""
    n_arrays = len(self.arrays)
    return n_arrays == 1

470 

471 def _get_data_subset(self: T, predicate: Callable) -> T: 

472 indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] 

473 arrays = [self.arrays[i] for i in indices] 

474 # TODO copy? 

475 # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq, 

476 # see test_describe_datetime_columns 

477 taker = np.array(indices, dtype="intp") 

478 new_cols = self._axes[1].take(taker) 

479 new_axes = [self._axes[0], new_cols] 

480 return type(self)(arrays, new_axes, verify_integrity=False) 

481 

482 def get_bool_data(self: T, copy: bool = False) -> T: 

483 """ 

484 Select columns that are bool-dtype and object-dtype columns that are all-bool. 

485 

486 Parameters 

487 ---------- 

488 copy : bool, default False 

489 Whether to copy the blocks 

490 """ 

491 return self._get_data_subset(is_inferred_bool_dtype) 

492 

def get_numeric_data(self: T, copy: bool = False) -> T:
    """
    Select columns whose dtype is numeric, or whose dtype carries a truthy
    ``_is_numeric`` attribute.

    Parameters
    ----------
    copy : bool, default False
        Accepted for signature compatibility; not used by this
        implementation.
    """

    def _is_numeric(arr):
        return is_numeric_dtype(arr.dtype) or getattr(arr.dtype, "_is_numeric", False)

    return self._get_data_subset(_is_numeric)

506 

def copy(self: T, deep=True) -> T:
    """
    Make a deep or shallow copy of this manager.

    Parameters
    ----------
    deep : bool, "all" or None, default True
        If falsy, the new manager shares the arrays and axis objects (only
        the containing lists are new). If truthy, every array is copied and
        every axis is replaced by a view; with ``"all"`` the axes are
        deep-copied instead. ``None`` is treated as ``True``.

    Returns
    -------
    Same type as ``self``, built without integrity verification.
    """
    if deep is None:
        # ArrayManager does not yet support CoW, so deep=None always means
        # deep=True for now
        deep = True

    if not deep:
        axes_out = list(self._axes)
        arrays_out = list(self.arrays)
    else:
        # this preserves the notion of view copying of axes
        # hit in e.g. tests.io.json.test_pandas
        axes_out = [
            ax.copy(deep=True) if deep == "all" else ax.view() for ax in self._axes
        ]
        arrays_out = [arr.copy() for arr in self.arrays]

    return type(self)(arrays_out, axes_out, verify_integrity=False)

542 

def reindex_indexer(
    self: T,
    new_axis,
    indexer,
    axis: int,
    fill_value=None,
    allow_dups: bool = False,
    copy: bool = True,
    # ignored keywords
    only_slice: bool = False,
    # ArrayManager specific keywords
    use_na_proxy: bool = False,
) -> T:
    """
    Translate ``axis`` with ``_normalize_axis`` and forward everything except
    ``only_slice`` (which is ignored) to ``_reindex_indexer``.
    """
    internal_axis = self._normalize_axis(axis)
    forwarded = (
        new_axis,
        indexer,
        internal_axis,
        fill_value,
        allow_dups,
        copy,
        use_na_proxy,
    )
    return self._reindex_indexer(*forwarded)

566 

567 def _reindex_indexer( 

568 self: T, 

569 new_axis, 

570 indexer: npt.NDArray[np.intp] | None, 

571 axis: int, 

572 fill_value=None, 

573 allow_dups: bool = False, 

574 copy: bool = True, 

575 use_na_proxy: bool = False, 

576 ) -> T: 

577 """ 

578 Parameters 

579 ---------- 

580 new_axis : Index 

581 indexer : ndarray[intp] or None 

582 axis : int 

583 fill_value : object, default None 

584 allow_dups : bool, default False 

585 copy : bool, default True 

586 

587 

588 pandas-indexer with -1's only. 

589 """ 

590 if copy is None: 

591 # ArrayManager does not yet support CoW, so deep=None always means 

592 # deep=True for now 

593 copy = True 

594 

595 if indexer is None: 

596 if new_axis is self._axes[axis] and not copy: 

597 return self 

598 

599 result = self.copy(deep=copy) 

600 result._axes = list(self._axes) 

601 result._axes[axis] = new_axis 

602 return result 

603 

604 # some axes don't allow reindexing with dups 

605 if not allow_dups: 

606 self._axes[axis]._validate_can_reindex(indexer) 

607 

608 if axis >= self.ndim: 

609 raise IndexError("Requested axis not found in manager") 

610 

611 if axis == 1: 

612 new_arrays = [] 

613 for i in indexer: 

614 if i == -1: 

615 arr = self._make_na_array( 

616 fill_value=fill_value, use_na_proxy=use_na_proxy 

617 ) 

618 else: 

619 arr = self.arrays[i] 

620 if copy: 

621 arr = arr.copy() 

622 new_arrays.append(arr) 

623 

624 else: 

625 validate_indices(indexer, len(self._axes[0])) 

626 indexer = ensure_platform_int(indexer) 

627 mask = indexer == -1 

628 needs_masking = mask.any() 

629 new_arrays = [ 

630 take_1d( 

631 arr, 

632 indexer, 

633 allow_fill=needs_masking, 

634 fill_value=fill_value, 

635 mask=mask, 

636 # if fill_value is not None else blk.fill_value 

637 ) 

638 for arr in self.arrays 

639 ] 

640 

641 new_axes = list(self._axes) 

642 new_axes[axis] = new_axis 

643 

644 return type(self)(new_arrays, new_axes, verify_integrity=False) 

645 

def take(
    self: T,
    indexer,
    axis: int = 1,
    verify: bool = True,
    convert_indices: bool = True,
) -> T:
    """
    Take items along any axis.

    Parameters
    ----------
    indexer : slice or array-like of int
        Positions to take. A slice may leave ``start``, ``stop`` or ``step``
        as ``None``; open ends are resolved against the axis length.
    axis : int, default 1
        Translated with ``_normalize_axis`` before use.
    verify : bool, default True
        Forwarded to ``maybe_convert_indices``.
    convert_indices : bool, default True
        Whether to pass the positions through ``maybe_convert_indices``.

    Raises
    ------
    ValueError
        If the resulting indexer is not 1-dimensional.
    """
    axis = self._normalize_axis(axis)

    if isinstance(indexer, slice):
        step = 1 if indexer.step is None else indexer.step
        start, stop = indexer.start, indexer.stop
        if start is None or stop is None:
            # np.arange cannot take None bounds, so fill in open ends
            # depending on the direction of travel.
            axis_len = self.shape_proper[axis]
            if start is None:
                start = 0 if step > 0 else axis_len - 1
            if stop is None:
                stop = axis_len if step > 0 else -1
        indexer = np.arange(start, stop, step, dtype="int64")
    else:
        indexer = np.asanyarray(indexer, dtype="int64")

    if indexer.ndim != 1:
        raise ValueError("indexer should be 1-dimensional")

    n = self.shape_proper[axis]
    if convert_indices:
        indexer = maybe_convert_indices(indexer, n, verify=verify)

    new_labels = self._axes[axis].take(indexer)
    return self._reindex_indexer(
        new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
    )

675 

676 def _make_na_array(self, fill_value=None, use_na_proxy=False): 

677 if use_na_proxy: 

678 assert fill_value is None 

679 return NullArrayProxy(self.shape_proper[0]) 

680 

681 if fill_value is None: 

682 fill_value = np.nan 

683 

684 dtype, fill_value = infer_dtype_from_scalar(fill_value) 

685 # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], 

686 # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, 

687 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], 

688 # _DTypeDict, Tuple[Any, Any]]]" 

689 values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] 

690 values.fill(fill_value) 

691 return values 

692 

693 def _equal_values(self, other) -> bool: 

694 """ 

695 Used in .equals defined in base class. Only check the column values 

696 assuming shape and indexes have already been checked. 

697 """ 

698 for left, right in zip(self.arrays, other.arrays): 

699 if not array_equals(left, right): 

700 return False 

701 else: 

702 return True 

703 

704 # TODO 

705 # to_dict 

706 

707 

708class ArrayManager(BaseArrayManager): 

@property
def ndim(self) -> Literal[2]:
    """Number of dimensions; this manager is always 2-dimensional."""
    return 2

712 

713 def __init__( 

714 self, 

715 arrays: list[np.ndarray | ExtensionArray], 

716 axes: list[Index], 

717 verify_integrity: bool = True, 

718 ) -> None: 

719 # Note: we are storing the axes in "_axes" in the (row, columns) order 

720 # which contrasts the order how it is stored in BlockManager 

721 self._axes = axes 

722 self.arrays = arrays 

723 

724 if verify_integrity: 

725 self._axes = [ensure_index(ax) for ax in axes] 

726 arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays] 

727 self.arrays = [maybe_coerce_values(arr) for arr in arrays] 

728 self._verify_integrity() 

729 

730 def _verify_integrity(self) -> None: 

731 n_rows, n_columns = self.shape_proper 

732 if not len(self.arrays) == n_columns: 

733 raise ValueError( 

734 "Number of passed arrays must equal the size of the column Index: " 

735 f"{len(self.arrays)} arrays vs {n_columns} columns." 

736 ) 

737 for arr in self.arrays: 

738 if not len(arr) == n_rows: 

739 raise ValueError( 

740 "Passed arrays should have the same length as the rows Index: " 

741 f"{len(arr)} vs {n_rows} rows" 

742 ) 

743 if not isinstance(arr, (np.ndarray, ExtensionArray)): 

744 raise ValueError( 

745 "Passed arrays should be np.ndarray or ExtensionArray instances, " 

746 f"got {type(arr)} instead" 

747 ) 

748 if not arr.ndim == 1: 

749 raise ValueError( 

750 "Passed arrays should be 1-dimensional, got array with " 

751 f"{arr.ndim} dimensions instead." 

752 ) 

753 

754 # -------------------------------------------------------------------- 

755 # Indexing 

756 

def fast_xs(self, loc: int) -> SingleArrayManager:
    """
    Return the array corresponding to `frame.iloc[loc]`.

    Parameters
    ----------
    loc : int

    Returns
    -------
    SingleArrayManager
        Wraps one array holding element ``loc`` of every column, labelled
        by the second stored axis.
    """
    row_dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

    row_values = [arr[loc] for arr in self.arrays]
    if isinstance(row_dtype, ExtensionDtype):
        array_cls = row_dtype.construct_array_type()
        result = array_cls._from_sequence(row_values, dtype=row_dtype)
    # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
    elif is_datetime64_ns_dtype(row_dtype):
        result = DatetimeArray._from_sequence(row_values, dtype=row_dtype)._data
    elif is_timedelta64_ns_dtype(row_dtype):
        result = TimedeltaArray._from_sequence(row_values, dtype=row_dtype)._data
    else:
        result = np.array(row_values, dtype=row_dtype)
    return SingleArrayManager([result], [self._axes[1]])

782 

def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
    """
    Slice the manager along ``axis`` (translated with ``_normalize_axis``).

    Internal axis 0 slices every array; internal axis 1 slices the list of
    arrays. The result is built without integrity verification.
    """
    axis = self._normalize_axis(axis)

    if axis == 1:
        arrays = self.arrays[slobj]
    elif axis == 0:
        arrays = [column[slobj] for column in self.arrays]

    sliced_axes = list(self._axes)
    sliced_axes[axis] = sliced_axes[axis]._getitem_slice(slobj)

    return type(self)(arrays, sliced_axes, verify_integrity=False)

795 

def iget(self, i: int) -> SingleArrayManager:
    """
    Return the data as a SingleArrayManager.

    The array at position ``i`` is wrapped together with the first stored
    axis; the array itself is not copied.
    """
    column = self.arrays[i]
    row_axis = self._axes[0]
    return SingleArrayManager([column], [row_axis])

802 

def iget_values(self, i: int) -> ArrayLike:
    """
    Return the data for column i as the values (ndarray or ExtensionArray).
    """
    column = self.arrays[i]
    return column

808 

@property
def column_arrays(self) -> list[ArrayLike]:
    """
    Used in the JSON C code to access column arrays.

    Every stored array is passed through ``np.asarray``.
    """
    return list(map(np.asarray, self.arrays))

816 

def iset(
    self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
) -> None:
    """
    Set new column(s).

    This changes the ArrayManager in-place, but replaces (an) existing
    column(s), not changing column values in-place).

    Parameters
    ----------
    loc : integer, slice or boolean mask
        Positional location (already bounds checked)
    value : np.ndarray or ExtensionArray
        1D (or a single-column 2D ndarray) for an integer ``loc``; 2D with
        one column per selected position otherwise.
    inplace : bool, default False
        Whether overwrite existing array as opposed to replacing it.
        Not consulted by this implementation.
    """
    # single column -> single integer index
    if lib.is_integer(loc):

        # TODO can we avoid needing to unpack this here? That means converting
        # DataFrame into 1D array when loc is an integer
        if isinstance(value, np.ndarray) and value.ndim == 2:
            assert value.shape[1] == 1
            value = value[:, 0]

        # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
        # but we should avoid that and pass directly the proper array
        value = maybe_coerce_values(value)

        assert isinstance(value, (np.ndarray, ExtensionArray))
        assert value.ndim == 1
        assert len(value) == len(self._axes[0])
        self.arrays[loc] = value
        return

    # multiple columns -> convert slice or array to integer indices
    if isinstance(loc, slice):
        first = 0 if loc.start is None else loc.start
        last = self.shape_proper[1] if loc.stop is None else loc.stop
        stride = 1 if loc.step is None else loc.step
        targets = range(first, last, stride)
    else:
        assert isinstance(loc, np.ndarray)
        assert loc.dtype == "bool"
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "range")
        targets = np.nonzero(loc)[0]  # type: ignore[assignment]

    assert value.ndim == 2
    assert value.shape[0] == len(self._axes[0])

    for source_col, target_pos in enumerate(targets):
        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[slice, int]"
        self.arrays[target_pos] = value[:, source_col]  # type: ignore[call-overload]
    return

876 

def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
    """
    Set values ("setitem") into a single column (not setting the full column).

    This is a method on the ArrayManager level, to avoid creating an
    intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)

    Raises
    ------
    TypeError
        If ``loc`` is not an integer.
    """
    if not is_integer(loc):
        raise TypeError("The column index should be an integer")
    column_mgr = SingleArrayManager([self.arrays[loc]], [self._axes[0]])
    updated = column_mgr.setitem((idx,), value)
    # update existing ArrayManager in-place
    self.arrays[loc] = updated.arrays[0]

891 

def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
    """
    Insert item at selected position.

    Parameters
    ----------
    loc : int
    item : hashable
    value : np.ndarray or ExtensionArray
        A 2D value is accepted only when its first dimension has length 1.

    Raises
    ------
    ValueError
        If ``value`` is 2D with a first dimension other than 1.
    """
    # insert to the axis; this could possibly raise a TypeError
    new_axis = self.items.insert(loc, item)

    value = extract_array(value, extract_numpy=True)
    if value.ndim == 2:
        if value.shape[0] != 1:
            raise ValueError(
                f"Expected a 1D array, got an array with shape {value.shape}"
            )
        # error: No overload variant of "__getitem__" of "ExtensionArray"
        # matches argument type "Tuple[int, slice]"
        value = value[0, :]  # type: ignore[call-overload]
    value = maybe_coerce_values(value)

    # TODO self.arrays can be empty
    # assert len(value) == len(self.arrays[0])

    # TODO is this copy needed?
    updated = self.arrays.copy()
    updated.insert(loc, value)

    self.arrays = updated
    self._axes[1] = new_axis

926 

def idelete(self, indexer) -> ArrayManager:
    """
    Delete selected locations in-place (new block and array, same BlockManager)

    ``arrays`` and ``_axes`` are rebound to new lists without the positions
    in ``indexer``; ``self`` is returned.
    """
    keep_mask = np.ones(self.shape[0], dtype=np.bool_)
    keep_mask[indexer] = False

    kept_positions = np.nonzero(keep_mask)[0]
    self.arrays = [self.arrays[pos] for pos in kept_positions]
    self._axes = [self._axes[0], self._axes[1][keep_mask]]
    return self

937 

938 # -------------------------------------------------------------------- 

939 # Array-wise Operation 

940 

def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Apply grouped reduction function columnwise, returning a new ArrayManager.

    Parameters
    ----------
    func : grouped reduction function
        Called with each column reshaped to 2D.
    ignore_failures : bool, default False
        Whether to drop columns where func raises TypeError or
        NotImplementedError instead of re-raising.

    Returns
    -------
    ArrayManager
    """
    reduced: list[np.ndarray] = []
    kept_positions: list[int] = []

    for pos, column in enumerate(self.arrays):
        # grouped_reduce functions all expect 2D arrays
        as_2d = ensure_block_shape(column, ndim=2)
        try:
            res = func(as_2d)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                continue
            raise

        if res.ndim == 2:
            # reverse of ensure_block_shape
            assert res.shape[0] == 1
            res = res[0]

        reduced.append(res)
        kept_positions.append(pos)

    if reduced:
        index = Index(range(reduced[0].shape[0]))
    else:
        index = Index([None])  # placeholder

    columns = self.items
    if ignore_failures:
        columns = self.items[np.array(kept_positions, dtype="int64")]

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]

989 

def reduce(
    self: T, func: Callable, ignore_failures: bool = False
) -> tuple[T, np.ndarray]:
    """
    Apply reduction function column-wise, returning a single-row ArrayManager.

    Parameters
    ----------
    func : reduction function
        Called as ``func(arr, axis=0)`` for every column.
    ignore_failures : bool, default False
        Whether to drop columns where func raises TypeError.

    Returns
    -------
    ArrayManager
    np.ndarray
        Indexer of column indices that are retained (always integer dtype,
        also when no column is retained).
    """
    result_arrays: list[np.ndarray] = []
    result_indices: list[int] = []
    for i, arr in enumerate(self.arrays):
        try:
            res = func(arr, axis=0)
        except TypeError:
            if not ignore_failures:
                raise
        else:
            # TODO NaT doesn't preserve dtype, so we need to ensure to create
            # a timedelta result array if original was timedelta
            # what if datetime results in timedelta? (eg std)
            if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
                result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
            else:
                # error: Argument 1 to "append" of "list" has incompatible type
                # "ExtensionArray"; expected "ndarray"
                result_arrays.append(
                    sanitize_array([res], None)  # type: ignore[arg-type]
                )
            result_indices.append(i)

    index = Index._simple_new(np.array([None], dtype=object))  # placeholder
    if ignore_failures:
        # Force an integer dtype: np.array([]) would otherwise be float64,
        # which is not usable as a positional indexer.
        indexer = np.array(result_indices, dtype=np.intp)
        columns = self.items[result_indices]
    else:
        indexer = np.arange(self.shape[0])
        columns = self.items

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    new_mgr = type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]
    return new_mgr, indexer

1042 

def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
    """
    Apply ``array_op`` pairwise to the arrays of this manager and ``other``.

    Arrays are paired by position (``self.arrays[i]`` with
    ``other.arrays[i]``). The result is a new manager of the same type as
    ``self`` that reuses ``self._axes``.
    """
    # TODO what if `other` is BlockManager ?
    combined = []
    for lhs, rhs in zip(self.arrays, other.arrays):
        combined.append(array_op(lhs, rhs))
    return type(self)(combined, self._axes)

1054 

def quantile(
    self,
    *,
    qs: Float64Index,
    axis: int = 0,
    transposed: bool = False,
    interpolation="linear",
) -> ArrayManager:
    """
    Compute the quantiles ``qs`` of every stored array.

    Parameters
    ----------
    qs : Float64Index
        Quantiles to compute; also used as the first axis of the result.
    axis : int, default 0
        Must be 1 (asserted).
    transposed : bool, default False
        Not used by this implementation.
    interpolation : str, default "linear"
        Passed through to ``quantile_compat``.

    Returns
    -------
    ArrayManager
    """
    two_dim = [ensure_block_shape(values, 2) for values in self.arrays]
    assert axis == 1
    computed = [
        quantile_compat(block, np.asarray(qs._values), interpolation)
        for block in two_dim
    ]

    # 2D results must have exactly one row; store that row as a 1D array
    flattened = []
    for res in computed:
        if res.ndim == 2:
            assert res.shape[0] == 1, res.shape
            res = res[0]
        flattened.append(res)

    return type(self)(flattened, [qs, self._axes[1]])

1076 

1077 # ---------------------------------------------------------------- 

1078 

def unstack(self, unstacker, fill_value) -> ArrayManager:
    """
    Return a new manager with every stored array unstacked.

    Each stored array produces ``unstacker.full_shape[1]`` new arrays, one
    per column of the unstacker's 2D layout.

    Parameters
    ----------
    unstacker : reshape._Unstacker
    fill_value : Any
        fill_value for newly introduced missing values.

    Returns
    -------
    unstacked : ArrayManager
        Built with ``verify_integrity=False``.
    """
    positions, _ = unstacker._indexer_and_to_sort
    fully_populated = unstacker.mask.all()

    if fully_populated:
        flat_indexer = positions
    else:
        # -1 marks the slots that have no source value
        flat_indexer = np.full(unstacker.mask.shape, -1)
        flat_indexer[unstacker.mask] = positions

    if fully_populated:

        def take_column(values, col):
            return take_1d(values, indexer_2d[:, col], allow_fill=False)

    else:
        # calculating the full mask once and passing it to take_1d is faster
        # than letting take_1d calculate it in each repeated call
        missing_2d = (~unstacker.mask).reshape(*unstacker.full_shape)
        column_has_missing = missing_2d.any(axis=0)

        def take_column(values, col):
            return take_1d(
                values,
                indexer_2d[:, col],
                allow_fill=column_has_missing[col],
                fill_value=fill_value,
                mask=missing_2d[:, col],
            )

    indexer_2d = ensure_platform_int(flat_indexer.reshape(*unstacker.full_shape))

    new_arrays = [
        take_column(values, col)
        for values in self.arrays
        for col in range(unstacker.full_shape[1])
    ]

    new_axes = [unstacker.new_index, unstacker.get_new_columns(self._axes[1])]
    return type(self)(new_arrays, new_axes, verify_integrity=False)

1131 

def as_array(
    self,
    dtype=None,
    copy: bool = False,
    na_value: object = lib.no_default,
) -> np.ndarray:
    """
    Convert the manager's arrays into a single 2D numpy array.

    Parameters
    ----------
    dtype : object, default None
        Data type of the return array. When falsy, the dtype is derived
        from the stored arrays via ``interleaved_dtype``.
    copy : bool, default False
        If True then guarantee that a copy is returned. A value of
        False does not guarantee that the underlying data is not
        copied.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
        One column per stored array, shaped like ``self.shape_proper``.
    """
    if not self.arrays:
        # no columns: empty float array, transposed from self.shape
        return np.empty(self.shape, dtype=float).transpose()

    # We want to copy when na_value is provided to avoid
    # mutating the original object
    fill_na = na_value is not lib.no_default
    copy = copy or fill_na

    if not dtype:
        dtype = interleaved_dtype([values.dtype for values in self.arrays])

    # reduce the requested dtype to something np.empty understands
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype) or is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)
    for col, values in enumerate(self.arrays):
        result[:, col] = values.astype(dtype, copy=copy)

    if fill_na:
        result[isna(result)] = na_value

    return result

1186 

1187 

class SingleArrayManager(BaseArrayManager, SingleDataManager):
    # Manager backed by a single array and a single axis; both counts are
    # asserted in __init__ when verify_integrity is True.

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    @property
    def ndim(self) -> Literal[1]:
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Store the inputs as given; they are only normalized when
        # verify_integrity is requested.
        self._axes = axes
        self.arrays = arrays

        if not verify_integrity:
            return

        assert len(axes) == 1
        assert len(arrays) == 1
        self._axes = [ensure_index(ax) for ax in self._axes]
        values = maybe_coerce_values(arrays[0])
        values = extract_pandas_array(values, None, 1)[0]
        self.arrays = [values]
        self._verify_integrity()

    def _verify_integrity(self) -> None:
        # Check that there is one array, that its length matches the axis,
        # and that it is 1-dimensional.
        (n_rows,) = self.shape
        assert len(self.arrays) == 1
        values = self.arrays[0]
        assert len(values) == n_rows
        if values.ndim != 1:
            raise ValueError(
                "Passed array should be 1-dimensional, got array with "
                f"{values.ndim} dimensions instead."
            )

    @staticmethod
    def _normalize_axis(axis):
        # Returns the axis unchanged.
        return axis

    def make_empty(self, axes=None) -> SingleArrayManager:
        """Return an empty ArrayManager with index/array of length 0"""
        if axes is None:
            axes = [Index([], dtype=object)]
        empty_values: np.ndarray = np.array([], dtype=self.dtype)
        return type(self)([empty_values], axes)

    @classmethod
    def from_array(cls, array, index) -> SingleArrayManager:
        # Alternate constructor wrapping one array and one index.
        return cls([array], [index])

    @property
    def axes(self):
        return self._axes

    @property
    def index(self) -> Index:
        return self._axes[0]

    @property
    def dtype(self):
        return self.array.dtype

    def external_values(self):
        """The array that Series.values returns"""
        return external_values(self.array)

    def internal_values(self):
        """The array that Series._values returns"""
        return self.array

    def array_values(self):
        """The array that Series.array returns"""
        values = self.array
        # plain ndarrays are wrapped in PandasArray; anything else is
        # returned as-is
        return PandasArray(values) if isinstance(values, np.ndarray) else values

    @property
    def _can_hold_na(self) -> bool:
        values = self.array
        if not isinstance(values, np.ndarray):
            # ExtensionArray
            return values._can_hold_na
        return values.dtype.kind not in ["b", "i", "u"]

    @property
    def is_single_block(self) -> bool:
        return True

    def fast_xs(self, loc: int) -> SingleArrayManager:
        raise NotImplementedError("Use series._values[loc] instead")

    def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager:
        # Slice the array and the index with the same slice object.
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        sliced_values = self.array[slobj]
        sliced_index = self.index._getitem_slice(slobj)
        return type(self)([sliced_values], [sliced_index], verify_integrity=False)

    def getitem_mgr(self, indexer) -> SingleArrayManager:
        # Index the array and the index with the same indexer.
        picked_values = self.array[indexer]
        picked_index = self.index[indexer]
        return type(self)([picked_values], [picked_index])

    def apply(self, func, **kwargs):
        # ``func`` is either a callable taking the array, or the name of a
        # method to look up on the array.
        if not callable(func):
            result = getattr(self.array, func)(**kwargs)
        else:
            result = func(self.array, **kwargs)
        return type(self)([result], self._axes)

    def setitem(self, indexer, value) -> SingleArrayManager:
        """
        Set values with indexer.

        For SingleArrayManager, this backs s[indexer] = value

        See `setitem_inplace` for a version that works inplace and doesn't
        return a new Manager.
        """
        too_many_dims = isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim
        if too_many_dims:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def idelete(self, indexer) -> SingleArrayManager:
        """
        Delete selected locations in-place (new array, same ArrayManager)
        """
        keep_mask = np.ones(self.shape[0], dtype=np.bool_)
        keep_mask[indexer] = False

        self.arrays = [self.arrays[0][keep_mask]]
        self._axes = [self._axes[0][keep_mask]]
        return self

    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
        # used in get_numeric_data / get_bool_data
        if not predicate(self.array):
            return self.make_empty()
        return type(self)(self.arrays, self._axes, verify_integrity=False)

    def set_values(self, values: ArrayLike) -> None:
        """
        Set (replace) the values of the SingleArrayManager in place.

        Use at your own risk! This does not check if the passed values are
        valid for the current SingleArrayManager (length, dtype, etc).
        """
        self.arrays[0] = values

    def to_2d_mgr(self, columns: Index) -> ArrayManager:
        """
        Manager analogue of Series.to_frame
        """
        return ArrayManager(
            [self.arrays[0]],
            [self.axes[0], columns],
            verify_integrity=False,
        )

1357 

1358 

class NullArrayProxy:
    """
    Proxy object for an all-NA array.

    Only the length of the array is stored, not the dtype. The dtype
    is only known when actually concatenating (after determining the
    common dtype, for which this proxy is ignored).
    Using this object means internals/concat.py does not have to determine
    the proper dtype and array type itself.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Create the actual all-NA array of length ``self.n`` for this proxy.

        Parameters
        ----------
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if not isinstance(dtype, ExtensionDtype):
            # when introducing missing values, int becomes float, bool becomes object
            na_dtype = ensure_dtype_can_hold_na(dtype)
            na_fill = na_value_for_dtype(na_dtype)
            values = np.empty(self.n, dtype=na_dtype)
            values.fill(na_fill)
            return ensure_wrapped_if_datetimelike(values)

        # Extension dtype: take position -1 n times from an empty array with
        # allow_fill=True, so every element is the fill value.
        array_type = dtype.construct_array_type()
        empty = array_type._from_sequence([], dtype=dtype)
        all_missing = np.full(self.n, -1, dtype=np.intp)
        return empty.take(all_missing, allow_fill=True)