Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/managers.py: 13%

1014 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import itertools 

4from typing import ( 

5 Any, 

6 Callable, 

7 Hashable, 

8 Literal, 

9 Sequence, 

10 TypeVar, 

11 cast, 

12) 

13import warnings 

14import weakref 

15 

16import numpy as np 

17 

18from pandas._config import get_option 

19 

20from pandas._libs import ( 

21 algos as libalgos, 

22 internals as libinternals, 

23 lib, 

24) 

25from pandas._libs.internals import BlockPlacement 

26from pandas._typing import ( 

27 ArrayLike, 

28 DtypeObj, 

29 Shape, 

30 npt, 

31 type_t, 

32) 

33from pandas.errors import PerformanceWarning 

34from pandas.util._decorators import cache_readonly 

35from pandas.util._exceptions import find_stack_level 

36from pandas.util._validators import validate_bool_kwarg 

37 

38from pandas.core.dtypes.cast import infer_dtype_from_scalar 

39from pandas.core.dtypes.common import ( 

40 ensure_platform_int, 

41 is_1d_only_ea_dtype, 

42 is_dtype_equal, 

43 is_list_like, 

44) 

45from pandas.core.dtypes.dtypes import ExtensionDtype 

46from pandas.core.dtypes.generic import ( 

47 ABCDataFrame, 

48 ABCSeries, 

49) 

50from pandas.core.dtypes.missing import ( 

51 array_equals, 

52 isna, 

53) 

54 

55import pandas.core.algorithms as algos 

56from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

57from pandas.core.arrays.sparse import SparseDtype 

58import pandas.core.common as com 

59from pandas.core.construction import ( 

60 ensure_wrapped_if_datetimelike, 

61 extract_array, 

62) 

63from pandas.core.indexers import maybe_convert_indices 

64from pandas.core.indexes.api import ( 

65 Float64Index, 

66 Index, 

67 ensure_index, 

68) 

69from pandas.core.internals.base import ( 

70 DataManager, 

71 SingleDataManager, 

72 interleaved_dtype, 

73) 

74from pandas.core.internals.blocks import ( 

75 Block, 

76 DatetimeTZBlock, 

77 NumpyBlock, 

78 ensure_block_shape, 

79 extend_blocks, 

80 get_block_type, 

81 new_block, 

82 new_block_2d, 

83) 

84from pandas.core.internals.ops import ( 

85 blockwise_all, 

86 operate_blockwise, 

87) 

88 

# TypeVar bound to BaseBlockManager so methods can declare "returns same
# manager subclass as self" (used as `self: T ... -> T`).
T = TypeVar("T", bound="BaseBlockManager")

90 

91 

class BaseBlockManager(DataManager):
    """
    Core internal data structure to implement DataFrame, Series, etc.

    Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
    lightweight blocked set of labeled data to be manipulated by the DataFrame
    public API class

    Attributes
    ----------
    shape
    ndim
    axes
    values
    items

    Methods
    -------
    set_axis(axis, new_labels)
    copy(deep=True)

    get_dtypes

    apply(func, axes, block_filter_fn)

    get_bool_data
    get_numeric_data

    get_slice(slice_like, axis)
    get(label)
    iget(loc)

    take(indexer, axis)
    reindex_axis(new_labels, axis)
    reindex_indexer(new_labels, indexer, axis)

    delete(label)
    insert(loc, label, value)
    set(label, value)

    Parameters
    ----------
    blocks: Sequence of Block
    axes: Sequence of Index
    verify_integrity: bool, default True

    Notes
    -----
    This is *not* a public API class
    """

    __slots__ = ()

    # Per-column lookup tables (see the `blknos` property docstring):
    # _blknos[i] is the index into self.blocks of the block holding column i;
    # _blklocs[i] is that column's position within the block.
    _blknos: npt.NDArray[np.intp]
    _blklocs: npt.NDArray[np.intp]
    blocks: tuple[Block, ...]
    axes: list[Index]
    # Copy-on-Write bookkeeping: one weakref (or None) per block, plus the
    # manager the referenced blocks came from (kept alive via `parent`).
    refs: list[weakref.ref | None] | None
    parent: object

151 

    @property
    def ndim(self) -> int:
        # Abstract: concrete subclasses fix this (2 for BlockManager,
        # 1 for SingleBlockManager).
        raise NotImplementedError

    # Consolidation state flags; maintained by the consolidation machinery.
    _known_consolidated: bool
    _is_consolidated: bool

158 

    def __init__(self, blocks, axes, refs=None, verify_integrity: bool = True) -> None:
        # Abstract: concrete managers implement construction.
        raise NotImplementedError

161 

    @classmethod
    def from_blocks(
        cls: type_t[T],
        blocks: list[Block],
        axes: list[Index],
        refs: list[weakref.ref | None] | None = None,
        parent: object = None,
    ) -> T:
        # Abstract alternate constructor; shared signature across subclasses.
        raise NotImplementedError

171 

    @property
    def blknos(self) -> npt.NDArray[np.intp]:
        """
        Suppose we want to find the array corresponding to our i'th column.

        blknos[i] identifies the block from self.blocks that contains this column.

        blklocs[i] identifies the column of interest within
        self.blocks[self.blknos[i]]
        """
        if self._blknos is None:
            # Note: these can be altered by other BlockManager methods.
            self._rebuild_blknos_and_blklocs()

        return self._blknos

187 

    @property
    def blklocs(self) -> npt.NDArray[np.intp]:
        """
        See blknos.__doc__
        """
        if self._blklocs is None:
            # Note: these can be altered by other BlockManager methods.
            self._rebuild_blknos_and_blklocs()

        return self._blklocs

198 

199 def make_empty(self: T, axes=None) -> T: 

200 """return an empty BlockManager with the items axis of len 0""" 

201 if axes is None: 

202 axes = [Index([])] + self.axes[1:] 

203 

204 # preserve dtype if possible 

205 if self.ndim == 1: 

206 assert isinstance(self, SingleBlockManager) # for mypy 

207 blk = self.blocks[0] 

208 arr = blk.values[:0] 

209 bp = BlockPlacement(slice(0, 0)) 

210 nb = blk.make_block_same_class(arr, placement=bp) 

211 blocks = [nb] 

212 else: 

213 blocks = [] 

214 return type(self).from_blocks(blocks, axes) 

215 

    def __nonzero__(self) -> bool:
        # A manager is always truthy, even when it holds no data.
        return True

    # Python3 compat
    __bool__ = __nonzero__

221 

222 def _normalize_axis(self, axis: int) -> int: 

223 # switch axis to follow BlockManager logic 

224 if self.ndim == 2: 

225 axis = 1 if axis == 0 else 0 

226 return axis 

227 

    def set_axis(self, axis: int, new_labels: Index) -> None:
        """Replace the labels along `axis` in place, after validation."""
        # Caller is responsible for ensuring we have an Index object.
        self._validate_set_axis(axis, new_labels)
        self.axes[axis] = new_labels

232 

    @property
    def is_single_block(self) -> bool:
        """True if all data lives in exactly one block."""
        # Assumes we are 2D; overridden by SingleBlockManager
        return len(self.blocks) == 1

237 

    @property
    def items(self) -> Index:
        """The labels along the first manager axis."""
        return self.axes[0]

241 

    def _has_no_reference(self, i: int) -> bool:
        """
        Check for column `i` if it has references.
        (whether it references another array or is itself being referenced)
        Returns True if the column has no references.
        """
        # Delegate to the block-level check for the block holding column i.
        blkno = self.blknos[i]
        return self._has_no_reference_block(blkno)

250 

    def _has_no_reference_block(self, blkno: int) -> bool:
        """
        Check for block `i` if it has references.
        (whether it references another array or is itself being referenced)
        Returns True if the block has no references.
        """
        # Two directions are checked: refs[blkno] tracks whether this block
        # references another manager's block, and the weakref count tracks
        # whether anyone else references this block.
        # TODO(CoW) include `or self.refs[blkno]() is None` ?
        return (
            self.refs is None or self.refs[blkno] is None
        ) and weakref.getweakrefcount(self.blocks[blkno]) == 0

261 

    def _clear_reference_block(self, blkno: int) -> None:
        """
        Clear any reference for column `i`.
        """
        if self.refs is not None:
            self.refs[blkno] = None
            # If no block references anything anymore, drop the parent so it
            # can be garbage-collected.
            if com.all_none(*self.refs):
                self.parent = None

270 

271 def get_dtypes(self): 

272 dtypes = np.array([blk.dtype for blk in self.blocks]) 

273 return dtypes.take(self.blknos) 

274 

    @property
    def arrays(self) -> list[ArrayLike]:
        """
        Quick access to the backing arrays of the Blocks.

        Only for compatibility with ArrayManager for testing convenience.
        Not to be used in actual code, and return value is not the same as the
        ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs).

        Warning! The returned arrays don't handle Copy-on-Write, so this should
        be used with caution (only in read-mode).
        """
        return [blk.values for blk in self.blocks]

288 

289 def __repr__(self) -> str: 

290 output = type(self).__name__ 

291 for i, ax in enumerate(self.axes): 

292 if i == 0: 

293 output += f"\nItems: {ax}" 

294 else: 

295 output += f"\nAxis {i}: {ax}" 

296 

297 for block in self.blocks: 

298 output += f"\n{block}" 

299 return output 

300 

    def apply(
        self: T,
        f,
        align_keys: list[str] | None = None,
        ignore_failures: bool = False,
        **kwargs,
    ) -> T:
        """
        Iterate over the blocks, collect and create a new BlockManager.

        Parameters
        ----------
        f : str or callable
            Name of the Block method to apply.
        align_keys: List[str] or None, default None
        ignore_failures: bool, default False
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        BlockManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_blocks: list[Block] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        for b in self.blocks:

            if aligned_args:

                # Re-slice each aligned argument down to this block's columns.
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values
                        else:
                            kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values
                    else:
                        # otherwise we have an ndarray
                        kwargs[k] = obj[b.mgr_locs.indexer]

            try:
                if callable(f):
                    applied = b.apply(f, **kwargs)
                else:
                    applied = getattr(b, f)(**kwargs)
            except (TypeError, NotImplementedError):
                # Optionally skip blocks on which the operation is unsupported.
                if not ignore_failures:
                    raise
                continue
            result_blocks = extend_blocks(applied, result_blocks)

        if ignore_failures:
            # Some blocks may have been dropped, so re-derive the items axis.
            return self._combine(result_blocks)

        out = type(self).from_blocks(result_blocks, self.axes)
        return out

364 

365 def where(self: T, other, cond, align: bool) -> T: 

366 if align: 

367 align_keys = ["other", "cond"] 

368 else: 

369 align_keys = ["cond"] 

370 other = extract_array(other, extract_numpy=True) 

371 

372 return self.apply( 

373 "where", 

374 align_keys=align_keys, 

375 other=other, 

376 cond=cond, 

377 ) 

378 

    def setitem(self: T, indexer, value) -> T:
        """
        Set values with indexer.

        For SingleBlockManager, this backs s[indexer] = value
        """
        if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")

        if _using_copy_on_write() and not self._has_no_reference(0):
            # if being referenced -> perform Copy-on-Write and clear the reference
            # this method is only called if there is a single block -> hardcoded 0
            self = self.copy()

        return self.apply("setitem", indexer=indexer, value=value)

394 

    def putmask(self, mask, new, align: bool = True):
        """Blockwise putmask: set `new` where `mask` is True."""
        if (
            _using_copy_on_write()
            and self.refs is not None
            and not all(ref is None for ref in self.refs)
        ):
            # some reference -> copy full dataframe
            # TODO(CoW) this could be optimized to only copy the blocks that would
            # get modified
            self = self.copy()

        if align:
            align_keys = ["new", "mask"]
        else:
            align_keys = ["mask"]
            new = extract_array(new, extract_numpy=True)

        return self.apply(
            "putmask",
            align_keys=align_keys,
            mask=mask,
            new=new,
        )

418 

419 def diff(self: T, n: int, axis: int) -> T: 

420 axis = self._normalize_axis(axis) 

421 return self.apply("diff", n=n, axis=axis) 

422 

    def interpolate(self: T, **kwargs) -> T:
        """Blockwise interpolate; kwargs are forwarded to Block.interpolate."""
        return self.apply("interpolate", **kwargs)

425 

    def shift(self: T, periods: int, axis: int, fill_value) -> T:
        """Blockwise shift by `periods` along `axis`, filling with fill_value."""
        axis = self._normalize_axis(axis)
        # lib.no_default sentinel means "use the block's default fill".
        if fill_value is lib.no_default:
            fill_value = None

        return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

432 

    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
        """Blockwise fillna; backs DataFrame/Series.fillna."""

        if limit is not None:
            # Do this validation even if we go through one of the no-op paths
            limit = libalgos.validate_limit(None, limit=limit)
        if inplace:
            # TODO(CoW) can be optimized to only copy those blocks that have refs
            if _using_copy_on_write() and any(
                not self._has_no_reference_block(i) for i in range(len(self.blocks))
            ):
                self = self.copy()

        return self.apply(
            "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
        )

448 

    def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
        """Blockwise cast to `dtype`; errors handling delegated to Block.astype."""
        return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

451 

    def convert(
        self: T,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> T:
        """Blockwise soft-conversion of object dtypes to the enabled kinds."""
        return self.apply(
            "convert",
            copy=copy,
            datetime=datetime,
            numeric=numeric,
            timedelta=timedelta,
        )

466 

    def replace(self: T, to_replace, value, inplace: bool) -> T:
        """Blockwise scalar replace; list-likes are handled by replace_list."""
        inplace = validate_bool_kwarg(inplace, "inplace")
        # NDFrame.replace ensures the not-is_list_likes here
        assert not is_list_like(to_replace)
        assert not is_list_like(value)
        return self.apply(
            "replace", to_replace=to_replace, value=value, inplace=inplace
        )

475 

    def replace_regex(self, **kwargs):
        """Blockwise regex-based replace; kwargs forwarded to Block._replace_regex."""
        return self.apply("_replace_regex", **kwargs)

478 

    def replace_list(
        self: T,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> T:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        bm = self.apply(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
        )
        # Replacement can split blocks; merge same-dtype blocks back together.
        bm._consolidate_inplace()
        return bm

498 

    def to_native_types(self: T, **kwargs) -> T:
        """
        Convert values to native types (strings / python objects) that are used
        in formatting (repr / csv).
        """
        return self.apply("to_native_types", **kwargs)

505 

    @property
    def is_numeric_mixed_type(self) -> bool:
        # NOTE: despite the name, this is True only when *every* block is
        # numeric, not when there is a mix.
        return all(block.is_numeric for block in self.blocks)

509 

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        return any(block.is_extension for block in self.blocks)

514 

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated

        return False

529 

    def _get_data_subset(self: T, predicate: Callable) -> T:
        """Return a manager keeping only blocks whose values satisfy `predicate`."""
        blocks = [blk for blk in self.blocks if predicate(blk.values)]
        return self._combine(blocks, copy=False)

533 

    def get_bool_data(self: T, copy: bool = False) -> T:
        """
        Select blocks that are bool-dtype and columns from object-dtype blocks
        that are all-bool.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """

        new_blocks = []

        for blk in self.blocks:
            if blk.dtype == bool:
                new_blocks.append(blk)

            elif blk.is_object:
                # Split the object block into single-column blocks and keep
                # only those that turn out to be all-bool.
                nbs = blk._split()
                for nb in nbs:
                    if nb.is_bool:
                        new_blocks.append(nb)

        return self._combine(new_blocks, copy)

558 

559 def get_numeric_data(self: T, copy: bool = False) -> T: 

560 """ 

561 Parameters 

562 ---------- 

563 copy : bool, default False 

564 Whether to copy the blocks 

565 """ 

566 numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] 

567 if len(numeric_blocks) == len(self.blocks): 

568 # Avoid somewhat expensive _combine 

569 if copy: 

570 return self.copy(deep=True) 

571 return self 

572 return self._combine(numeric_blocks, copy) 

573 

    def _combine(
        self: T, blocks: list[Block], copy: bool = True, index: Index | None = None
    ) -> T:
        """return a new manager with the blocks"""
        if len(blocks) == 0:
            if self.ndim == 2:
                # retain our own Index dtype
                if index is not None:
                    axes = [self.items[:0], index]
                else:
                    axes = [self.items[:0]] + self.axes[1:]
                return self.make_empty(axes)
            return self.make_empty()

        # FIXME: optimization potential
        # Columns kept, in sorted order, and the mapping old loc -> new loc.
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks: list[Block] = []
        # TODO(CoW) we could optimize here if we know that the passed blocks
        # are fully "owned" (eg created from an operation, not coming from
        # an existing manager)
        new_refs: list[weakref.ref | None] | None = None if copy else []
        for b in blocks:
            nb = b.copy(deep=copy)
            nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
            new_blocks.append(nb)
            if not copy:
                # None has no attribute "append"
                new_refs.append(weakref.ref(b))  # type: ignore[union-attr]

        axes = list(self.axes)
        if index is not None:
            axes[-1] = index
        axes[0] = self.items.take(indexer)

        return type(self).from_blocks(
            new_blocks, axes, new_refs, parent=None if copy else self
        )

613 

    @property
    def nblocks(self) -> int:
        """Number of blocks held by this manager."""
        return len(self.blocks)

617 

    def copy(self: T, deep=True) -> T:
        """
        Make deep or shallow copy of BlockManager

        Parameters
        ----------
        deep : bool, string or None, default True
            If False or None, return a shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        if deep is None:
            if _using_copy_on_write():
                # use shallow copy
                deep = False
            else:
                # preserve deep copy for BlockManager with copy=None
                deep = True

        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                return ax.copy(deep=True) if deep == "all" else ax.view()

            new_axes = [copy_func(ax) for ax in self.axes]
        else:
            new_axes = list(self.axes)

        res = self.apply("copy", deep=deep)
        # Shallow copies track the source blocks (and keep self alive) for
        # Copy-on-Write; deep copies own their data and need no refs.
        new_refs: list[weakref.ref | None] | None
        if deep:
            new_refs = None
            parent = None
        else:
            new_refs = [weakref.ref(blk) for blk in self.blocks]
            parent = self

        res.axes = new_axes
        res.refs = new_refs
        res.parent = parent

        if self.ndim > 1:
            # Avoid needing to re-compute these
            blknos = self._blknos
            if blknos is not None:
                res._blknos = blknos.copy()
                res._blklocs = self._blklocs.copy()

        if deep:
            res._consolidate_inplace()
        return res

674 

    def consolidate(self: T) -> T:
        """
        Join together blocks having same dtype

        Returns
        -------
        y : BlockManager
        """
        if self.is_consolidated():
            return self

        # Build a new manager over the same blocks, then consolidate it in
        # place so `self` is left untouched.
        bm = type(self)(self.blocks, self.axes, self.refs, verify_integrity=False)
        bm._is_consolidated = False
        bm._consolidate_inplace()
        return bm

690 

    def reindex_indexer(
        self: T,
        new_axis: Index,
        indexer: npt.NDArray[np.intp] | None,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool | None = True,
        only_slice: bool = False,
        *,
        use_na_proxy: bool = False,
    ) -> T:
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool or None, default True
            If None, regard as False to get shallow copy.
        only_slice : bool, default False
            Whether to take views, not copies, along columns.
        use_na_proxy : bool, default False
            Whether to use a np.void ndarray for newly introduced columns.

        pandas-indexer with -1's only.
        """
        if copy is None:
            if _using_copy_on_write():
                # use shallow copy
                copy = False
            else:
                # preserve deep copy for BlockManager with copy=None
                copy = True

        if indexer is None:
            # Pure relabel: no data movement, just swap the axis labels.
            if new_axis is self.axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result.axes = list(self.axes)
            result.axes[axis] = new_axis
            return result

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self.axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            new_blocks, new_refs = self._slice_take_blocks_ax0(
                indexer,
                fill_value=fill_value,
                only_slice=only_slice,
                use_na_proxy=use_na_proxy,
            )
            parent = None if com.all_none(*new_refs) else self
        else:
            new_blocks = [
                blk.take_nd(
                    indexer,
                    axis=1,
                    fill_value=(
                        fill_value if fill_value is not None else blk.fill_value
                    ),
                )
                for blk in self.blocks
            ]
            new_refs = None
            parent = None

        new_axes = list(self.axes)
        new_axes[axis] = new_axis

        new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent)
        if axis == 1:
            # We can avoid the need to rebuild these
            new_mgr._blknos = self.blknos.copy()
            new_mgr._blklocs = self.blklocs.copy()
        return new_mgr

775 

    def _slice_take_blocks_ax0(
        self,
        slice_or_indexer: slice | np.ndarray,
        fill_value=lib.no_default,
        only_slice: bool = False,
        *,
        use_na_proxy: bool = False,
    ) -> tuple[list[Block], list[weakref.ref | None]]:
        """
        Slice/take blocks along axis=0.

        Overloaded for SingleBlock

        Parameters
        ----------
        slice_or_indexer : slice or np.ndarray[int64]
        fill_value : scalar, default lib.no_default
        only_slice : bool, default False
            If True, we always return views on existing arrays, never copies.
            This is used when called from ops.blockwise.operate_blockwise.
        use_na_proxy : bool, default False
            Whether to use a np.void ndarray for newly introduced columns.

        Returns
        -------
        new_blocks : list of Block
        """
        allow_fill = fill_value is not lib.no_default

        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
            slice_or_indexer, self.shape[0], allow_fill=allow_fill
        )

        if self.is_single_block:
            # Fast paths when everything lives in a single block.
            blk = self.blocks[0]

            if sl_type == "slice":
                # GH#32959 EABlock would fail since we can't make 0-width
                # TODO(EA2D): special casing unnecessary with 2D EAs
                if sllen == 0:
                    return [], []
                bp = BlockPlacement(slice(0, sllen))
                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)], [
                    weakref.ref(blk)
                ]
            elif not allow_fill or self.ndim == 1:
                if allow_fill and fill_value is None:
                    fill_value = blk.fill_value

                if not allow_fill and only_slice:
                    # GH#33597 slice instead of take, so we get
                    # views instead of copies
                    blocks = [
                        blk.getitem_block_columns(
                            slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i)
                        )
                        for i, ml in enumerate(slobj)
                    ]
                    # We have
                    # all(np.shares_memory(nb.values, blk.values) for nb in blocks)
                    return blocks, [weakref.ref(blk)] * len(blocks)
                else:
                    bp = BlockPlacement(slice(0, sllen))
                    return [
                        blk.take_nd(
                            slobj,
                            axis=0,
                            new_mgr_locs=bp,
                            fill_value=fill_value,
                        )
                    ], [None]

        # Multi-block path: look up each requested column's block and
        # in-block position via the blknos/blklocs tables.
        if sl_type == "slice":
            blknos = self.blknos[slobj]
            blklocs = self.blklocs[slobj]
        else:
            blknos = algos.take_nd(
                self.blknos, slobj, fill_value=-1, allow_fill=allow_fill
            )
            blklocs = algos.take_nd(
                self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill
            )

        # When filling blknos, make sure blknos is updated before appending to
        # blocks list, that way new blkno is exactly len(blocks).
        blocks = []
        refs: list[weakref.ref | None] = []
        group = not only_slice
        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
            if blkno == -1:
                # If we've got here, fill_value was not lib.no_default

                blocks.append(
                    self._make_na_block(
                        placement=mgr_locs,
                        fill_value=fill_value,
                        use_na_proxy=use_na_proxy,
                    )
                )
                refs.append(None)
            else:
                blk = self.blocks[blkno]

                # Otherwise, slicing along items axis is necessary.
                if not blk._can_consolidate and not blk._validate_ndim:
                    # i.e. we dont go through here for DatetimeTZBlock
                    # A non-consolidatable block, it's easy, because there's
                    # only one item and each mgr loc is a copy of that single
                    # item.
                    for mgr_loc in mgr_locs:
                        newblk = blk.copy(deep=False)
                        newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
                        blocks.append(newblk)
                        refs.append(weakref.ref(blk))

                else:
                    # GH#32779 to avoid the performance penalty of copying,
                    # we may try to only slice
                    taker = blklocs[mgr_locs.indexer]
                    max_len = max(len(mgr_locs), taker.max() + 1)
                    if only_slice or _using_copy_on_write():
                        taker = lib.maybe_indices_to_slice(taker, max_len)

                    if isinstance(taker, slice):
                        nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
                        blocks.append(nb)
                        refs.append(weakref.ref(blk))
                    elif only_slice:
                        # GH#33597 slice instead of take, so we get
                        # views instead of copies
                        for i, ml in zip(taker, mgr_locs):
                            slc = slice(i, i + 1)
                            bp = BlockPlacement(ml)
                            nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
                            # We have np.shares_memory(nb.values, blk.values)
                            blocks.append(nb)
                            refs.append(weakref.ref(blk))
                    else:
                        nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
                        blocks.append(nb)
                        refs.append(None)

        return blocks, refs

919 

    def _make_na_block(
        self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
    ) -> Block:
        """Build a block of all-NA values for newly introduced columns."""
        # Note: we only get here with self.ndim == 2

        if use_na_proxy:
            assert fill_value is None
            shape = (len(placement), self.shape[1])
            vals = np.empty(shape, dtype=np.void)
            nb = NumpyBlock(vals, placement, ndim=2)
            return nb

        if fill_value is None:
            fill_value = np.nan
        block_shape = list(self.shape)
        block_shape[0] = len(placement)

        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        # error: Argument "dtype" to "empty" has incompatible type "Union[dtype,
        # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str,
        # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict,
        # Tuple[Any, Any]]"
        block_values = np.empty(block_shape, dtype=dtype)  # type: ignore[arg-type]
        block_values.fill(fill_value)
        return new_block_2d(block_values, placement=placement)

945 

    def take(
        self: T,
        indexer,
        axis: int = 1,
        verify: bool = True,
        convert_indices: bool = True,
    ) -> T:
        """
        Take items along any axis.

        indexer : np.ndarray or slice
        axis : int, default 1
        verify : bool, default True
            Check that all entries are between 0 and len(self) - 1, inclusive.
            Pass verify=False if this check has been done by the caller.
        convert_indices : bool, default True
            Whether to attempt to convert indices to positive values.

        Returns
        -------
        BlockManager
        """
        # We have 6 tests that get here with a slice
        indexer = (
            np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp)
            if isinstance(indexer, slice)
            else np.asanyarray(indexer, dtype=np.intp)
        )

        n = self.shape[axis]
        if convert_indices:
            indexer = maybe_convert_indices(indexer, n, verify=verify)

        new_labels = self.axes[axis].take(indexer)
        return self.reindex_indexer(
            new_axis=new_labels,
            indexer=indexer,
            axis=axis,
            allow_dups=True,
            copy=None,
        )

987 

988 

989class BlockManager(libinternals.BlockManager, BaseBlockManager): 

990 """ 

991 BaseBlockManager that holds 2D blocks. 

992 """ 

993 

994 ndim = 2 

995 

996 # ---------------------------------------------------------------- 

997 # Constructors 

998 

    def __init__(
        self,
        blocks: Sequence[Block],
        axes: Sequence[Index],
        refs: list[weakref.ref | None] | None = None,
        parent: object = None,
        verify_integrity: bool = True,
    ) -> None:
        # NOTE: actual attribute assignment happens in the cython base class
        # (libinternals.BlockManager); here we only optionally validate.

        if verify_integrity:
            # Assertion disabled for performance
            # assert all(isinstance(x, Index) for x in axes)

            for block in blocks:
                if self.ndim != block.ndim:
                    raise AssertionError(
                        f"Number of Block dimensions ({block.ndim}) must equal "
                        f"number of axes ({self.ndim})"
                    )
                if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1:
                    # TODO(2.0): remove once fastparquet no longer needs this
                    warnings.warn(
                        "In a future version, the BlockManager constructor "
                        "will assume that a DatetimeTZBlock with block.ndim==2 "
                        "has block.values.ndim == 2.",
                        DeprecationWarning,
                        stacklevel=find_stack_level(),
                    )

                    # error: Incompatible types in assignment (expression has type
                    # "Union[ExtensionArray, ndarray]", variable has type
                    # "DatetimeArray")
                    block.values = ensure_block_shape(  # type: ignore[assignment]
                        block.values, self.ndim
                    )
                    try:
                        block._cache.clear()
                    except AttributeError:
                        # _cache not initialized
                        pass

            self._verify_integrity()

1041 

    def _verify_integrity(self) -> None:
        """Sanity-check block shapes, item counts, and refs length."""
        mgr_shape = self.shape
        tot_items = sum(len(x.mgr_locs) for x in self.blocks)
        for block in self.blocks:
            # Every block must agree with the manager on all non-items axes.
            if block.shape[1:] != mgr_shape[1:]:
                raise construction_error(tot_items, block.shape[1:], self.axes)
        if len(self.items) != tot_items:
            raise AssertionError(
                "Number of manager items must equal union of "
                f"block items\n# manager items: {len(self.items)}, # "
                f"tot_items: {tot_items}"
            )
        if self.refs is not None:
            if len(self.refs) != len(self.blocks):
                raise AssertionError(
                    "Number of passed refs must equal the number of blocks: "
                    f"{len(self.refs)} refs vs {len(self.blocks)} blocks."
                    "\nIf you see this error, please report a bug at "
                    "https://github.com/pandas-dev/pandas/issues"
                )

1062 

    @classmethod
    def from_blocks(
        cls,
        blocks: list[Block],
        axes: list[Index],
        refs: list[weakref.ref | None] | None = None,
        parent: object = None,
    ) -> BlockManager:
        """
        Constructor for BlockManager and SingleBlockManager with same signature.
        """
        # parent is only tracked when Copy-on-Write is enabled.
        parent = parent if _using_copy_on_write() else None
        return cls(blocks, axes, refs, parent, verify_integrity=False)

1076 

1077 # ---------------------------------------------------------------- 

1078 # Indexing 

1079 

    def fast_xs(self, loc: int) -> SingleBlockManager:
        """
        Return the row corresponding to `frame.iloc[loc]` as a manager.

        Parameters
        ----------
        loc : int
            Positional index of the row to extract.

        Returns
        -------
        SingleBlockManager
            Holding a 1D array (np.ndarray or ExtensionArray) for the row.
        """
        if len(self.blocks) == 1:
            # Single-block fastpath: slice the row directly out of the block.
            result = self.blocks[0].iget((slice(None), loc))
            block = new_block(result, placement=slice(0, len(result)), ndim=1)
            # in the case of a single block, the new block is a view,
            # so track a reference for Copy-on-Write
            ref = weakref.ref(self.blocks[0])
            return SingleBlockManager(block, self.axes[0], [ref], parent=self)

        # Find a common dtype that can hold one value from every block.
        dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

        n = len(self)

        # GH#46406: sparse arrays cannot be written to element-by-element,
        # so collect into an object ndarray first and convert at the end.
        immutable_ea = isinstance(dtype, SparseDtype)

        if isinstance(dtype, ExtensionDtype) and not immutable_ea:
            # Mutable EA: build an empty EA of the right type and fill it below.
            cls = dtype.construct_array_type()
            result = cls._empty((n,), dtype=dtype)
        else:
            # error: Argument "dtype" to "empty" has incompatible type
            # "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected
            # "None"
            result = np.empty(
                n, dtype=object if immutable_ea else dtype  # type: ignore[arg-type]
            )
            result = ensure_wrapped_if_datetimelike(result)

        for blk in self.blocks:
            # Such assignment may incorrectly coerce NaT to None
            # result[blk.mgr_locs] = blk._slice((slice(None), loc))
            for i, rl in enumerate(blk.mgr_locs):
                result[rl] = blk.iget((i, loc))

        if immutable_ea:
            # Now convert the collected object values back to the sparse dtype.
            dtype = cast(ExtensionDtype, dtype)
            result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)

        block = new_block(result, placement=slice(0, len(result)), ndim=1)
        # No refs: `result` is freshly allocated, not a view on any block.
        return SingleBlockManager(block, self.axes[0])

1130 

1131 def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: 

1132 """ 

1133 Return the data as a SingleBlockManager. 

1134 """ 

1135 block = self.blocks[self.blknos[i]] 

1136 values = block.iget(self.blklocs[i]) 

1137 

1138 # shortcut for select a single-dim from a 2-dim BM 

1139 bp = BlockPlacement(slice(0, len(values))) 

1140 nb = type(block)(values, placement=bp, ndim=1) 

1141 ref = weakref.ref(block) if track_ref else None 

1142 parent = self if track_ref else None 

1143 return SingleBlockManager(nb, self.axes[1], [ref], parent) 

1144 

1145 def iget_values(self, i: int) -> ArrayLike: 

1146 """ 

1147 Return the data for column i as the values (ndarray or ExtensionArray). 

1148 

1149 Warning! The returned array is a view but doesn't handle Copy-on-Write, 

1150 so this should be used with caution. 

1151 """ 

1152 # TODO(CoW) making the arrays read-only might make this safer to use? 

1153 block = self.blocks[self.blknos[i]] 

1154 values = block.iget(self.blklocs[i]) 

1155 return values 

1156 

    @property
    def column_arrays(self) -> list[np.ndarray]:
        """
        Used in the JSON C code to access column arrays.
        This optimizes compared to using `iget_values` by converting each
        block's values only once (via `values_for_json`) instead of once
        per column.

        Warning! This doesn't handle Copy-on-Write, so should be used with
        caution (current use case of consuming this in the JSON code is fine).
        """
        # This is an optimized equivalent to
        #  result = [self.iget_values(i) for i in range(len(self.items))]
        result: list[np.ndarray | None] = [None] * len(self.items)

        for blk in self.blocks:
            mgr_locs = blk._mgr_locs
            values = blk.values_for_json()
            if values.ndim == 1:
                # TODO(EA2D): special casing not needed with 2D EAs
                # 1D values: the block holds exactly one column.
                result[mgr_locs[0]] = values

            else:
                # 2D values: scatter each row into its column slot.
                for i, loc in enumerate(mgr_locs):
                    result[loc] = values[i]

        # error: Incompatible return value type (got "List[None]",
        # expected "List[ndarray[Any, Any]]")
        return result  # type: ignore[return-value]

1184 

    def iset(
        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
    ):
        """
        Set new item in-place. Does not consolidate. Adds new Block if not
        contained in the current set of items

        Parameters
        ----------
        loc : int, slice or np.ndarray
            Position(s) along the items axis to set.
        value : np.ndarray or ExtensionArray
            New values; shape must be compatible with the manager.
        inplace : bool, default False
            If True and the existing block can store `value`, write into
            the existing block instead of replacing it.
        """

        # FIXME: refactor, clearly separate broadcasting & zip-like assignment
        #  can prob also fix the various if tests for sparse/categorical
        if self._blklocs is None and self.ndim > 1:
            self._rebuild_blknos_and_blklocs()

        # Note: we exclude DTA/TDA here
        value_is_extension_type = is_1d_only_ea_dtype(value.dtype)
        if not value_is_extension_type:
            # Bring `value` to block layout (items on axis 0) and validate it.
            if value.ndim == 2:
                value = value.T
            else:
                value = ensure_block_shape(value, ndim=2)

            if value.shape[1:] != self.shape[1:]:
                raise AssertionError(
                    "Shape of new values must be compatible with manager shape"
                )

        if lib.is_integer(loc):
            # We have 6 tests where loc is _not_ an int.
            # In this case, get_blkno_placements will yield only one tuple,
            #  containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))

            # Check if we can use _iset_single fastpath
            loc = cast(int, loc)
            blkno = self.blknos[loc]
            blk = self.blocks[blkno]
            if len(blk._mgr_locs) == 1:  # TODO: fastest way to check this?
                return self._iset_single(
                    loc,
                    value,
                    inplace=inplace,
                    blkno=blkno,
                    blk=blk,
                )

            # error: Incompatible types in assignment (expression has type
            # "List[Union[int, slice, ndarray]]", variable has type "Union[int,
            # slice, ndarray]")
            loc = [loc]  # type: ignore[assignment]

        # categorical/sparse/datetimetz: a 1D-only EA always goes in whole,
        # so the getter ignores the placement.
        if value_is_extension_type:

            def value_getitem(placement):
                return value

        else:

            def value_getitem(placement):
                return value[placement.indexer]

        # Accessing public blknos ensures the public versions are initialized
        blknos = self.blknos[loc]
        blklocs = self.blklocs[loc].copy()

        # Locations whose existing block cannot store the new values ("unfit")
        # are collected and re-blocked below.
        unfit_mgr_locs = []
        unfit_val_locs = []
        removed_blknos = []
        for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True):
            blk = self.blocks[blkno_l]
            blk_locs = blklocs[val_locs.indexer]
            if inplace and blk.should_store(value):
                # Updating inplace -> check if we need to do Copy-on-Write
                if _using_copy_on_write() and not self._has_no_reference_block(blkno_l):
                    blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
                    self._clear_reference_block(blkno_l)
                else:
                    blk.set_inplace(blk_locs, value_getitem(val_locs))
            else:
                unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
                unfit_val_locs.append(val_locs)

                # If all block items are unfit, schedule the block for removal.
                if len(val_locs) == len(blk.mgr_locs):
                    removed_blknos.append(blkno_l)
                else:
                    # Drop only the unfit columns from this block.
                    nb = blk.delete(blk_locs)
                    blocks_tup = (
                        self.blocks[:blkno_l] + (nb,) + self.blocks[blkno_l + 1 :]
                    )
                    self.blocks = blocks_tup
                    self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
                    # blk.delete gives a copy, so we can remove a possible reference
                    self._clear_reference_block(blkno_l)

        if len(removed_blknos):
            # Remove blocks & update blknos and refs accordingly
            is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
            is_deleted[removed_blknos] = True

            # Renumber surviving blocks so blknos stay dense after removal.
            new_blknos = np.empty(self.nblocks, dtype=np.intp)
            new_blknos.fill(-1)
            new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos))
            self._blknos = new_blknos[self._blknos]
            self.blocks = tuple(
                blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)
            )
            if self.refs is not None:
                self.refs = [
                    ref
                    for i, ref in enumerate(self.refs)
                    if i not in set(removed_blknos)
                ]

        if unfit_val_locs:
            unfit_idxr = np.concatenate(unfit_mgr_locs)
            unfit_count = len(unfit_idxr)

            new_blocks: list[Block] = []
            if value_is_extension_type:
                # This code (ab-)uses the fact that EA blocks contain only
                #  one item.
                # TODO(EA2D): special casing unnecessary with 2D EAs
                new_blocks.extend(
                    new_block_2d(
                        values=value,
                        placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)),
                    )
                    for mgr_loc in unfit_idxr
                )

                self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks)
                self._blklocs[unfit_idxr] = 0

            else:
                # unfit_val_locs contains BlockPlacement objects
                unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

                new_blocks.append(
                    new_block_2d(
                        values=value_getitem(unfit_val_items),
                        placement=BlockPlacement(unfit_idxr),
                    )
                )

                self._blknos[unfit_idxr] = len(self.blocks)
                self._blklocs[unfit_idxr] = np.arange(unfit_count)

            self.blocks += tuple(new_blocks)
            # TODO(CoW) is this always correct to assume that the new_blocks
            # are not referencing anything else?
            if self.refs is not None:
                self.refs = list(self.refs) + [None] * len(new_blocks)

        # Newly created block's dtype may already be present.
        self._known_consolidated = False

1340 

    def _iset_single(
        self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block
    ) -> None:
        """
        Fastpath for iset when we are only setting a single position and
        the Block currently in that position is itself single-column.

        In this case we can swap out the entire Block and blklocs and blknos
        are unaffected.

        Parameters
        ----------
        loc : int
            Position along the items axis being set.
        value : np.ndarray or ExtensionArray
        inplace : bool
            If True and `blk` can store `value`, write into the existing block.
        blkno : int
            Index of `blk` in ``self.blocks``.
        blk : Block
            The (single-column) block currently at `loc`.
        """
        # Caller is responsible for verifying value.shape

        if inplace and blk.should_store(value):
            copy = False
            if _using_copy_on_write() and not self._has_no_reference_block(blkno):
                # perform Copy-on-Write and clear the reference
                copy = True
                self._clear_reference_block(blkno)
            iloc = self.blklocs[loc]
            blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy)
            return

        # Cannot store in-place: replace the block wholesale, keeping its
        # placement so blknos/blklocs stay valid.
        nb = new_block_2d(value, placement=blk._mgr_locs)
        old_blocks = self.blocks
        new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :]
        self.blocks = new_blocks
        self._clear_reference_block(blkno)
        return

1369 

    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
        """
        Set values ("setitem") into a single column (not setting the full column).

        This is a method on the BlockManager level, to avoid creating an
        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)

        Parameters
        ----------
        loc : int
            Position of the column along the items axis.
        idx : int, slice or np.ndarray
            Indexer into the column's values.
        value : Any
            Value(s) to assign at `idx`.
        """
        if _using_copy_on_write() and not self._has_no_reference(loc):
            # otherwise perform Copy-on-Write and clear the reference
            blkno = self.blknos[loc]
            blocks = list(self.blocks)
            blocks[blkno] = blocks[blkno].copy()
            self.blocks = tuple(blocks)
            self._clear_reference_block(blkno)

        # this manager is only created temporarily to mutate the values in place
        # so don't track references, otherwise the `setitem` would perform CoW again
        col_mgr = self.iget(loc, track_ref=False)
        new_mgr = col_mgr.setitem((idx,), value)
        self.iset(loc, new_mgr._block.values, inplace=True)

1390 

    def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : np.ndarray or ExtensionArray

        Raises
        ------
        ValueError
            If a 2D `value` has more than one row (i.e. is not a single column).
        """
        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        if value.ndim == 2:
            value = value.T
            if len(value) > 1:
                raise ValueError(
                    f"Expected a 1D array, got an array with shape {value.T.shape}"
                )
        else:
            value = ensure_block_shape(value, ndim=self.ndim)

        bp = BlockPlacement(slice(loc, loc + 1))
        block = new_block_2d(values=value, placement=bp)

        if not len(self.blocks):
            # Fastpath: first block in an empty manager
            self._blklocs = np.array([0], dtype=np.intp)
            self._blknos = np.array([0], dtype=np.intp)
        else:
            # Shift existing placements/bookkeeping to make room at `loc`.
            self._insert_update_mgr_locs(loc)
            self._insert_update_blklocs_and_blknos(loc)

        self.axes[0] = new_axis
        self.blocks += (block,)
        # TODO(CoW) do we always "own" the passed `value`?
        if self.refs is not None:
            self.refs += [None]

        self._known_consolidated = False

        # Many single-column insertions fragment the manager; warn so users
        # know to consolidate (e.g. via frame.copy()).
        if sum(not block.is_extension for block in self.blocks) > 100:
            warnings.warn(
                "DataFrame is highly fragmented. This is usually the result "
                "of calling `frame.insert` many times, which has poor performance. "
                "Consider joining all columns at once using pd.concat(axis=1) "
                "instead. To get a de-fragmented frame, use `newframe = frame.copy()`",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )

1441 

1442 def _insert_update_mgr_locs(self, loc) -> None: 

1443 """ 

1444 When inserting a new Block at location 'loc', we increment 

1445 all of the mgr_locs of blocks above that by one. 

1446 """ 

1447 for blkno, count in _fast_count_smallints(self.blknos[loc:]): 

1448 # .620 this way, .326 of which is in increment_above 

1449 blk = self.blocks[blkno] 

1450 blk._mgr_locs = blk._mgr_locs.increment_above(loc) 

1451 

    def _insert_update_blklocs_and_blknos(self, loc) -> None:
        """
        When inserting a new Block at location 'loc', we update our
        _blklocs and _blknos. The new block is always appended at the end
        of self.blocks, so its blkno is len(self.blocks) and its blkloc is 0.
        """

        # Accessing public blklocs ensures the public versions are initialized
        if loc == self.blklocs.shape[0]:
            # Insert at the end: np.append is a lot faster, let's use it if we can.
            self._blklocs = np.append(self._blklocs, 0)
            self._blknos = np.append(self._blknos, len(self.blocks))
        elif loc == 0:
            # Insert at the front: append to the reversed arrays and reverse
            # back, which is still faster than a generic insert.
            self._blklocs = np.append(self._blklocs[::-1], 0)[::-1]
            self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1]
        else:
            # General case: delegate the middle-insert to the cython helper.
            new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos(
                self.blklocs, self.blknos, loc, len(self.blocks)
            )
            self._blklocs = new_blklocs
            self._blknos = new_blknos

1473 

1474 def idelete(self, indexer) -> BlockManager: 

1475 """ 

1476 Delete selected locations, returning a new BlockManager. 

1477 """ 

1478 is_deleted = np.zeros(self.shape[0], dtype=np.bool_) 

1479 is_deleted[indexer] = True 

1480 taker = (~is_deleted).nonzero()[0] 

1481 

1482 nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True) 

1483 new_columns = self.items[~is_deleted] 

1484 axes = [new_columns, self.axes[1]] 

1485 # TODO this might not be needed (can a delete ever be done in chained manner?) 

1486 parent = None if com.all_none(*new_refs) else self 

1487 return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False) 

1488 

1489 # ---------------------------------------------------------------- 

1490 # Block-wise Operation 

1491 

1492 def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: 

1493 """ 

1494 Apply grouped reduction function blockwise, returning a new BlockManager. 

1495 

1496 Parameters 

1497 ---------- 

1498 func : grouped reduction function 

1499 ignore_failures : bool, default False 

1500 Whether to drop blocks where func raises TypeError. 

1501 

1502 Returns 

1503 ------- 

1504 BlockManager 

1505 """ 

1506 result_blocks: list[Block] = [] 

1507 dropped_any = False 

1508 

1509 for blk in self.blocks: 

1510 if blk.is_object: 

1511 # split on object-dtype blocks bc some columns may raise 

1512 # while others do not. 

1513 for sb in blk._split(): 

1514 try: 

1515 applied = sb.apply(func) 

1516 except (TypeError, NotImplementedError): 

1517 if not ignore_failures: 

1518 raise 

1519 dropped_any = True 

1520 continue 

1521 result_blocks = extend_blocks(applied, result_blocks) 

1522 else: 

1523 try: 

1524 applied = blk.apply(func) 

1525 except (TypeError, NotImplementedError): 

1526 if not ignore_failures: 

1527 raise 

1528 dropped_any = True 

1529 continue 

1530 result_blocks = extend_blocks(applied, result_blocks) 

1531 

1532 if len(result_blocks) == 0: 

1533 index = Index([None]) # placeholder 

1534 else: 

1535 index = Index(range(result_blocks[0].values.shape[-1])) 

1536 

1537 if dropped_any: 

1538 # faster to skip _combine if we haven't dropped any blocks 

1539 return self._combine(result_blocks, copy=False, index=index) 

1540 

1541 return type(self).from_blocks(result_blocks, [self.axes[0], index]) 

1542 

1543 def reduce( 

1544 self: T, func: Callable, ignore_failures: bool = False 

1545 ) -> tuple[T, np.ndarray]: 

1546 """ 

1547 Apply reduction function blockwise, returning a single-row BlockManager. 

1548 

1549 Parameters 

1550 ---------- 

1551 func : reduction function 

1552 ignore_failures : bool, default False 

1553 Whether to drop blocks where func raises TypeError. 

1554 

1555 Returns 

1556 ------- 

1557 BlockManager 

1558 np.ndarray 

1559 Indexer of mgr_locs that are retained. 

1560 """ 

1561 # If 2D, we assume that we're operating column-wise 

1562 assert self.ndim == 2 

1563 

1564 res_blocks: list[Block] = [] 

1565 for blk in self.blocks: 

1566 nbs = blk.reduce(func, ignore_failures) 

1567 res_blocks.extend(nbs) 

1568 

1569 index = Index([None]) # placeholder 

1570 if ignore_failures: 

1571 if res_blocks: 

1572 indexer = np.concatenate([blk.mgr_locs.as_array for blk in res_blocks]) 

1573 new_mgr = self._combine(res_blocks, copy=False, index=index) 

1574 else: 

1575 indexer = [] 

1576 new_mgr = type(self).from_blocks([], [self.items[:0], index]) 

1577 else: 

1578 indexer = np.arange(self.shape[0]) 

1579 new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) 

1580 return new_mgr, indexer 

1581 

    def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
        """
        Apply array_op blockwise with another (aligned) BlockManager.
        Delegates to the module-level ``operate_blockwise`` helper.
        """
        return operate_blockwise(self, other, array_op)

1587 

    def _equal_values(self: BlockManager, other: BlockManager) -> bool:
        """
        Used in .equals defined in base class. Only check the column values
        assuming shape and indexes have already been checked.
        """
        # Compare column-by-column across the two managers' blocks.
        return blockwise_all(self, other, array_equals)

1594 

    def quantile(
        self: T,
        *,
        qs: Float64Index,
        axis: int = 0,
        interpolation="linear",
    ) -> T:
        """
        Iterate over blocks applying quantile reduction.
        This routine is intended for reduction type operations and
        will do inference on the generated blocks.

        Parameters
        ----------
        qs : Float64Index
            List of the quantiles to be computed.
        axis : int, default 0
            Reduction axis; in practice always called with axis=1
            (see assertion below).
        interpolation : str, default 'linear'
            Type of interpolation.

        Returns
        -------
        BlockManager
        """
        # Series dispatches to DataFrame for quantile, which allows us to
        #  simplify some of the code here and in the blocks
        assert self.ndim >= 2
        assert is_list_like(qs)  # caller is responsible for this
        assert axis == 1  # only ever called this way

        # The second axis becomes the requested quantiles.
        new_axes = list(self.axes)
        new_axes[1] = Float64Index(qs)

        blocks = [
            blk.quantile(axis=axis, qs=qs, interpolation=interpolation)
            for blk in self.blocks
        ]

        return type(self)(blocks, new_axes)

1634 

1635 # ---------------------------------------------------------------- 

1636 

    def unstack(self, unstacker, fill_value) -> BlockManager:
        """
        Return a BlockManager with all blocks unstacked.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        new_columns = unstacker.get_new_columns(self.items)
        new_index = unstacker.new_index

        allow_fill = not unstacker.mask_all
        if allow_fill:
            # calculating the full mask once and passing it to Block._unstack is
            #  faster than letting calculating it in each repeated call
            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
            needs_masking = new_mask2D.any(axis=0)
        else:
            # No missing positions introduced: nothing needs masking.
            needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool)

        new_blocks: list[Block] = []
        columns_mask: list[np.ndarray] = []

        # Each original column expands to `factor` unstacked columns.
        if len(self.items) == 0:
            factor = 1
        else:
            fac = len(new_columns) / len(self.items)
            assert fac == int(fac)
            factor = int(fac)

        for blk in self.blocks:
            mgr_locs = blk.mgr_locs
            new_placement = mgr_locs.tile_for_unstack(factor)

            blocks, mask = blk._unstack(
                unstacker,
                fill_value,
                new_placement=new_placement,
                needs_masking=needs_masking,
            )

            new_blocks.extend(blocks)
            columns_mask.extend(mask)

            # Block._unstack should ensure this holds,
            assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks)
            # In turn this ensures that in the BlockManager call below
            #  we have len(new_columns) == sum(x.shape[0] for x in new_blocks)
            #  which suffices to allow us to pass verify_integrity=False

        new_columns = new_columns[columns_mask]

        bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
        return bm

1697 

1698 def to_dict(self, copy: bool = True): 

1699 """ 

1700 Return a dict of str(dtype) -> BlockManager 

1701 

1702 Parameters 

1703 ---------- 

1704 copy : bool, default True 

1705 

1706 Returns 

1707 ------- 

1708 values : a dict of dtype -> BlockManager 

1709 """ 

1710 

1711 bd: dict[str, list[Block]] = {} 

1712 for b in self.blocks: 

1713 bd.setdefault(str(b.dtype), []).append(b) 

1714 

1715 # TODO(EA2D): the combine will be unnecessary with 2D EAs 

1716 return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} 

1717 

    def as_array(
        self,
        dtype: np.dtype | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        dtype : np.dtype or None, default None
            Data type of the return array.
        copy : bool, default False
            If True then guarantee that a copy is returned. A value of
            False does not guarantee that the underlying data is not
            copied.
        na_value : object, default lib.no_default
            Value to be used as the missing value sentinel.

        Returns
        -------
        arr : ndarray
        """
        # TODO(CoW) handle case where resulting array is a view
        if len(self.blocks) == 0:
            # Empty manager: return an empty float array of the right shape.
            arr = np.empty(self.shape, dtype=float)
            return arr.transpose()

        # We want to copy when na_value is provided to avoid
        # mutating the original object
        copy = copy or na_value is not lib.no_default

        if self.is_single_block:
            blk = self.blocks[0]
            if blk.is_extension:
                # Avoid implicit conversion of extension blocks to object

                # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no
                # attribute "to_numpy"
                arr = blk.values.to_numpy(  # type: ignore[union-attr]
                    dtype=dtype,
                    na_value=na_value,
                ).reshape(blk.shape)
            else:
                arr = np.asarray(blk.get_values())
                if dtype:
                    arr = arr.astype(dtype, copy=False)
        else:
            # Multiple blocks: interleave into a single homogeneous array.
            arr = self._interleave(dtype=dtype, na_value=na_value)
            # The underlying data was copied within _interleave
            copy = False

        if copy:
            arr = arr.copy()

        if na_value is not lib.no_default:
            # `copy` above guarantees this does not mutate the original data.
            arr[isna(arr)] = na_value

        # Internal layout is (items, index); callers expect (index, items).
        return arr.transpose()

1778 

    def _interleave(
        self,
        dtype: np.dtype | None = None,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks

        Parameters
        ----------
        dtype : np.dtype or None, default None
            Target dtype; if None, inferred from the blocks' dtypes.
        na_value : object, default lib.no_default
            Missing value sentinel; affects how EA blocks are converted.
        """
        if not dtype:
            # Incompatible types in assignment (expression has type
            # "Optional[Union[dtype[Any], ExtensionDtype]]", variable has
            # type "Optional[dtype[Any]]")
            dtype = interleaved_dtype(  # type: ignore[assignment]
                [blk.dtype for blk in self.blocks]
            )

        # TODO: https://github.com/pandas-dev/pandas/issues/22791
        # Give EAs some input on what happens here. Sparse needs this.
        if isinstance(dtype, SparseDtype):
            dtype = dtype.subtype
            dtype = cast(np.dtype, dtype)
        elif isinstance(dtype, ExtensionDtype):
            dtype = np.dtype("object")
        elif is_dtype_equal(dtype, str):
            dtype = np.dtype("object")

        result = np.empty(self.shape, dtype=dtype)

        # Track which item rows get filled, to detect gaps at the end.
        itemmask = np.zeros(self.shape[0])

        if dtype == np.dtype("object") and na_value is lib.no_default:
            # much more performant than using to_numpy below
            for blk in self.blocks:
                rl = blk.mgr_locs
                arr = blk.get_values(dtype)
                result[rl.indexer] = arr
                itemmask[rl.indexer] = 1
            return result

        for blk in self.blocks:
            rl = blk.mgr_locs
            if blk.is_extension:
                # Avoid implicit conversion of extension blocks to object

                # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no
                # attribute "to_numpy"
                arr = blk.values.to_numpy(  # type: ignore[union-attr]
                    dtype=dtype,
                    na_value=na_value,
                )
            else:
                arr = blk.get_values(dtype)
            result[rl.indexer] = arr
            itemmask[rl.indexer] = 1

        if not itemmask.all():
            raise AssertionError("Some items were not contained in blocks")

        return result

1839 

1840 # ---------------------------------------------------------------- 

1841 # Consolidation 

1842 

1843 def is_consolidated(self) -> bool: 

1844 """ 

1845 Return True if more than one block with the same dtype 

1846 """ 

1847 if not self._known_consolidated: 

1848 self._consolidate_check() 

1849 return self._is_consolidated 

1850 

1851 def _consolidate_check(self) -> None: 

1852 if len(self.blocks) == 1: 

1853 # fastpath 

1854 self._is_consolidated = True 

1855 self._known_consolidated = True 

1856 return 

1857 dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] 

1858 self._is_consolidated = len(dtypes) == len(set(dtypes)) 

1859 self._known_consolidated = True 

1860 

    def _consolidate_inplace(self) -> None:
        # In general, _consolidate_inplace should only be called via
        #  DataFrame._consolidate_inplace, otherwise we will fail to invalidate
        #  the DataFrame's _item_cache. The exception is for newly-created
        #  BlockManager objects not yet attached to a DataFrame.
        if not self.is_consolidated():
            # Merge same-dtype blocks; the refs variant also merges the
            # corresponding Copy-on-Write references.
            if self.refs is None:
                self.blocks = _consolidate(self.blocks)
            else:
                self.blocks, self.refs = _consolidate_with_refs(self.blocks, self.refs)
            self._is_consolidated = True
            self._known_consolidated = True
            # Block membership changed, so the lookup tables must be rebuilt.
            self._rebuild_blknos_and_blklocs()

1874 

1875 

1876class SingleBlockManager(BaseBlockManager, SingleDataManager): 

1877 """manage a single block with""" 

1878 

    @property
    def ndim(self) -> Literal[1]:
        # A SingleBlockManager always backs a 1-dimensional object (Series).
        return 1

1882 

    # A single block is trivially consolidated, so consolidation checks
    # short-circuit for this manager.
    _is_consolidated = True
    _known_consolidated = True
    __slots__ = ()
    # Always exactly one block.
    is_single_block = True

1887 

    def __init__(
        self,
        block: Block,
        axis: Index,
        refs: list[weakref.ref | None] | None = None,
        parent: object = None,
        verify_integrity: bool = False,
        fastpath=lib.no_default,
    ) -> None:
        """
        Manager for a single Block along a single axis.

        Parameters
        ----------
        block : Block
        axis : Index
        refs : list of weakref or None, optional
            Copy-on-Write references for the single block.
        parent : object, optional
            Object whose data this manager views; only kept under CoW mode.
        verify_integrity : bool, default False
            Unused here; accepted for signature compatibility.
        fastpath : deprecated keyword, do not use.
        """
        # Assertions disabled for performance
        # assert isinstance(block, Block), type(block)
        # assert isinstance(axis, Index), type(axis)

        if fastpath is not lib.no_default:
            warnings.warn(
                "The `fastpath` keyword is deprecated and will be removed "
                "in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        self.axes = [axis]
        self.blocks = (block,)
        self.refs = refs
        # Only track the parent when Copy-on-Write is enabled.
        self.parent = parent if _using_copy_on_write() else None

1913 

    @classmethod
    def from_blocks(
        cls,
        blocks: list[Block],
        axes: list[Index],
        refs: list[weakref.ref | None] | None = None,
        parent: object = None,
    ) -> SingleBlockManager:
        """
        Constructor for BlockManager and SingleBlockManager with same signature.

        A SingleBlockManager holds exactly one block along one axis, so the
        input lists must each have length 1.
        """
        assert len(blocks) == 1
        assert len(axes) == 1
        if refs is not None:
            assert len(refs) == 1
        return cls(blocks[0], axes[0], refs, parent, verify_integrity=False)

1930 

    @classmethod
    def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
        """
        Constructor for if we have an array that is not yet a Block.
        """
        # Wrap the raw array in a 1D block spanning the full index.
        block = new_block(array, placement=slice(0, len(index)), ndim=1)
        return cls(block, index)

1938 

    def to_2d_mgr(self, columns: Index) -> BlockManager:
        """
        Manager analogue of Series.to_frame

        Parameters
        ----------
        columns : Index
            Single-entry column axis for the resulting 2D manager.
        """
        blk = self.blocks[0]
        # Promote the 1D values to 2D block layout (1 row).
        arr = ensure_block_shape(blk.values, ndim=2)
        bp = BlockPlacement(0)
        new_blk = type(blk)(arr, placement=bp, ndim=2)
        axes = [columns, self.axes[0]]
        # The 2D block views this manager's data, so record a CoW reference.
        refs: list[weakref.ref | None] = [weakref.ref(blk)]
        parent = self if _using_copy_on_write() else None
        return BlockManager(
            [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False
        )

1953 

1954 def _has_no_reference(self, i: int = 0) -> bool: 

1955 """ 

1956 Check for column `i` if it has references. 

1957 (whether it references another array or is itself being referenced) 

1958 Returns True if the column has no references. 

1959 """ 

1960 return (self.refs is None or self.refs[0] is None) and weakref.getweakrefcount( 

1961 self.blocks[0] 

1962 ) == 0 

1963 

    def __getstate__(self):
        # Pickle support: serialize values, item labels and axes.
        block_values = [b.values for b in self.blocks]
        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
        axes_array = list(self.axes)

        # Versioned state dict; __setstate__ only understands the "0.14.1"
        # layout below.
        extra_state = {
            "0.14.1": {
                "axes": axes_array,
                "blocks": [
                    {"values": b.values, "mgr_locs": b.mgr_locs.indexer}
                    for b in self.blocks
                ],
            }
        }

        # First three elements of the state are to maintain forward
        # compatibility with 0.13.1.
        return axes_array, block_values, block_items, extra_state

1982 

    def __setstate__(self, state):
        # Rebuild a Block from pickled values + placement.
        def unpickle_block(values, mgr_locs, ndim: int) -> Block:
            # TODO(EA2D): ndim would be unnecessary with 2D EAs
            # older pickles may store e.g. DatetimeIndex instead of DatetimeArray
            values = extract_array(values, extract_numpy=True)
            return new_block(values, placement=mgr_locs, ndim=ndim)

        # Only the "0.14.1" versioned layout (see __getstate__) is supported.
        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
            state = state[3]["0.14.1"]
            self.axes = [ensure_index(ax) for ax in state["axes"]]
            ndim = len(self.axes)
            self.blocks = tuple(
                unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
                for b in state["blocks"]
            )
        else:
            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")

        self._post_setstate()

2002 

def _post_setstate(self):
    """Hook run after ``__setstate__``; a no-op for this manager."""
    return None

2005 

@cache_readonly
def _block(self) -> Block:
    """The single Block backing this manager (cached after first access)."""
    return self.blocks[0]

2009 

@property
def _blknos(self):
    """compat with BlockManager"""
    # A single-block manager needs no block-number mapping.
    return None

2014 

@property
def _blklocs(self):
    """compat with BlockManager"""
    # A single-block manager needs no block-location mapping.
    return None

2019 

def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager:
    """
    Like get_slice, but the indexer is not restricted to a slice
    (e.g. a boolean mask is allowed).  Returns a new manager over the
    taken values, tracking a reference to the source block.
    """
    blk = self._block
    taken = blk._slice(indexer)
    if taken.ndim > 1:
        # This will be caught by Series._get_values
        raise ValueError("dimension-expanding indexing not allowed")

    bp = BlockPlacement(slice(0, len(taken)))
    nb = type(blk)(taken, placement=bp, ndim=1)

    # TODO(CoW) in theory only need to track a reference if `taken` is a view
    return type(self)(nb, self.index[indexer], [weakref.ref(blk)], parent=self)

2035 

def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
    """
    Slice this manager; ``axis`` must be 0 for a 1-dim manager.
    """
    # Assertion disabled for performance
    # assert isinstance(slobj, slice), type(slobj)
    if axis >= self.ndim:
        raise IndexError("Requested axis not found in manager")

    blk = self._block
    sliced = blk._slice(slobj)
    bp = BlockPlacement(slice(0, len(sliced)))
    nb = type(blk)(sliced, placement=bp, ndim=1)
    # TODO: only used by the groupby SeriesSplitter at the moment, so
    # passing refs / parent is not yet covered by the tests.
    return type(self)(
        nb, self.index._getitem_slice(slobj), [weakref.ref(blk)], parent=self
    )

2050 

@property
def index(self) -> Index:
    """The single axis (row labels) of this 1-dim manager."""
    return self.axes[0]

2054 

@property
def dtype(self) -> DtypeObj:
    """Dtype of the single backing block."""
    return self._block.dtype

2058 

def get_dtypes(self) -> np.ndarray:
    """Return the manager's dtypes as a length-1 ndarray."""
    dtypes = [self._block.dtype]
    return np.array(dtypes)

2061 

def external_values(self):
    """The array that Series.values returns."""
    blk = self._block
    return blk.external_values()

2065 

def internal_values(self):
    """The array that Series._values returns."""
    blk = self._block
    return blk.values

2069 

def array_values(self):
    """The array that Series.array returns."""
    blk = self._block
    return blk.array_values

2073 

def get_numeric_data(self, copy: bool = False):
    """
    Return self (optionally deep-copied) when the backing block is
    numeric; otherwise return an empty manager.
    """
    if not self._block.is_numeric:
        return self.make_empty()
    return self.copy(deep=copy)

2078 

@property
def _can_hold_na(self) -> bool:
    """Whether the backing block can hold missing values."""
    return self._block._can_hold_na

2082 

def setitem_inplace(self, indexer, value) -> None:
    """
    Set values with indexer.

    For Single[Block/Array]Manager, this backs ``s[indexer] = value``.

    This is an inplace version of ``setitem()``, mutating the
    manager/values in place rather than returning a new Manager (and
    Block), and thus never changing the dtype.
    """
    needs_copy = _using_copy_on_write() and not self._has_no_reference(0)
    if needs_copy:
        # Under copy-on-write a shared block must be copied before the
        # in-place mutation so other references remain unchanged.
        self.blocks = (self._block.copy(),)
        self.refs = None
        self.parent = None
        self._cache.clear()

    super().setitem_inplace(indexer, value)

2100 

def idelete(self, indexer) -> SingleBlockManager:
    """
    Delete a single location from this SingleBlockManager, in place.

    ``self.blocks`` is always left non-empty (Block.delete returns a
    new, possibly empty, block).  Any copy-on-write references are
    dropped because deletion always produces a new array.
    """
    new_blk = self._block.delete(indexer)
    self.blocks = (new_blk,)
    self.axes[0] = self.axes[0].delete(indexer)
    self._cache.clear()
    self.refs = None
    self.parent = None
    return self

2115 

def fast_xs(self, loc):
    """
    fast path for getting a cross-section
    return a view of the data

    Not supported for a single-block (1-dim) manager; always raises
    NotImplementedError — use ``series._values[loc]`` instead.
    """
    raise NotImplementedError("Use series._values[loc] instead")

2122 

def set_values(self, values: ArrayLike):
    """
    Overwrite the single block's values, in place.

    Use at your own risk!  No validation (length, dtype, ...) is done
    against the current Block/SingleBlockManager.
    """
    # TODO(CoW) do we need to handle copy on write here? Currently this is
    # only used for FrameColumnApply.series_generator (what if apply is
    # mutating inplace?)
    blk = self.blocks[0]
    blk.values = values
    blk._mgr_locs = BlockPlacement(slice(len(values)))

2135 

def _equal_values(self: T, other: T) -> bool:
    """
    Compare column values only; shape and index equality are assumed to
    have been checked already (used by ``.equals`` in the base class).
    """
    # For SingleBlockManager (i.e. Series)
    if other.ndim != 1:
        return False
    return array_equals(self.blocks[0].values, other.blocks[0].values)

2147 

2148 

2149# -------------------------------------------------------------------- 

2150# Constructor Helpers 

2151 

2152 

def create_block_manager_from_blocks(
    blocks: list[Block],
    axes: list[Index],
    consolidate: bool = True,
    verify_integrity: bool = True,
) -> BlockManager:
    """
    Build a BlockManager from already-formed 2D blocks.

    If verify_integrity=False, the caller is responsible for checking:
      - all(x.shape[-1] == len(axes[1]) for x in blocks)
      - sum(x.shape[0] for x in blocks) == len(axes[0])
      - set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0])))
      - all(blk.ndim == 2 for blk in blocks)
    which is what makes passing verify_integrity=False safe.
    """
    try:
        mgr = BlockManager(blocks, axes, verify_integrity=verify_integrity)
    except ValueError as err:
        # Translate into a more helpful construction-time message.
        arrays = [blk.values for blk in blocks]
        tot_items = sum(arr.shape[0] for arr in arrays)
        raise construction_error(tot_items, arrays[0].shape[1:], axes, err)

    if consolidate:
        mgr._consolidate_inplace()
    return mgr

2177 

2178 

def create_block_manager_from_column_arrays(
    arrays: list[ArrayLike],
    axes: list[Index],
    consolidate: bool = True,
) -> BlockManager:
    """
    Build a BlockManager from a list of 1D column arrays.

    Assertions disabled for performance; the caller is responsible for:
      - isinstance(axes, list) and all entries are Index
      - all arrays are np.ndarray/ExtensionArray (not PandasArray), 1-dim
      - all(len(x) == len(axes[1]) for x in arrays)
      - len(arrays) == len(axes[0])
    The last three are what make verify_integrity=False safe below.
    """
    try:
        blocks = _form_blocks(arrays, consolidate)
        mgr = BlockManager(blocks, axes, verify_integrity=False)
    except ValueError as err:
        # Translate into a more helpful construction-time message.
        raise construction_error(len(arrays), arrays[0].shape, axes, err)

    if consolidate:
        mgr._consolidate_inplace()
    return mgr

2203 

2204 

def construction_error(
    tot_items: int,
    block_shape: Shape,
    axes: list[Index],
    e: ValueError | None = None,
):
    """
    Build (not raise) a helpful exception about a failed construction.

    Parameters
    ----------
    tot_items : int
        Total number of items (columns) passed.
    block_shape : Shape
        Shape of the offending block, excluding the items axis.
    axes : list of Index
        The axes implied by the caller's indices.
    e : ValueError, optional
        The originating error; returned unchanged when the passed and
        implied shapes agree (its message is then the most precise one).

    Returns
    -------
    ValueError
        We return the exception object instead of raising it so that the
        caller can raise it; mypy plays better with that.
    """
    passed = tuple(map(int, [tot_items] + list(block_shape)))
    # Correcting the user facing error message during dataframe construction
    if len(passed) <= 2:
        passed = passed[::-1]

    implied = tuple(len(ax) for ax in axes)
    # Correcting the user facing error message during dataframe construction
    if len(implied) <= 2:
        implied = implied[::-1]

    if passed == implied and e is not None:
        return e
    # BUGFIX: guard against an empty block_shape (e.g. from scalar data) —
    # previously `block_shape[0]` raised IndexError here, replacing the
    # helpful ValueError with the wrong exception type.
    if len(block_shape) > 0 and block_shape[0] == 0:
        return ValueError("Empty data passed with indices specified.")
    return ValueError(f"Shape of passed values is {passed}, indices imply {implied}")

2229 

2230 

2231# ----------------------------------------------------------------------- 

2232 

2233 

def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
    """
    Key for grouping (position, array) tuples by dtype in _form_blocks.

    compat for numpy<1.21, in which comparing a np.dtype with an
    ExtensionDtype raises instead of returning False.  Once earlier numpy
    versions are dropped, this can be simplified to ``return tup[1].dtype``.
    """
    dtype = tup[1].dtype

    # 1D-only EAs are never consolidated, so give each dtype instance its
    # own group key; this also avoids expensive comparisons of
    # CategoricalDtype objects.
    separator = id(dtype) if is_1d_only_ea_dtype(dtype) else 0

    return separator, isinstance(dtype, np.dtype), dtype

2248 

2249 

def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
    """
    Convert a list of 1D column arrays into a list of 2D Blocks.

    With ``consolidate=True``, consecutive arrays that share a dtype are
    stacked into one multi-column block; otherwise every array gets its
    own block.
    """
    tuples = list(enumerate(arrays))

    if not consolidate:
        nbs = _tuples_to_blocks_no_consolidate(tuples)
        return nbs

    # group by dtype (groupby only merges *consecutive* equal keys)
    grouper = itertools.groupby(tuples, _grouping_func)

    nbs = []
    for (_, _, dtype), tup_block in grouper:
        block_type = get_block_type(dtype)

        if isinstance(dtype, np.dtype):
            is_dtlike = dtype.kind in ["m", "M"]

            if issubclass(dtype.type, (str, bytes)):
                # fixed-width string/bytes dtypes are stored as object
                dtype = np.dtype(object)

            values, placement = _stack_arrays(list(tup_block), dtype)
            if is_dtlike:
                # wrap datetime64/timedelta64 values in the EA equivalent
                values = ensure_wrapped_if_datetimelike(values)
            blk = block_type(values, placement=BlockPlacement(placement), ndim=2)
            nbs.append(blk)

        elif is_1d_only_ea_dtype(dtype):
            # 1D-only extension arrays cannot be stacked: one block each
            dtype_blocks = [
                block_type(x[1], placement=BlockPlacement(x[0]), ndim=2)
                for x in tup_block
            ]
            nbs.extend(dtype_blocks)

        else:
            # 2D-capable extension arrays: reshape each to 2D, one block each
            dtype_blocks = [
                block_type(
                    ensure_block_shape(x[1], 2), placement=BlockPlacement(x[0]), ndim=2
                )
                for x in tup_block
            ]
            nbs.extend(dtype_blocks)
    return nbs

2292 

2293 

def _tuples_to_blocks_no_consolidate(tuples) -> list[Block]:
    """
    Turn (placement, array) tuples (as produced within _form_blocks)
    into one 2D block per array, without any consolidation.
    """
    blocks: list[Block] = []
    for placement, arr in tuples:
        arr2d = ensure_block_shape(arr, ndim=2)
        blocks.append(new_block_2d(arr2d, placement=BlockPlacement(placement)))
    return blocks

2300 

2301 

def _stack_arrays(tuples, dtype: np.dtype):
    """
    Stack the 1D arrays from (placement, array) tuples into a single 2D
    ndarray of the given dtype.

    Returns the (n_arrays, length)-shaped ndarray together with the tuple
    of placements, preserving input order.
    """
    placement, arrays = zip(*tuples)

    first = arrays[0]
    stacked = np.empty((len(arrays),) + first.shape, dtype=dtype)
    for row, arr in enumerate(arrays):
        stacked[row] = arr

    return stacked, placement

2314 

2315 

def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]:
    """
    Merge blocks having the same dtype; non-consolidatable blocks pass
    through unchanged.
    """
    # sort by (_can_consolidate, dtype) so groupby sees equal keys together
    gkey = lambda x: x._consolidate_key
    grouper = itertools.groupby(sorted(blocks, key=gkey), gkey)

    new_blocks: list[Block] = []
    for (_can_consolidate, dtype), group_blocks in grouper:
        merged, _ = _merge_blocks(
            list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
        )
        new_blocks = extend_blocks(merged, new_blocks)
    return tuple(new_blocks)

2331 

2332 

def _consolidate_with_refs(
    blocks: tuple[Block, ...], refs
) -> tuple[tuple[Block, ...], list[weakref.ref | None]]:
    """
    Merge blocks having the same dtype (as _consolidate does) while
    keeping the copy-on-write refs aligned with the output blocks.

    A merged group produces a fresh array, so its ref becomes None;
    unmerged groups keep their original refs.
    """
    gkey = lambda x: x[0]._consolidate_key
    grouper = itertools.groupby(sorted(zip(blocks, refs), key=gkey), gkey)

    new_blocks: list[Block] = []
    new_refs: list[weakref.ref | None] = []
    for (_can_consolidate, dtype), group_blocks_refs in grouper:
        group_blocks, group_refs = list(zip(*list(group_blocks_refs)))
        merged, consolidated = _merge_blocks(
            list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
        )
        new_blocks = extend_blocks(merged, new_blocks)
        if consolidated:
            new_refs.append(None)
        else:
            new_refs.extend(group_refs)
    return tuple(new_blocks), new_refs

2356 

2357 

def _merge_blocks(
    blocks: list[Block], dtype: DtypeObj, can_consolidate: bool
) -> tuple[list[Block], bool]:
    """
    Merge a group of same-dtype blocks into a single block when possible.

    Returns the (possibly merged) list of blocks and a flag indicating
    whether a merge actually happened.
    """
    if len(blocks) == 1:
        return blocks, False

    if not can_consolidate:
        # can't consolidate --> no merge
        return blocks, False

    # TODO: optimization potential in case all mgrs contain slices and
    # combination of those slices is a slice, too.
    new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])

    new_values: ArrayLike
    if isinstance(blocks[0].dtype, np.dtype):
        # error: List comprehension has incompatible type List[Union[ndarray,
        # ExtensionArray]]; expected List[Union[complex, generic,
        # Sequence[Union[int, float, complex, str, bytes, generic]],
        # Sequence[Sequence[Any]], SupportsArray]]
        new_values = np.vstack([b.values for b in blocks])  # type: ignore[misc]
    else:
        bvals = [blk.values for blk in blocks]
        bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals)
        new_values = bvals2[0]._concat_same_type(bvals2, axis=0)

    # restore the original column order
    order = np.argsort(new_mgr_locs)
    new_values = new_values[order]
    new_mgr_locs = new_mgr_locs[order]

    bp = BlockPlacement(new_mgr_locs)
    return [new_block_2d(new_values, placement=bp)], True

2393 

2394 

def _fast_count_smallints(arr: npt.NDArray[np.intp]):
    """
    Faster version of set(arr) for sequences of small nonnegative ints:
    yields (value, count) pairs in ascending value order.
    """
    counts = np.bincount(arr)
    nonzero = counts.nonzero()[0]
    # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here,
    # in one benchmark by a factor of 11
    return zip(nonzero, counts[nonzero])

2402 

2403 

def _preprocess_slice_or_indexer(
    slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool
):
    """
    Normalize an indexer to ("slice", slc, slice_length) or
    ("fancy", int_ndarray, n_indices).

    Raises
    ------
    TypeError
        If the indexer is neither a slice nor an integer ndarray.
    """
    if isinstance(slice_or_indexer, slice):
        return (
            "slice",
            slice_or_indexer,
            libinternals.slice_len(slice_or_indexer, length),
        )

    if (
        not isinstance(slice_or_indexer, np.ndarray)
        or slice_or_indexer.dtype.kind != "i"
    ):
        dtype = getattr(slice_or_indexer, "dtype", None)
        raise TypeError(type(slice_or_indexer), dtype)

    indexer = ensure_platform_int(slice_or_indexer)
    if not allow_fill:
        indexer = maybe_convert_indices(indexer, length)
    return "fancy", indexer, len(indexer)

2425 

2426 

def _using_copy_on_write():
    """Return the global "mode.copy_on_write" option (copy-on-write enabled?)."""
    return get_option("mode.copy_on_write")