Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/blocks.py: 19%

946 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from functools import wraps 

4import re 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Iterable, 

10 Sequence, 

11 cast, 

12 final, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 Timestamp, 

20 internals as libinternals, 

21 lib, 

22 writers, 

23) 

24from pandas._libs.internals import BlockPlacement 

25from pandas._libs.tslibs import IncompatibleFrequency 

26from pandas._typing import ( 

27 ArrayLike, 

28 DtypeObj, 

29 F, 

30 IgnoreRaise, 

31 Shape, 

32 npt, 

33) 

34from pandas.errors import AbstractMethodError 

35from pandas.util._decorators import cache_readonly 

36from pandas.util._exceptions import find_stack_level 

37from pandas.util._validators import validate_bool_kwarg 

38 

39from pandas.core.dtypes.astype import astype_array_safe 

40from pandas.core.dtypes.cast import ( 

41 LossySetitemError, 

42 can_hold_element, 

43 find_result_type, 

44 maybe_downcast_to_dtype, 

45 np_can_hold_element, 

46 soft_convert_objects, 

47) 

48from pandas.core.dtypes.common import ( 

49 ensure_platform_int, 

50 is_1d_only_ea_dtype, 

51 is_1d_only_ea_obj, 

52 is_dtype_equal, 

53 is_interval_dtype, 

54 is_list_like, 

55 is_sparse, 

56 is_string_dtype, 

57) 

58from pandas.core.dtypes.dtypes import ( 

59 CategoricalDtype, 

60 ExtensionDtype, 

61 PandasDtype, 

62 PeriodDtype, 

63) 

64from pandas.core.dtypes.generic import ( 

65 ABCDataFrame, 

66 ABCIndex, 

67 ABCPandasArray, 

68 ABCSeries, 

69) 

70from pandas.core.dtypes.inference import is_inferred_bool_dtype 

71from pandas.core.dtypes.missing import ( 

72 is_valid_na_for_dtype, 

73 isna, 

74 na_value_for_dtype, 

75) 

76 

77import pandas.core.algorithms as algos 

78from pandas.core.array_algos.putmask import ( 

79 extract_bool_array, 

80 putmask_inplace, 

81 putmask_without_repeat, 

82 setitem_datetimelike_compat, 

83 validate_putmask, 

84) 

85from pandas.core.array_algos.quantile import quantile_compat 

86from pandas.core.array_algos.replace import ( 

87 compare_or_regex_search, 

88 replace_regex, 

89 should_use_regex, 

90) 

91from pandas.core.array_algos.transforms import shift 

92from pandas.core.arrays import ( 

93 Categorical, 

94 DatetimeArray, 

95 ExtensionArray, 

96 IntervalArray, 

97 PandasArray, 

98 PeriodArray, 

99 TimedeltaArray, 

100) 

101from pandas.core.arrays.sparse import SparseDtype 

102from pandas.core.base import PandasObject 

103import pandas.core.common as com 

104import pandas.core.computation.expressions as expressions 

105from pandas.core.construction import ( 

106 ensure_wrapped_if_datetimelike, 

107 extract_array, 

108) 

109from pandas.core.indexers import check_setitem_lengths 

110import pandas.core.missing as missing 

111 

112if TYPE_CHECKING: 112 ↛ 113line 112 didn't jump to line 113, because the condition on line 112 was never true

113 from pandas import ( 

114 Float64Index, 

115 Index, 

116 ) 

117 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

118 

# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")

121 

122 

def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        # Single column (or 1D): nothing to split, call through directly.
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        # Multi-column 2D block: apply `meth` to each single-column piece.
        return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

139 

140 

class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    # underlying homogeneous data; ndarray for numpy-backed blocks,
    # ExtensionArray for extension blocks
    values: np.ndarray | ExtensionArray
    # dimensionality of the owning manager's blocks (1 or 2)
    ndim: int
    __init__: Callable

    __slots__ = ()
    # class-level flags; dtype-specific subclasses override these
    is_numeric = False
    is_object = False
    is_extension = False
    # whether the BlockManager may merge this block with same-dtype blocks
    _can_consolidate = True
    # NOTE(review): presumably controls ndim validation at construction —
    # confirm against the constructor (not shown in this chunk)
    _validate_ndim = True

    @final
    @cache_readonly
    def _consolidate_key(self):
        # Key used by the manager to group blocks for consolidation:
        # only consolidatable blocks of the same dtype are merged.
        return self._can_consolidate, self.dtype.name

164 

165 @final 

166 @cache_readonly 

167 def _can_hold_na(self) -> bool: 

168 """ 

169 Can we store NA values in this Block? 

170 """ 

171 dtype = self.dtype 

172 if isinstance(dtype, np.dtype): 

173 return dtype.kind not in ["b", "i", "u"] 

174 return dtype._can_hold_na 

175 

    @final
    @cache_readonly
    def is_categorical(self) -> bool:
        # Deprecated accessor kept for backwards compatibility (GH#40226).
        warnings.warn(
            "Block.is_categorical is deprecated and will be removed in a "
            "future version. Use isinstance(block.values, Categorical) "
            "instead. See https://github.com/pandas-dev/pandas/issues/40226",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        return isinstance(self.values, Categorical)

187 

    @final
    @property
    def is_bool(self) -> bool:
        """
        We can be bool if a) we are bool dtype or b) object dtype with bool objects.
        """
        # delegates inference (including object-dtype inspection) to dtypes.inference
        return is_inferred_bool_dtype(self.values)

195 

    @final
    def external_values(self):
        # Delegate to the module-level helper of the same name
        # (defined elsewhere in this file).
        return external_values(self.values)

199 

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        # compat=False: return the dtype's own NA (e.g. NaT for datetimes),
        # not a lowest-common-denominator value
        return na_value_for_dtype(self.dtype, compat=False)

205 

206 @final 

207 def _standardize_fill_value(self, value): 

208 # if we are passed a scalar None, convert it here 

209 if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype): 

210 value = self.fill_value 

211 return value 

212 

    @property
    def mgr_locs(self) -> BlockPlacement:
        # Placement of this block's rows within the owning BlockManager.
        return self._mgr_locs

216 

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        # No validation here; callers are responsible for consistency
        # with the owning manager.
        self._mgr_locs = new_mgr_locs

220 

    @final
    def make_block(self, values, placement=None) -> Block:
        """
        Create a new block, with type inference propagate any values that are
        not specified
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            # 1D extension arrays may need reshaping to match self.ndim
            values = ensure_block_shape(values, ndim=self.ndim)

        # TODO: perf by not going through new_block
        # We assume maybe_coerce_values has already been called
        return new_block(values, placement=placement, ndim=self.ndim)

235 

    @final
    def make_block_same_class(
        self, values, placement: BlockPlacement | None = None
    ) -> Block:
        """Wrap given values in a block of same type as self."""
        if placement is None:
            placement = self._mgr_locs

        if values.dtype.kind in ["m", "M"]:
            # datetime64/timedelta64 ndarrays should arrive pre-wrapped;
            # warn (not raise) for now to keep old callers working
            new_values = ensure_wrapped_if_datetimelike(values)
            if new_values is not values:
                # TODO(2.0): remove once fastparquet has stopped relying on it
                warnings.warn(
                    "In a future version, Block.make_block_same_class will "
                    "assume that datetime64 and timedelta64 ndarrays have "
                    "already been cast to DatetimeArray and TimedeltaArray, "
                    "respectively.",
                    DeprecationWarning,
                    stacklevel=find_stack_level(),
                )
                values = new_values

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim)

261 

262 @final 

263 def __repr__(self) -> str: 

264 # don't want to print out all of the items here 

265 name = type(self).__name__ 

266 if self.ndim == 1: 

267 result = f"{name}: {len(self)} dtype: {self.dtype}" 

268 else: 

269 

270 shape = " x ".join([str(s) for s in self.shape]) 

271 result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" 

272 

273 return result 

274 

    @final
    def __len__(self) -> int:
        # Length of the underlying values (number of rows for 2D blocks).
        return len(self.values)

278 

    @final
    def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: the only place where we are called with ndarray[intp]
        # is from internals.concat, and we can verify that never happens
        # with 1-column blocks, i.e. never for ExtensionBlock.

        # Invalid index type "Union[slice, ndarray[Any, dtype[signedinteger[Any]]]]"
        # for "BlockPlacement"; expected type "Union[slice, Sequence[int]]"
        new_mgr_locs = self._mgr_locs[slicer]  # type: ignore[index]

        new_values = self._slice(slicer)

        # dimensionality must be preserved; otherwise the resulting block
        # would be inconsistent with self.ndim
        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        return type(self)(new_values, new_mgr_locs, self.ndim)

300 

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement
    ) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.

        Unlike getitem_block, the caller supplies the new placement directly.
        """
        new_values = self._slice(slicer)

        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        return type(self)(new_values, new_mgr_locs, self.ndim)

316 

    @final
    def _can_hold_element(self, element: Any) -> bool:
        """require the same dtype as ourselves"""
        # unwrap Series/Index to the underlying array before checking
        element = extract_array(element, extract_numpy=True)
        return can_hold_element(self.values, element)

322 

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        # faster equivalent to is_dtype_equal(value.dtype, self.dtype)
        try:
            return value.dtype == self.dtype
        except TypeError:
            # dtype comparison itself can raise for mismatched types;
            # treat that as "not storable in place"
            return False

341 

342 # --------------------------------------------------------------------- 

343 # Apply/Reduce and Helpers 

344 

345 @final 

346 def apply(self, func, **kwargs) -> list[Block]: 

347 """ 

348 apply the function to my values; return a block if we are not 

349 one 

350 """ 

351 result = func(self.values, **kwargs) 

352 

353 return self._split_op_result(result) 

354 

    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        try:
            result = func(self.values)
        except (TypeError, NotImplementedError):
            # ignore_failures=True: drop this block from the result entirely
            if ignore_failures:
                return []
            raise

        if self.values.ndim == 1:
            # TODO(EA2D): special case not needed with 2D EAs
            res_values = np.array([[result]])
        else:
            # one reduced value per row -> single-column 2D shape
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

375 

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_obj(result):
                    # 2D-capable EA: keep a length-1 first axis
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                block = self.make_block(values=vals, placement=loc)
                nbs.append(block)
            return nbs

        # common path: wrap the whole result in a single block
        nb = self.make_block(result)

        return [nb]

396 

397 @final 

398 def _split(self) -> list[Block]: 

399 """ 

400 Split a block into a list of single-column blocks. 

401 """ 

402 assert self.ndim == 2 

403 

404 new_blocks = [] 

405 for i, ref_loc in enumerate(self._mgr_locs): 

406 vals = self.values[slice(i, i + 1)] 

407 

408 bp = BlockPlacement(ref_loc) 

409 nb = type(self)(vals, placement=bp, ndim=2) 

410 new_blocks.append(nb) 

411 return new_blocks 

412 

413 @final 

414 def split_and_operate(self, func, *args, **kwargs) -> list[Block]: 

415 """ 

416 Split the block and apply func column-by-column. 

417 

418 Parameters 

419 ---------- 

420 func : Block method 

421 *args 

422 **kwargs 

423 

424 Returns 

425 ------- 

426 List[Block] 

427 """ 

428 assert self.ndim == 2 and self.shape[0] != 1 

429 

430 res_blocks = [] 

431 for nb in self._split(): 

432 rbs = func(nb, *args, **kwargs) 

433 res_blocks.extend(rbs) 

434 return res_blocks 

435 

436 # --------------------------------------------------------------------- 

437 # Up/Down-casting 

438 

    @final
    def coerce_to_target_dtype(self, other) -> Block:
        """
        coerce the current block to a dtype compat for other
        we will return a block, possibly object, and not raise

        we can also safely try to coerce to the same dtype
        and will receive the same block
        """
        # find_result_type picks the common dtype for (self.values, other)
        new_dtype = find_result_type(self.values, other)

        return self.astype(new_dtype, copy=False)

451 

    @final
    def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
        # downcast=False: caller explicitly opted out
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks
            return extend_blocks(
                [blk.convert(datetime=True, numeric=False) for blk in blocks]
            )

        # non-object dtype with no explicit request: leave unchanged
        if downcast is None:
            return blocks

        return extend_blocks([b._downcast_2d(downcast) for b in blocks])

471 

    @final
    @maybe_split
    def _downcast_2d(self, dtype) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        # maybe_split guarantees we are single-column here
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        return [self.make_block(new_values)]

482 

483 def convert( 

484 self, 

485 copy: bool = True, 

486 datetime: bool = True, 

487 numeric: bool = True, 

488 timedelta: bool = True, 

489 ) -> list[Block]: 

490 """ 

491 attempt to coerce any object types to better types return a copy 

492 of the block (if copy = True) by definition we are not an ObjectBlock 

493 here! 

494 """ 

495 return [self.copy()] if copy else [self] 

496 

497 # --------------------------------------------------------------------- 

498 # Array-Like Methods 

499 

    @cache_readonly
    def dtype(self) -> DtypeObj:
        # cached: blocks are homogeneous, so the dtype never changes in place
        return self.values.dtype

503 

    @final
    def astype(
        self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise"
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object

        Returns
        -------
        Block

        Raises
        ------
        TypeError
            If the cast changed the block's shape.
        """
        values = self.values

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)
        newb = self.make_block(new_values)
        # a cast must never change shape; guard against buggy EA casts
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

537 

    @final
    def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block:
        """convert to our native types format"""
        # delegates to the module-level to_native_types helper
        result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
        return self.make_block(result)

543 

544 @final 

545 def copy(self, deep: bool = True) -> Block: 

546 """copy constructor""" 

547 values = self.values 

548 if deep: 

549 values = values.copy() 

550 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim) 

551 

552 # --------------------------------------------------------------------- 

553 # Replace 

554 

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> list[Block]:
        """
        replace the to_replace value with value, possible to create new
        blocks here this is just a call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            blk = self if inplace else self.copy()
            # error: Item "ExtensionArray" of "Union[ndarray[Any, Any],
            # ExtensionArray]" has no attribute "_replace"
            blk.values._replace(  # type: ignore[union-attr]
                to_replace=to_replace, value=value, inplace=True
            )
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # value fits in the current dtype: plain in-place putmask
            blk = self if inplace else self.copy()
            putmask_inplace(blk.values, mask, value)
            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                blocks = blk.convert(numeric=False, copy=False)
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            # value does not fit: upcast this (single-column) block, then
            # recurse so the putmask path above can run
            if value is None:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        # pass the matching single-row slice of the mask
                        mask=mask[i : i + 1],
                    )
                )
            return blocks

635 

    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        convert: bool = True,
        mask=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        convert : bool, default True
            If true, try to coerce any object types to better types.
        mask : array-like of bool, optional
            True indicate corresponding element is ignored.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only ObjectBlock, but could in principle include a
            # String ExtensionBlock
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        new_values = self.values if inplace else self.values.copy()
        # mutates new_values in place
        replace_regex(new_values, rx, value, mask)

        block = self.make_block(new_values)
        return block.convert(numeric=False, copy=False)

677 

    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            mask = ~isna(values)
            masks = [
                compare_or_regex_search(values, s[0], regex=regex, mask=mask)
                for s in pairs
            ]
        else:
            # GH#38086 faster if we know we dont need to check for regex
            masks = [missing.mask_missing(values, s[0]) for s in pairs]

        # error: Argument 1 to "extract_bool_array" has incompatible type
        # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray,
        # ndarray]"
        masks = [extract_bool_array(x) for x in masks]  # type: ignore[arg-type]

        rb = [self if inplace else self.copy()]
        for i, (src, dest) in enumerate(pairs):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = masks[i]
                else:
                    # blk is a single-column split; take its row of the mask
                    mib = masks[i]
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,  # type: ignore[arg-type]
                    inplace=inplace,
                    regex=regex,
                )
                if convert and blk.is_object and not all(x is None for x in dest_list):
                    # GH#44498 avoid unwanted cast-back
                    result = extend_blocks(
                        [b.convert(numeric=False, copy=True) for b in result]
                    )
                new_rb.extend(result)
            rb = new_rb
        return rb

752 

    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicate corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                convert=False,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    # cast to object so None can be stored
                    nb = self.astype(np.dtype(object), copy=False)
                    if nb is self and not inplace:
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace, value=value, inplace=inplace, mask=mask
            )

804 

805 # --------------------------------------------------------------------- 

806 # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock 

807 # but not ExtensionBlock 

808 

    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.

        Base implementation is a no-op; ExtensionBlock overrides.
        """
        return arg

814 

    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.

        Base implementation is a no-op; ExtensionBlock overrides.
        """
        return indexer

820 

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        return self.values.shape

826 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

835 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""
        # plain positional/boolean indexing on the underlying array
        return self.values[slicer]

842 

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

860 

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            # no explicit fill: use our own NA and disallow -1 filling
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (axis == 0 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if not is_dtype_equal(new_values.dtype, self.dtype):
            # take may have upcast; re-infer the block type
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

894 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        allow_fill : bool
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        # keep a column if any of its entries are present
        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

938 

939 # --------------------------------------------------------------------- 

940 

    def setitem(self, indexer, value) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            # indexer is row-oriented; work on the transposed view
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)
            values[indexer] = casted
            return self

988 

    def putmask(self, mask, new) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object

        Returns
        -------
        List[Block]
        """
        # keep the un-normalized mask for per-column slicing below
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            # nothing masked: return self unchanged
            return [self]

        try:
            casted = np_can_hold_element(values.dtype, new)
            putmask_without_repeat(values.T, mask, casted)
            return [self]
        except LossySetitemError:
            # `new` cannot be stored losslessly in our dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(new).putmask(mask, new)
                else:
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer])
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

1051 

    def where(self, other, cond, _downcast="infer") -> list[Block]:
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or None, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        # keep the un-standardized `other` for the recursive calls below
        orig_other = other
        if transpose:
            values = values.T

        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will re-use below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond)
                return self._maybe_downcast(blocks, downcast=_downcast)

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(oth, submask, _downcast=_downcast)
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                # If lengths don't match (or len(other)==1), we will raise
                # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

        if transpose:
            # undo the transpose applied above
            result = result.T

        return [self.make_block(result)]

1162 

    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy()
                nbs = nb._maybe_downcast([nb], downcast=downcast)
                return nbs

        if limit is not None:
            # only fill the first `limit` NA positions along the last axis
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(mask.T, value)
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [blk._maybe_downcast([blk], downcast=downcast) for blk in nbs]
        )

1208 

    def interpolate(
        self,
        method: str = "pad",
        axis: int = 0,
        index: Index | None = None,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: str = "forward",
        limit_area: str | None = None,
        fill_value: Any | None = None,
        downcast: str | None = None,
        **kwargs,
    ) -> list[Block]:
        # Interpolate missing values in this block, returning the resulting
        # block(s); delegates the actual work to missing.interpolate_array_2d.

        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            return [self] if inplace else [self.copy()]

        try:
            m = missing.clean_fill_method(method)
        except ValueError:
            # method is not a fill method (e.g. "linear"); m=None signals that
            m = None
        if m is None and self.dtype.kind != "f":
            # only deal with floats
            # bc we already checked that can_hold_na, we dont have int dtype here
            # test_interp_basic checks that we make a copy here
            return [self] if inplace else [self.copy()]

        if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0:
            # split improves performance in ndarray.copy()
            return self.split_and_operate(
                type(self).interpolate,
                method,
                axis,
                index,
                inplace,
                limit,
                limit_direction,
                limit_area,
                fill_value,
                downcast,
                **kwargs,
            )

        data = self.values if inplace else self.values.copy()
        data = cast(np.ndarray, data)  # bc overridden by ExtensionBlock

        # operates on `data` in place
        missing.interpolate_array_2d(
            data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

        nb = self.make_block_same_class(data)
        return nb._maybe_downcast([nb], downcast)

1272 

1273 def diff(self, n: int, axis: int = 1) -> list[Block]: 

1274 """return block for the diff of the values""" 

1275 new_values = algos.diff(self.values, n, axis=axis) 

1276 return [self.make_block(values=new_values)] 

1277 

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """shift the block by periods, possibly upcast"""
        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also

        # Note: periods is never 0 here, as that is handled at the top of
        # NDFrame.shift. If that ever changes, we can do a check for periods=0
        # and possibly avoid coercing.

        if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
            # with object dtype there is nothing to promote, and the user can
            # pass pretty much any weird fill_value they like
            # see test_shift_object_non_scalar_fill
            raise ValueError("fill_value must be a scalar")

        fill_value = self._standardize_fill_value(fill_value)

        try:
            # error: Argument 1 to "np_can_hold_element" has incompatible type
            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
            casted = np_can_hold_element(
                self.dtype, fill_value  # type: ignore[arg-type]
            )
        except LossySetitemError:
            # fill_value does not fit this dtype: upcast, then retry on the
            # coerced block
            nb = self.coerce_to_target_dtype(fill_value)
            return nb.shift(periods, axis=axis, fill_value=fill_value)

        else:
            values = cast(np.ndarray, self.values)
            new_values = shift(values, periods, axis, casted)
            return [self.make_block(new_values)]

1309 

    @final
    def quantile(
        self, qs: Float64Index, interpolation="linear", axis: int = 0
    ) -> Block:
        """
        compute the quantiles of the

        Parameters
        ----------
        qs : Float64Index
            List of the quantiles to be computed.
        interpolation : str, default 'linear'
            Type of interpolation.
        axis : int, default 0
            Axis to compute.

        Returns
        -------
        Block
        """
        # We should always have ndim == 2 because Series dispatches to DataFrame
        assert self.ndim == 2
        assert axis == 1  # only ever called this way
        assert is_list_like(qs)  # caller is responsible for this

        result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
        # ensure_block_shape needed for cases where we start with EA and result
        # is ndarray, e.g. IntegerArray, SparseArray
        result = ensure_block_shape(result, ndim=2)
        return new_block_2d(result, placement=self._mgr_locs)

1340 

1341 # --------------------------------------------------------------------- 

1342 # Abstract Methods Overridden By EABackedBlock and NumpyBlock 

1343 

    def delete(self, loc) -> Block:
        """
        Return a new Block with the given loc(s) deleted.

        Implemented by EABackedBlock and NumpyBlock subclasses.
        """
        raise AbstractMethodError(self)

1349 

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1354 

    @property
    def array_values(self) -> ExtensionArray:
        """
        The array that Series.array returns. Always an ExtensionArray.
        """
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1361 

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1368 

    def values_for_json(self) -> np.ndarray:
        # abstract: the ndarray handed to the JSON serializer;
        # overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1371 

1372 

class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.
    """

    values: ExtensionArray

    def setitem(self, indexer, value):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # keep the un-adapted arguments so the coerced block (below) can
        # redo the full setitem from scratch
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            # unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            values[indexer] = value
        except (ValueError, TypeError) as err:
            # re-raises unless err is one of the deprecated-but-tolerated cases
            _catch_deprecated_value_error(err)

            if is_interval_dtype(self.dtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                # e.g. datetime64tz / Period: upcast and retry
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            else:
                raise

        else:
            return self

    def where(self, other, cond, _downcast="infer") -> list[Block]:
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        # keep un-squeezed versions for the recursive/coerced calls below
        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    def putmask(self, mask, new) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)

        values = self.values
        if values.ndim == 2:
            values = values.T

        # keep un-squeezed versions for the recursive/coerced calls below
        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # nothing selected; no-op
            return [self]

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # Discussion about what we want to support in the general
                    #  case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        # Caller is responsible for validating limit; if int it is strictly positive

        if self.dtype.kind == "m":
            # timedelta64: let the array's own fillna handle it, with a
            # deprecation warning for incompatible values
            try:
                res_values = self.values.fillna(value, limit=limit)
            except (ValueError, TypeError):
                # GH#45746
                warnings.warn(
                    "The behavior of fillna with timedelta64[ns] dtype and "
                    f"an incompatible value ({type(value)}) is deprecated. "
                    "In a future version, this will cast to a common dtype "
                    "(usually object) instead of raising, matching the "
                    "behavior of other dtypes.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise
            else:
                res_blk = self.make_block(res_values)
                return [res_blk]

        # TODO: since this now dispatches to super, which in turn dispatches
        #  to putmask, it may *actually* respect 'inplace=True'. If so, add
        #  tests for this.
        return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast)

    def delete(self, loc) -> Block:
        """Return a new Block with the given loc(s) removed from both values
        and placement."""
        # This will be unnecessary if/when __array_function__ is implemented
        values = self.values.delete(loc)
        mgr_locs = self._mgr_locs.delete(loc)
        return type(self)(values, placement=mgr_locs, ndim=self.ndim)

    @cache_readonly
    def array_values(self) -> ExtensionArray:
        # already an ExtensionArray; no wrapping needed
        return self.values

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    def values_for_json(self) -> np.ndarray:
        # materialize the EA as an ndarray for the JSON serializer
        return np.asarray(self.values)

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        # EA interpolation is implemented via the array's fillna
        values = self.values
        if values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

1621 

1622 

class ExtensionBlock(libinternals.Block, EABackedBlock):
    """
    Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    # EA blocks cannot be merged with neighboring blocks
    _can_consolidate = False
    # values are 1-D even when the block is logically 2-D
    _validate_ndim = False
    is_extension = True

    values: ExtensionArray

    @cache_readonly
    def shape(self) -> Shape:
        # Logical block shape; the 1-D EA fakes a second dimension.
        # TODO(EA2D): override unnecessary with 2D EAs
        if self.ndim == 1:
            return (len(self.values),)
        return len(self._mgr_locs), len(self.values)

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
        # Positional getter; column index must be 0 since EA blocks hold
        # exactly one column.
        # In the case where we have a tuple[slice, int], the slice will always
        #  be slice(None)
        # We _could_ make the annotation more specific, but mypy would
        #  complain about override mismatch:
        #  Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

        # Note: only reached with self.ndim == 2

        if isinstance(i, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = i
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            elif isinstance(col, slice):
                # the is_null_slice check above assures that col is slice(None)
                #  so what we want is a view on all our columns and row loc
                if loc < 0:
                    loc += len(self.values)
                # Note: loc:loc+1 vs [[loc]] makes a difference when called
                #  from fast_xs because we want to get a view back.
                return self.values[loc : loc + 1]
            return self.values[loc]
        else:
            if i != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        # Overwrite this block's values in place.
        # When an ndarray, we should have locs.tolist() == [0]
        # When a BlockPlacement we should have list(locs) == [0]
        if copy:
            self.values = self.values.copy()
        self.values[:] = values

    def _maybe_squeeze_arg(self, arg):
        """
        If necessary, squeeze a (N, 1) ndarray to (N,)
        """
        # e.g. if we are passed a 2D mask for putmask
        if (
            isinstance(arg, (np.ndarray, ExtensionArray))
            and arg.ndim == self.values.ndim + 1
        ):
            # TODO(EA2D): unnecessary with 2D EAs
            assert arg.shape[1] == 1
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[slice, int]"
            arg = arg[:, 0]  # type: ignore[call-overload]
        elif isinstance(arg, ABCDataFrame):
            # 2022-01-06 only reached for setitem
            # TODO: should we avoid getting here with DataFrame?
            assert arg.shape[1] == 1
            arg = arg._ixs(0, axis=1)._values

        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        Adapt a 2D-indexer to our 1D values.

        This is intended for 'setitem', not 'iget' or '_slice'.
        """
        # TODO: ATM this doesn't work for iget/_slice, can we change that?

        if isinstance(indexer, tuple):
            # TODO(EA2D): not needed with 2D EAs
            #  Should never have length > 2.  Caller is responsible for checking.
            #  Length 1 is reached vis setitem_single_block and setitem_single_column
            #  each of which pass indexer=(pi,)
            if len(indexer) == 2:

                if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                    # GH#44703 went through indexing.maybe_convert_ix
                    first, second = indexer
                    if not (
                        second.size == 1 and (second == 0).all() and first.shape[1] == 1
                    ):
                        raise NotImplementedError(
                            "This should not be reached. Please report a bug at "
                            "github.com/pandas-dev/pandas/"
                        )
                    indexer = first[:, 0]

                elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                    # reached via setitem_single_block passing the whole indexer
                    indexer = indexer[0]

                elif com.is_null_slice(indexer[1]):
                    indexer = indexer[0]

                elif is_list_like(indexer[1]) and indexer[1][0] == 0:
                    indexer = indexer[0]

                else:
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
        return indexer

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        return False

    @cache_readonly
    def is_numeric(self):
        # delegate to the dtype's own notion of numeric-ness
        return self.values.dtype._is_numeric

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int = 0,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        if fill_value is lib.no_default:
            fill_value = None

        # TODO(EA2D): special case not needed with 2D EAs
        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        #  this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        return self.make_block_same_class(new_values, new_mgr_locs)

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ExtensionArray:
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or ndarray[bool]
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        ExtensionArray
        """
        # Notes: ndarray[bool] is only reachable when via getitem_mgr, which
        #  is only for Series, i.e. self.ndim == 1.

        # return same dims as we currently have
        if self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): won't be necessary with 2D EAs

            if not isinstance(slicer, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): won't be necessary with 2D EAs
            # range(1) instead of self._mgr_locs to avoid exception on [::-1]
            #  see test_iloc_getitem_slice_negative_step_ea_block
            new_locs = range(1)[slicer]
            if not len(new_locs):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            slicer = slice(None)

        return self.values[slicer]

    @final
    def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
        """
        Perform __getitem__-like specialized to slicing along index.
        """
        # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
        # require subclasses of ExtensionArray to support that form (for now).
        new_values = self.values[slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

    def diff(self, n: int, axis: int = 1) -> list[Block]:
        # n-period discrete difference for a single-column EA block
        if axis == 0 and n != 0:
            # n==0 case will be a no-op so let is fall through
            # Since we only have one column, the result will be all-NA.
            #  Create this result by shifting along axis=0 past the length of
            #  our values.
            return super().diff(len(self.values), axis=0)
        if axis == 1:
            # TODO(EA2D): unnecessary with 2D EAs
            # we are by definition 1D.
            axis = 0
        return super().diff(n, axis)

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        new_values = self.values.shift(periods=periods, fill_value=fill_value)
        return [self.make_block_same_class(new_values)]

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        # ExtensionArray-safe unstack.
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.

        # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
        new_values, mask = unstacker.arange_result

        # Note: these next two lines ensure that
        #  mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        #  which the calling function needs in order to pass verify_integrity=False
        #  to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        # needs_masking[i] calculated once in BlockManager.unstack tells
        #  us if there are any -1s in the relevant indices.  When False,
        #  that allows us to go through a faster path in 'take', among
        #  other things avoiding e.g. Categorical._validate_scalar.
        blocks = [
            # TODO: could cast to object depending on fill_value?
            type(self)(
                self.values.take(
                    indices, allow_fill=needs_masking[i], fill_value=fill_value
                ),
                BlockPlacement(place),
                ndim=2,
            )
            for i, (indices, place) in enumerate(zip(new_values, new_placement))
        ]
        return blocks, mask

1898 

1899 

class NumpyBlock(libinternals.NumpyBlock, Block):
    """Block backed by a plain np.ndarray."""

    values: np.ndarray

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # an ndarray with a non-None .base may share memory with another array
        return self.values.base is not None

    @property
    def array_values(self) -> ExtensionArray:
        # wrap the ndarray so Series.array always sees an ExtensionArray
        return PandasArray(self.values)

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        # return the ndarray, cast to object only when explicitly requested
        if dtype == _dtype_obj:
            return self.values.astype(_dtype_obj)
        return self.values

    def values_for_json(self) -> np.ndarray:
        # the raw ndarray is already JSON-serializer friendly
        return self.values

    def delete(self, loc) -> Block:
        # drop rows at `loc` from the values and the matching placement entries
        values = np.delete(self.values, loc, 0)
        mgr_locs = self._mgr_locs.delete(loc)
        return type(self)(values, placement=mgr_locs, ndim=self.ndim)

1924 

1925 

class NumericBlock(NumpyBlock):
    """NumpyBlock holding numeric (int/float/complex/bool-like) values."""

    __slots__ = ()
    is_numeric = True

1929 

1930 

class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray
    """

    values: NDArrayBackedExtensionArray

    # error: Signature of "is_extension" incompatible with supertype "Block"
    @cache_readonly
    def is_extension(self) -> bool:  # type: ignore[override]
        # i.e. datetime64tz, PeriodDtype
        return not isinstance(self.dtype, np.dtype)

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # check the ndarray values of the DatetimeIndex values
        return self.values._ndarray.base is not None

    def diff(self, n: int, axis: int = 0) -> list[Block]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new Block.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        values = self.values

        # subtracting the shifted values yields NA at the first n positions
        new_values = values - values.shift(n, axis=axis)
        return [self.make_block(new_values)]

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        # delegate to the backing array's own shift implementation
        values = self.values
        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
        return [self.make_block_same_class(new_values)]

1979 

1980 

1981def _catch_deprecated_value_error(err: Exception) -> None: 

1982 """ 

1983 We catch ValueError for now, but only a specific one raised by DatetimeArray 

1984 which will no longer be raised in version.2.0. 

1985 """ 

1986 if isinstance(err, ValueError): 

1987 # TODO(2.0): once DTA._validate_setitem_value deprecation 

1988 # is enforced, stop catching ValueError here altogether 

1989 if isinstance(err, IncompatibleFrequency): 

1990 pass 

1991 elif "'value.closed' is" in str(err): 

1992 # IntervalDtype mismatched 'closed' 

1993 pass 

1994 elif "Timezones don't match" not in str(err): 

1995 raise 

1996 

1997 

class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    is_numeric = False
    values: DatetimeArray | TimedeltaArray

    def values_for_json(self) -> np.ndarray:
        # Expose the raw backing ndarray directly; DatetimeTZBlock overrides
        # this because for tz-aware data it would drop the timezone.
        return self.values._ndarray

2007 

2008 

class DatetimeTZBlock(DatetimeLikeBlock):
    """implement a datetime64 block with a tz attribute"""

    values: DatetimeArray

    __slots__ = ()
    # tz-aware datetime is an extension dtype, declared statically here
    is_extension = True
    _validate_ndim = True
    # tz-aware blocks cannot be consolidated with other datetime blocks
    _can_consolidate = False

    # Don't use values_for_json from DatetimeLikeBlock since it is
    # an invalid optimization here (it would drop the tz)
    values_for_json = NDArrayBackedExtensionBlock.values_for_json

2022 

2023 

class ObjectBlock(NumpyBlock):
    __slots__ = ()
    is_object = True

    @maybe_split
    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        """
        For object-dtype, we operate column-wise.
        """
        assert self.ndim == 2

        try:
            result = func(self.values)
        except TypeError:
            if ignore_failures:
                # Caller asked us to silently drop columns the reduction
                # cannot handle.
                return []
            raise

        assert isinstance(result, np.ndarray)
        assert result.ndim == 1
        return [self.make_block_same_class(result.reshape(1, -1))]

    @maybe_split
    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> list[Block]:
        """
        Attempt to soft-convert object dtype to better dtypes, returning a
        copy of the block if copy=True.  By definition we ARE an ObjectBlock.
        """
        arr = self.values
        if arr.ndim == 2:
            # maybe_split guarantees shape[0] == 1 here; index rather than
            # .ravel() so we do not accidentally force a copy.
            arr = arr[0]

        converted = soft_convert_objects(
            arr,
            datetime=datetime,
            numeric=numeric,
            timedelta=timedelta,
            copy=copy,
        )
        converted = ensure_block_shape(converted, self.ndim)
        return [self.make_block(converted)]

2074 

2075 

class CategoricalBlock(ExtensionBlock):
    # this Block type is kept only for backwards-compatibility; behavior
    # otherwise matches ExtensionBlock
    __slots__ = ()

    # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0,
    # so this cannot be cached (must re-read from the values each time)
    @property
    def dtype(self) -> DtypeObj:
        return self.values.dtype

2085 

2086 

2087# ----------------------------------------------------------------- 

2088# Constructor Helpers 

2089 

2090 

2091def maybe_coerce_values(values: ArrayLike) -> ArrayLike: 

2092 """ 

2093 Input validation for values passed to __init__. Ensure that 

2094 any datetime64/timedelta64 dtypes are in nanoseconds. Ensure 

2095 that we do not have string dtypes. 

2096 

2097 Parameters 

2098 ---------- 

2099 values : np.ndarray or ExtensionArray 

2100 

2101 Returns 

2102 ------- 

2103 values : np.ndarray or ExtensionArray 

2104 """ 

2105 # Caller is responsible for ensuring PandasArray is already extracted. 

2106 

2107 if isinstance(values, np.ndarray): 

2108 values = ensure_wrapped_if_datetimelike(values) 

2109 

2110 if issubclass(values.dtype.type, str): 

2111 values = np.array(values, dtype=object) 

2112 

2113 if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None: 

2114 # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame 

2115 values = values._with_freq(None) 

2116 

2117 return values 

2118 

2119 

def get_block_type(dtype: DtypeObj):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    # dtype.type / dtype.kind checks are much more performant than the
    # is_foo_dtype helpers, so prefer them here.
    cls: type[Block]

    if isinstance(dtype, SparseDtype):
        # Checked first(ish) so that Sparse[datetime] stays sparse
        cls = ExtensionBlock
    elif isinstance(dtype, CategoricalDtype):
        cls = CategoricalBlock
    elif dtype.type is Timestamp:
        # datetime64tz
        cls = DatetimeTZBlock
    elif isinstance(dtype, PeriodDtype):
        cls = NDArrayBackedExtensionBlock
    elif isinstance(dtype, ExtensionDtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        cls = ExtensionBlock
    elif dtype.kind in ["M", "m"]:
        # tz-naive datetime64 / timedelta64
        cls = DatetimeLikeBlock
    elif dtype.kind in ["f", "c", "i", "u", "b"]:
        cls = NumericBlock
    else:
        cls = ObjectBlock
    return cls

2159 

2160 

def new_block_2d(values: ArrayLike, placement: BlockPlacement):
    # Fast path of new_block, specialized to the case where:
    #   - ndim == 2
    #   - placement is already a BlockPlacement
    #   - check_ndim/ensure_block_shape have already been done by the caller
    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=2, placement=placement)

2170 

2171 

def new_block(values, placement, *, ndim: int) -> Block:
    # caller is responsible for ensuring values is NOT a PandasArray

    # Normalize placement up-front so check_ndim can rely on len(placement).
    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    check_ndim(values, placement, ndim)

    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=ndim, placement=placement)

2184 

2185 

def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    if values.ndim > ndim:
        # Applies to both np.ndarray and ExtensionArray
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim > ndim [{values.ndim} > {ndim}]"
        )

    if not is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case not needed with 2D EAs
        if values.ndim != ndim:
            raise ValueError(
                "Wrong number of dimensions. "
                f"values.ndim != ndim [{values.ndim} != {ndim}]"
            )
        if len(placement) != len(values):
            raise ValueError(
                f"Wrong number of items passed {len(values)}, "
                f"placement implies {len(placement)}"
            )
    elif ndim == 2 and len(placement) != 1:
        # 1d-only EAs may sit inside a 2D manager, but only one row at a time.
        # TODO(EA2D): special case unnecessary with 2D EAs
        raise ValueError("need to split")

2226 

2227 

def extract_pandas_array(
    values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int
) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]:
    """
    Ensure that we don't allow PandasArray / PandasDtype in internals.
    """
    # For now, blocks should be backed by plain ndarrays whenever possible.
    if isinstance(values, ABCPandasArray):
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    # Unwrap the numpy-backed wrapper dtype as well.
    out_dtype = dtype.numpy_dtype if isinstance(dtype, PandasDtype) else dtype

    return values, out_dtype

2245 

2246 

2247# ----------------------------------------------------------------- 

2248 

2249 

2250def extend_blocks(result, blocks=None) -> list[Block]: 

2251 """return a new extended blocks, given the result""" 

2252 if blocks is None: 

2253 blocks = [] 

2254 if isinstance(result, list): 

2255 for r in result: 

2256 if isinstance(r, list): 

2257 blocks.extend(r) 

2258 else: 

2259 blocks.append(r) 

2260 else: 

2261 assert isinstance(result, Block), type(result) 

2262 blocks.append(result) 

2263 return blocks 

2264 

2265 

2266def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: 

2267 """ 

2268 Reshape if possible to have values.ndim == ndim. 

2269 """ 

2270 

2271 if values.ndim < ndim: 

2272 if not is_1d_only_ea_dtype(values.dtype): 

2273 # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 

2274 # block.shape is incorrect for "2D" ExtensionArrays 

2275 # We can't, and don't need to, reshape. 

2276 values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) 

2277 values = values.reshape(1, -1) 

2278 

2279 return values 

2280 

2281 

def to_native_types(
    values: ArrayLike,
    *,
    na_rep="nan",
    quoting=None,
    float_format=None,
    decimal=".",
    **kwargs,
) -> np.ndarray:
    """
    Convert to our native types format (object-dtype ndarray of strings,
    ready for e.g. CSV output).

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    na_rep : str, default "nan"
        Representation used for missing values.
    quoting : optional
        csv.QUOTE_* style value; when falsy, values are cast via astype(str),
        otherwise kept as objects so the writer can quote them.
    float_format : optional
        Float formatter; triggers the FloatArrayFormatter path when set.
    decimal : str, default "."
        Decimal separator; a non-default value also triggers the
        FloatArrayFormatter path.
    **kwargs
        Forwarded to ``_format_native_types`` for datetime-like values.

    Returns
    -------
    np.ndarray
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, **kwargs)
            result = result.astype(object, copy=False)
            return result

        # GH#21734 Process every column separately, they might have different formats
        results_converted = []
        for i in range(len(values)):
            result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs)
            results_converted.append(result.astype(object, copy=False))
        return np.vstack(results_converted)

    elif values.dtype.kind == "f" and not is_sparse(values):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    elif isinstance(values, ExtensionArray):
        # Generic EA path: go through object dtype and fill in na_rep.
        mask = isna(values)

        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:

        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge the fixed-width string dtype so na_rep fits
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values

2368 

2369 

2370def external_values(values: ArrayLike) -> ArrayLike: 

2371 """ 

2372 The array that Series.values returns (public attribute). 

2373 

2374 This has some historical constraints, and is overridden in block 

2375 subclasses to return the correct array (e.g. period returns 

2376 object ndarray and datetimetz a datetime64[ns] ndarray instead of 

2377 proper extension array). 

2378 """ 

2379 if isinstance(values, (PeriodArray, IntervalArray)): 

2380 return values.astype(object) 

2381 elif isinstance(values, (DatetimeArray, TimedeltaArray)): 

2382 # NB: for datetime64tz this is different from np.asarray(values), since 

2383 # that returns an object-dtype ndarray of Timestamps. 

2384 # Avoid FutureWarning in .astype in casting from dt64tz to dt64 

2385 return values._data 

2386 else: 

2387 return values