Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/blocks.py: 19%

946 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from functools import wraps 

4import re 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Iterable, 

10 Sequence, 

11 cast, 

12 final, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 Timestamp, 

20 internals as libinternals, 

21 lib, 

22 writers, 

23) 

24from pandas._libs.internals import BlockPlacement 

25from pandas._libs.tslibs import IncompatibleFrequency 

26from pandas._typing import ( 

27 ArrayLike, 

28 DtypeObj, 

29 F, 

30 IgnoreRaise, 

31 Shape, 

32 npt, 

33) 

34from pandas.errors import AbstractMethodError 

35from pandas.util._decorators import cache_readonly 

36from pandas.util._exceptions import find_stack_level 

37from pandas.util._validators import validate_bool_kwarg 

38 

39from pandas.core.dtypes.astype import astype_array_safe 

40from pandas.core.dtypes.cast import ( 

41 LossySetitemError, 

42 can_hold_element, 

43 find_result_type, 

44 maybe_downcast_to_dtype, 

45 np_can_hold_element, 

46 soft_convert_objects, 

47) 

48from pandas.core.dtypes.common import ( 

49 ensure_platform_int, 

50 is_1d_only_ea_dtype, 

51 is_1d_only_ea_obj, 

52 is_dtype_equal, 

53 is_interval_dtype, 

54 is_list_like, 

55 is_sparse, 

56 is_string_dtype, 

57) 

58from pandas.core.dtypes.dtypes import ( 

59 CategoricalDtype, 

60 ExtensionDtype, 

61 PandasDtype, 

62 PeriodDtype, 

63) 

64from pandas.core.dtypes.generic import ( 

65 ABCDataFrame, 

66 ABCIndex, 

67 ABCPandasArray, 

68 ABCSeries, 

69) 

70from pandas.core.dtypes.inference import is_inferred_bool_dtype 

71from pandas.core.dtypes.missing import ( 

72 is_valid_na_for_dtype, 

73 isna, 

74 na_value_for_dtype, 

75) 

76 

77import pandas.core.algorithms as algos 

78from pandas.core.array_algos.putmask import ( 

79 extract_bool_array, 

80 putmask_inplace, 

81 putmask_without_repeat, 

82 setitem_datetimelike_compat, 

83 validate_putmask, 

84) 

85from pandas.core.array_algos.quantile import quantile_compat 

86from pandas.core.array_algos.replace import ( 

87 compare_or_regex_search, 

88 replace_regex, 

89 should_use_regex, 

90) 

91from pandas.core.array_algos.transforms import shift 

92from pandas.core.arrays import ( 

93 Categorical, 

94 DatetimeArray, 

95 ExtensionArray, 

96 IntervalArray, 

97 PandasArray, 

98 PeriodArray, 

99 TimedeltaArray, 

100) 

101from pandas.core.arrays.sparse import SparseDtype 

102from pandas.core.base import PandasObject 

103import pandas.core.common as com 

104import pandas.core.computation.expressions as expressions 

105from pandas.core.construction import ( 

106 ensure_wrapped_if_datetimelike, 

107 extract_array, 

108) 

109from pandas.core.indexers import check_setitem_lengths 

110import pandas.core.missing as missing 

111 

112if TYPE_CHECKING: 112 ↛ 113line 112 didn't jump to line 113, because the condition on line 112 was never true

113 from pandas import ( 

114 Float64Index, 

115 Index, 

116 ) 

117 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

118 

# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")

121 

122 

def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        # Single column (or 1D): nothing to split, call through directly.
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        # Multi-column 2D block: apply `meth` to each single-column piece.
        return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

139 

140 

class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    # underlying homogeneous data; ndarray for numpy-backed blocks,
    # ExtensionArray for extension blocks
    values: np.ndarray | ExtensionArray
    # dimensionality of the owning manager's blocks (1 or 2)
    ndim: int
    __init__: Callable

    __slots__ = ()
    # class-level flags; dtype-specific subclasses override these
    is_numeric = False
    is_object = False
    is_extension = False
    # whether the BlockManager may merge this block with same-dtype blocks
    _can_consolidate = True
    # NOTE(review): presumably controls ndim validation at construction —
    # confirm against the constructor (not shown in this chunk)
    _validate_ndim = True

    @final
    @cache_readonly
    def _consolidate_key(self):
        # Key used by the manager to group blocks for consolidation:
        # only consolidatable blocks of the same dtype are merged.
        return self._can_consolidate, self.dtype.name

164 

165 @final 

166 @cache_readonly 

167 def _can_hold_na(self) -> bool: 

168 """ 

169 Can we store NA values in this Block? 

170 """ 

171 dtype = self.dtype 

172 if isinstance(dtype, np.dtype): 

173 return dtype.kind not in ["b", "i", "u"] 

174 return dtype._can_hold_na 

175 

    @final
    @cache_readonly
    def is_categorical(self) -> bool:
        # Deprecated accessor kept for backwards compatibility (GH#40226).
        warnings.warn(
            "Block.is_categorical is deprecated and will be removed in a "
            "future version. Use isinstance(block.values, Categorical) "
            "instead. See https://github.com/pandas-dev/pandas/issues/40226",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        return isinstance(self.values, Categorical)

187 

    @final
    @property
    def is_bool(self) -> bool:
        """
        We can be bool if a) we are bool dtype or b) object dtype with bool objects.
        """
        # delegates inference (including object-dtype inspection) to dtypes.inference
        return is_inferred_bool_dtype(self.values)

195 

    @final
    def external_values(self):
        # Delegate to the module-level helper of the same name
        # (defined elsewhere in this file).
        return external_values(self.values)

199 

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        # compat=False: return the dtype's own NA (e.g. NaT for datetimes),
        # not a lowest-common-denominator value
        return na_value_for_dtype(self.dtype, compat=False)

205 

206 @final 

207 def _standardize_fill_value(self, value): 

208 # if we are passed a scalar None, convert it here 

209 if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype): 

210 value = self.fill_value 

211 return value 

212 

    @property
    def mgr_locs(self) -> BlockPlacement:
        # Placement of this block's rows within the owning BlockManager.
        return self._mgr_locs

216 

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        # No validation here; callers are responsible for consistency
        # with the owning manager.
        self._mgr_locs = new_mgr_locs

220 

    @final
    def make_block(self, values, placement=None) -> Block:
        """
        Create a new block, with type inference propagate any values that are
        not specified
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            # 1D extension arrays may need reshaping to match self.ndim
            values = ensure_block_shape(values, ndim=self.ndim)

        # TODO: perf by not going through new_block
        # We assume maybe_coerce_values has already been called
        return new_block(values, placement=placement, ndim=self.ndim)

235 

    @final
    def make_block_same_class(
        self, values, placement: BlockPlacement | None = None
    ) -> Block:
        """Wrap given values in a block of same type as self."""
        if placement is None:
            placement = self._mgr_locs

        if values.dtype.kind in ["m", "M"]:
            # datetime64/timedelta64 ndarrays should arrive pre-wrapped;
            # warn (not raise) for now to keep old callers working
            new_values = ensure_wrapped_if_datetimelike(values)
            if new_values is not values:
                # TODO(2.0): remove once fastparquet has stopped relying on it
                warnings.warn(
                    "In a future version, Block.make_block_same_class will "
                    "assume that datetime64 and timedelta64 ndarrays have "
                    "already been cast to DatetimeArray and TimedeltaArray, "
                    "respectively.",
                    DeprecationWarning,
                    stacklevel=find_stack_level(),
                )
                values = new_values

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim)

261 

262 @final 

263 def __repr__(self) -> str: 

264 # don't want to print out all of the items here 

265 name = type(self).__name__ 

266 if self.ndim == 1: 

267 result = f"{name}: {len(self)} dtype: {self.dtype}" 

268 else: 

269 

270 shape = " x ".join([str(s) for s in self.shape]) 

271 result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" 

272 

273 return result 

274 

    @final
    def __len__(self) -> int:
        # Length of the underlying values (number of rows for 2D blocks).
        return len(self.values)

278 

    @final
    def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: the only place where we are called with ndarray[intp]
        # is from internals.concat, and we can verify that never happens
        # with 1-column blocks, i.e. never for ExtensionBlock.

        # Invalid index type "Union[slice, ndarray[Any, dtype[signedinteger[Any]]]]"
        # for "BlockPlacement"; expected type "Union[slice, Sequence[int]]"
        new_mgr_locs = self._mgr_locs[slicer]  # type: ignore[index]

        new_values = self._slice(slicer)

        # dimensionality must be preserved; otherwise the resulting block
        # would be inconsistent with self.ndim
        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        return type(self)(new_values, new_mgr_locs, self.ndim)

300 

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement
    ) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.

        Unlike getitem_block, the caller supplies the new placement directly.
        """
        new_values = self._slice(slicer)

        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        return type(self)(new_values, new_mgr_locs, self.ndim)

316 

    @final
    def _can_hold_element(self, element: Any) -> bool:
        """require the same dtype as ourselves"""
        # unwrap Series/Index to the underlying array before checking
        element = extract_array(element, extract_numpy=True)
        return can_hold_element(self.values, element)

322 

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        # faster equivalent to is_dtype_equal(value.dtype, self.dtype)
        try:
            return value.dtype == self.dtype
        except TypeError:
            # dtype comparison itself can raise for mismatched types;
            # treat that as "not storable in place"
            return False

341 

342 # --------------------------------------------------------------------- 

343 # Apply/Reduce and Helpers 

344 

345 @final 

346 def apply(self, func, **kwargs) -> list[Block]: 

347 """ 

348 apply the function to my values; return a block if we are not 

349 one 

350 """ 

351 result = func(self.values, **kwargs) 

352 

353 return self._split_op_result(result) 

354 

    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        try:
            result = func(self.values)
        except (TypeError, NotImplementedError):
            # ignore_failures=True: drop this block from the result entirely
            if ignore_failures:
                return []
            raise

        if self.values.ndim == 1:
            # TODO(EA2D): special case not needed with 2D EAs
            res_values = np.array([[result]])
        else:
            # one reduced value per row -> single-column 2D shape
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

375 

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_obj(result):
                    # 2D-capable EA: keep a length-1 first axis
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                block = self.make_block(values=vals, placement=loc)
                nbs.append(block)
            return nbs

        # common path: wrap the whole result in a single block
        nb = self.make_block(result)

        return [nb]

396 

397 @final 

398 def _split(self) -> list[Block]: 

399 """ 

400 Split a block into a list of single-column blocks. 

401 """ 

402 assert self.ndim == 2 

403 

404 new_blocks = [] 

405 for i, ref_loc in enumerate(self._mgr_locs): 

406 vals = self.values[slice(i, i + 1)] 

407 

408 bp = BlockPlacement(ref_loc) 

409 nb = type(self)(vals, placement=bp, ndim=2) 

410 new_blocks.append(nb) 

411 return new_blocks 

412 

413 @final 

414 def split_and_operate(self, func, *args, **kwargs) -> list[Block]: 

415 """ 

416 Split the block and apply func column-by-column. 

417 

418 Parameters 

419 ---------- 

420 func : Block method 

421 *args 

422 **kwargs 

423 

424 Returns 

425 ------- 

426 List[Block] 

427 """ 

428 assert self.ndim == 2 and self.shape[0] != 1 

429 

430 res_blocks = [] 

431 for nb in self._split(): 

432 rbs = func(nb, *args, **kwargs) 

433 res_blocks.extend(rbs) 

434 return res_blocks 

435 

436 # --------------------------------------------------------------------- 

437 # Up/Down-casting 

438 

    @final
    def coerce_to_target_dtype(self, other) -> Block:
        """
        coerce the current block to a dtype compat for other
        we will return a block, possibly object, and not raise

        we can also safely try to coerce to the same dtype
        and will receive the same block
        """
        # find_result_type picks the common dtype for (self.values, other)
        new_dtype = find_result_type(self.values, other)

        return self.astype(new_dtype, copy=False)

451 

    @final
    def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
        # downcast=False: caller explicitly opted out
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks
            return extend_blocks(
                [blk.convert(datetime=True, numeric=False) for blk in blocks]
            )

        # non-object dtype with no explicit request: leave unchanged
        if downcast is None:
            return blocks

        return extend_blocks([b._downcast_2d(downcast) for b in blocks])

471 

    @final
    @maybe_split
    def _downcast_2d(self, dtype) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        # maybe_split guarantees we are single-column here
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        return [self.make_block(new_values)]

482 

483 def convert( 

484 self, 

485 copy: bool = True, 

486 datetime: bool = True, 

487 numeric: bool = True, 

488 timedelta: bool = True, 

489 ) -> list[Block]: 

490 """ 

491 attempt to coerce any object types to better types return a copy 

492 of the block (if copy = True) by definition we are not an ObjectBlock 

493 here! 

494 """ 

495 return [self.copy()] if copy else [self] 

496 

497 # --------------------------------------------------------------------- 

498 # Array-Like Methods 

499 

    @cache_readonly
    def dtype(self) -> DtypeObj:
        # cached: blocks are homogeneous, so the dtype never changes in place
        return self.values.dtype

503 

    @final
    def astype(
        self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise"
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object

        Returns
        -------
        Block

        Raises
        ------
        TypeError
            If the cast changed the block's shape.
        """
        values = self.values

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)
        newb = self.make_block(new_values)
        # a cast must never change shape; guard against buggy EA casts
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

537 

    @final
    def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block:
        """convert to our native types format"""
        # delegates to the module-level to_native_types helper
        result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
        return self.make_block(result)

543 

544 @final 

545 def copy(self, deep: bool = True) -> Block: 

546 """copy constructor""" 

547 values = self.values 

548 if deep: 

549 values = values.copy() 

550 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim) 

551 

552 # --------------------------------------------------------------------- 

553 # Replace 

554 

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> list[Block]:
        """
        replace the to_replace value with value, possible to create new
        blocks here this is just a call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            blk = self if inplace else self.copy()
            # error: Item "ExtensionArray" of "Union[ndarray[Any, Any],
            # ExtensionArray]" has no attribute "_replace"
            blk.values._replace(  # type: ignore[union-attr]
                to_replace=to_replace, value=value, inplace=True
            )
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # value fits in the current dtype: plain in-place putmask
            blk = self if inplace else self.copy()
            putmask_inplace(blk.values, mask, value)
            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                blocks = blk.convert(numeric=False, copy=False)
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            # value does not fit: upcast this (single-column) block, then
            # recurse so the putmask path above can run
            if value is None:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        # pass the matching single-row slice of the mask
                        mask=mask[i : i + 1],
                    )
                )
            return blocks

635 

    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        convert: bool = True,
        mask=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        convert : bool, default True
            If true, try to coerce any object types to better types.
        mask : array-like of bool, optional
            True indicate corresponding element is ignored.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only ObjectBlock, but could in principle include a
            # String ExtensionBlock
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        new_values = self.values if inplace else self.values.copy()
        # mutates new_values in place
        replace_regex(new_values, rx, value, mask)

        block = self.make_block(new_values)
        return block.convert(numeric=False, copy=False)

677 

    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            mask = ~isna(values)
            masks = [
                compare_or_regex_search(values, s[0], regex=regex, mask=mask)
                for s in pairs
            ]
        else:
            # GH#38086 faster if we know we dont need to check for regex
            masks = [missing.mask_missing(values, s[0]) for s in pairs]

        # error: Argument 1 to "extract_bool_array" has incompatible type
        # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray,
        # ndarray]"
        masks = [extract_bool_array(x) for x in masks]  # type: ignore[arg-type]

        rb = [self if inplace else self.copy()]
        for i, (src, dest) in enumerate(pairs):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = masks[i]
                else:
                    # blk is a single-column split; take its row of the mask
                    mib = masks[i]
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,  # type: ignore[arg-type]
                    inplace=inplace,
                    regex=regex,
                )
                if convert and blk.is_object and not all(x is None for x in dest_list):
                    # GH#44498 avoid unwanted cast-back
                    result = extend_blocks(
                        [b.convert(numeric=False, copy=True) for b in result]
                    )
                new_rb.extend(result)
            rb = new_rb
        return rb

752 

    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicate corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                convert=False,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    # cast to object so None can be stored
                    nb = self.astype(np.dtype(object), copy=False)
                    if nb is self and not inplace:
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace, value=value, inplace=inplace, mask=mask
            )

804 

805 # --------------------------------------------------------------------- 

806 # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock 

807 # but not ExtensionBlock 

808 

    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.

        Base implementation is a no-op; ExtensionBlock overrides.
        """
        return arg

814 

    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.

        Base implementation is a no-op; ExtensionBlock overrides.
        """
        return indexer

820 

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        return self.values.shape

826 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

835 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""
        # plain positional/boolean indexing on the underlying array
        return self.values[slicer]

842 

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

860 

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            # no explicit fill: use our own NA and disallow -1 filling
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (axis == 0 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if not is_dtype_equal(new_values.dtype, self.dtype):
            # take may have upcast; re-infer the block type
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

894 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        allow_fill : bool
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        # keep a column if any of its entries are present
        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

938 

939 # --------------------------------------------------------------------- 

940 

    def setitem(self, indexer, value) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            # indexer is row-oriented; work on the transposed view
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)
            values[indexer] = casted
            return self

988 

    def putmask(self, mask, new) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object

        Returns
        -------
        List[Block]
        """
        # keep the un-normalized mask for per-column slicing below
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            # nothing masked: return self unchanged
            return [self]

        try:
            casted = np_can_hold_element(values.dtype, new)
            putmask_without_repeat(values.T, mask, casted)
            return [self]
        except LossySetitemError:
            # `new` cannot be stored losslessly in our dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(new).putmask(mask, new)
                else:
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer])
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

1051 

    def where(self, other, cond, _downcast="infer") -> list[Block]:
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or None, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        # keep the un-standardized `other` for the recursive calls below
        orig_other = other
        if transpose:
            values = values.T

        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will re-use below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond)
                return self._maybe_downcast(blocks, downcast=_downcast)

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(oth, submask, _downcast=_downcast)
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                # If lengths don't match (or len(other)==1), we will raise
                # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

        if transpose:
            # undo the transpose applied above
            result = result.T

        return [self.make_block(result)]

1162 

    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy()
                nbs = nb._maybe_downcast([nb], downcast=downcast)
                return nbs

        if limit is not None:
            # only fill the first `limit` NA positions along the last axis
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(mask.T, value)
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [blk._maybe_downcast([blk], downcast=downcast) for blk in nbs]
        )

1208 

    def interpolate(
        self,
        method: str = "pad",
        axis: int = 0,
        index: Index | None = None,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: str = "forward",
        limit_area: str | None = None,
        fill_value: Any | None = None,
        downcast: str | None = None,
        **kwargs,
    ) -> list[Block]:
        # Interpolate missing values in this block, returning the resulting
        # block(s); delegates the actual work to missing.interpolate_array_2d.

        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            return [self] if inplace else [self.copy()]

        try:
            m = missing.clean_fill_method(method)
        except ValueError:
            # method is not a fill method (e.g. "linear"); m=None signals that
            m = None
        if m is None and self.dtype.kind != "f":
            # only deal with floats
            # bc we already checked that can_hold_na, we dont have int dtype here
            # test_interp_basic checks that we make a copy here
            return [self] if inplace else [self.copy()]

        if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0:
            # split improves performance in ndarray.copy()
            return self.split_and_operate(
                type(self).interpolate,
                method,
                axis,
                index,
                inplace,
                limit,
                limit_direction,
                limit_area,
                fill_value,
                downcast,
                **kwargs,
            )

        data = self.values if inplace else self.values.copy()
        data = cast(np.ndarray, data)  # bc overridden by ExtensionBlock

        # operates on `data` in place
        missing.interpolate_array_2d(
            data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

        nb = self.make_block_same_class(data)
        return nb._maybe_downcast([nb], downcast)

1272 

1273 def diff(self, n: int, axis: int = 1) -> list[Block]: 

1274 """return block for the diff of the values""" 

1275 new_values = algos.diff(self.values, n, axis=axis) 

1276 return [self.make_block(values=new_values)] 

1277 

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """shift the block by periods, possibly upcast"""
        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also

        # Note: periods is never 0 here, as that is handled at the top of
        # NDFrame.shift. If that ever changes, we can do a check for periods=0
        # and possibly avoid coercing.

        if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
            # with object dtype there is nothing to promote, and the user can
            # pass pretty much any weird fill_value they like
            # see test_shift_object_non_scalar_fill
            raise ValueError("fill_value must be a scalar")

        fill_value = self._standardize_fill_value(fill_value)

        try:
            # error: Argument 1 to "np_can_hold_element" has incompatible type
            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
            casted = np_can_hold_element(
                self.dtype, fill_value  # type: ignore[arg-type]
            )
        except LossySetitemError:
            # fill_value does not fit this dtype: upcast, then retry on the
            # coerced block
            nb = self.coerce_to_target_dtype(fill_value)
            return nb.shift(periods, axis=axis, fill_value=fill_value)

        else:
            values = cast(np.ndarray, self.values)
            new_values = shift(values, periods, axis, casted)
            return [self.make_block(new_values)]

1309 

    @final
    def quantile(
        self, qs: Float64Index, interpolation="linear", axis: int = 0
    ) -> Block:
        """
        compute the quantiles of the

        Parameters
        ----------
        qs : Float64Index
            List of the quantiles to be computed.
        interpolation : str, default 'linear'
            Type of interpolation.
        axis : int, default 0
            Axis to compute.

        Returns
        -------
        Block
        """
        # We should always have ndim == 2 because Series dispatches to DataFrame
        assert self.ndim == 2
        assert axis == 1  # only ever called this way
        assert is_list_like(qs)  # caller is responsible for this

        result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
        # ensure_block_shape needed for cases where we start with EA and result
        # is ndarray, e.g. IntegerArray, SparseArray
        result = ensure_block_shape(result, ndim=2)
        return new_block_2d(result, placement=self._mgr_locs)

1340 

1341 # --------------------------------------------------------------------- 

1342 # Abstract Methods Overridden By EABackedBlock and NumpyBlock 

1343 

    def delete(self, loc) -> Block:
        """
        Return a new Block with the given loc(s) deleted.

        Implemented by EABackedBlock and NumpyBlock subclasses.
        """
        raise AbstractMethodError(self)

1349 

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1354 

    @property
    def array_values(self) -> ExtensionArray:
        """
        The array that Series.array returns. Always an ExtensionArray.
        """
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1361 

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        # abstract; overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1368 

    def values_for_json(self) -> np.ndarray:
        # abstract: the ndarray handed to the JSON serializer;
        # overridden by EABackedBlock and NumpyBlock
        raise AbstractMethodError(self)

1371 

1372 

class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.
    """

    values: ExtensionArray

    def setitem(self, indexer, value):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # keep the un-adapted arguments so the coerced block (below) can
        # redo the full setitem from scratch
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            # unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            values[indexer] = value
        except (ValueError, TypeError) as err:
            # re-raises unless err is one of the deprecated-but-tolerated cases
            _catch_deprecated_value_error(err)

            if is_interval_dtype(self.dtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                # e.g. datetime64tz / Period: upcast and retry
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            else:
                raise

        else:
            return self

    def where(self, other, cond, _downcast="infer") -> list[Block]:
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        # keep un-squeezed versions for the recursive/coerced calls below
        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    def putmask(self, mask, new) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)

        values = self.values
        if values.ndim == 2:
            values = values.T

        # keep un-squeezed versions for the recursive/coerced calls below
        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # nothing selected; no-op
            return [self]

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # Discussion about what we want to support in the general
                    #  case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        # Caller is responsible for validating limit; if int it is strictly positive

        if self.dtype.kind == "m":
            # timedelta64: let the array's own fillna handle it, with a
            # deprecation warning for incompatible values
            try:
                res_values = self.values.fillna(value, limit=limit)
            except (ValueError, TypeError):
                # GH#45746
                warnings.warn(
                    "The behavior of fillna with timedelta64[ns] dtype and "
                    f"an incompatible value ({type(value)}) is deprecated. "
                    "In a future version, this will cast to a common dtype "
                    "(usually object) instead of raising, matching the "
                    "behavior of other dtypes.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise
            else:
                res_blk = self.make_block(res_values)
                return [res_blk]

        # TODO: since this now dispatches to super, which in turn dispatches
        #  to putmask, it may *actually* respect 'inplace=True'. If so, add
        #  tests for this.
        return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast)

    def delete(self, loc) -> Block:
        """Return a new Block with the given loc(s) removed from both values
        and placement."""
        # This will be unnecessary if/when __array_function__ is implemented
        values = self.values.delete(loc)
        mgr_locs = self._mgr_locs.delete(loc)
        return type(self)(values, placement=mgr_locs, ndim=self.ndim)

    @cache_readonly
    def array_values(self) -> ExtensionArray:
        # already an ExtensionArray; no wrapping needed
        return self.values

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    def values_for_json(self) -> np.ndarray:
        # materialize the EA as an ndarray for the JSON serializer
        return np.asarray(self.values)

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        # EA interpolation is implemented via the array's fillna
        values = self.values
        if values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

1621 

1622 

class ExtensionBlock(libinternals.Block, EABackedBlock):
    """
    Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    # EA blocks cannot be merged with neighboring blocks
    _can_consolidate = False
    # values are 1-D even when the block is logically 2-D
    _validate_ndim = False
    is_extension = True

    values: ExtensionArray

    @cache_readonly
    def shape(self) -> Shape:
        # Logical block shape; the 1-D EA fakes a second dimension.
        # TODO(EA2D): override unnecessary with 2D EAs
        if self.ndim == 1:
            return (len(self.values),)
        return len(self._mgr_locs), len(self.values)

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
        # Positional getter; column index must be 0 since EA blocks hold
        # exactly one column.
        # In the case where we have a tuple[slice, int], the slice will always
        #  be slice(None)
        # We _could_ make the annotation more specific, but mypy would
        #  complain about override mismatch:
        #  Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

        # Note: only reached with self.ndim == 2

        if isinstance(i, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = i
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            elif isinstance(col, slice):
                # the is_null_slice check above assures that col is slice(None)
                #  so what we want is a view on all our columns and row loc
                if loc < 0:
                    loc += len(self.values)
                # Note: loc:loc+1 vs [[loc]] makes a difference when called
                #  from fast_xs because we want to get a view back.
                return self.values[loc : loc + 1]
            return self.values[loc]
        else:
            if i != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        # Overwrite this block's values in place.
        # When an ndarray, we should have locs.tolist() == [0]
        # When a BlockPlacement we should have list(locs) == [0]
        if copy:
            self.values = self.values.copy()
        self.values[:] = values

    def _maybe_squeeze_arg(self, arg):
        """
        If necessary, squeeze a (N, 1) ndarray to (N,)
        """
        # e.g. if we are passed a 2D mask for putmask
        if (
            isinstance(arg, (np.ndarray, ExtensionArray))
            and arg.ndim == self.values.ndim + 1
        ):
            # TODO(EA2D): unnecessary with 2D EAs
            assert arg.shape[1] == 1
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[slice, int]"
            arg = arg[:, 0]  # type: ignore[call-overload]
        elif isinstance(arg, ABCDataFrame):
            # 2022-01-06 only reached for setitem
            # TODO: should we avoid getting here with DataFrame?
            assert arg.shape[1] == 1
            arg = arg._ixs(0, axis=1)._values

        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        Adapt a 2D-indexer to our 1D values.

        This is intended for 'setitem', not 'iget' or '_slice'.
        """
        # TODO: ATM this doesn't work for iget/_slice, can we change that?

        if isinstance(indexer, tuple):
            # TODO(EA2D): not needed with 2D EAs
            #  Should never have length > 2.  Caller is responsible for checking.
            #  Length 1 is reached vis setitem_single_block and setitem_single_column
            #  each of which pass indexer=(pi,)
            if len(indexer) == 2:

                if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                    # GH#44703 went through indexing.maybe_convert_ix
                    first, second = indexer
                    if not (
                        second.size == 1 and (second == 0).all() and first.shape[1] == 1
                    ):
                        raise NotImplementedError(
                            "This should not be reached. Please report a bug at "
                            "github.com/pandas-dev/pandas/"
                        )
                    indexer = first[:, 0]

                elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                    # reached via setitem_single_block passing the whole indexer
                    indexer = indexer[0]

                elif com.is_null_slice(indexer[1]):
                    indexer = indexer[0]

                elif is_list_like(indexer[1]) and indexer[1][0] == 0:
                    indexer = indexer[0]

                else:
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
        return indexer

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        return False

    @cache_readonly
    def is_numeric(self):
        # delegate to the dtype's own notion of numeric-ness
        return self.values.dtype._is_numeric

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int = 0,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        if fill_value is lib.no_default:
            fill_value = None

        # TODO(EA2D): special case not needed with 2D EAs
        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        #  this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        return self.make_block_same_class(new_values, new_mgr_locs)

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ExtensionArray:
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or ndarray[bool]
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        ExtensionArray
        """
        # Notes: ndarray[bool] is only reachable when via getitem_mgr, which
        #  is only for Series, i.e. self.ndim == 1.

        # return same dims as we currently have
        if self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): won't be necessary with 2D EAs

            if not isinstance(slicer, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): won't be necessary with 2D EAs
            # range(1) instead of self._mgr_locs to avoid exception on [::-1]
            #  see test_iloc_getitem_slice_negative_step_ea_block
            new_locs = range(1)[slicer]
            if not len(new_locs):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            slicer = slice(None)

        return self.values[slicer]

    @final
    def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
        """
        Perform __getitem__-like specialized to slicing along index.
        """
        # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
        # require subclasses of ExtensionArray to support that form (for now).
        new_values = self.values[slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

    def diff(self, n: int, axis: int = 1) -> list[Block]:
        # n-period discrete difference for a single-column EA block
        if axis == 0 and n != 0:
            # n==0 case will be a no-op so let is fall through
            # Since we only have one column, the result will be all-NA.
            #  Create this result by shifting along axis=0 past the length of
            #  our values.
            return super().diff(len(self.values), axis=0)
        if axis == 1:
            # TODO(EA2D): unnecessary with 2D EAs
            # we are by definition 1D.
            axis = 0
        return super().diff(n, axis)

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        new_values = self.values.shift(periods=periods, fill_value=fill_value)
        return [self.make_block_same_class(new_values)]

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        # ExtensionArray-safe unstack.
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.

        # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
        new_values, mask = unstacker.arange_result

        # Note: these next two lines ensure that
        #  mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        #  which the calling function needs in order to pass verify_integrity=False
        #  to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        # needs_masking[i] calculated once in BlockManager.unstack tells
        #  us if there are any -1s in the relevant indices.  When False,
        #  that allows us to go through a faster path in 'take', among
        #  other things avoiding e.g. Categorical._validate_scalar.
        blocks = [
            # TODO: could cast to object depending on fill_value?
            type(self)(
                self.values.take(
                    indices, allow_fill=needs_masking[i], fill_value=fill_value
                ),
                BlockPlacement(place),
                ndim=2,
            )
            for i, (indices, place) in enumerate(zip(new_values, new_placement))
        ]
        return blocks, mask

1898 

1899 

class NumpyBlock(libinternals.NumpyBlock, Block):
    """Block backed by a plain np.ndarray."""

    values: np.ndarray

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # an ndarray with a non-None .base may share memory with another array
        return self.values.base is not None

    @property
    def array_values(self) -> ExtensionArray:
        # wrap the ndarray so Series.array always sees an ExtensionArray
        return PandasArray(self.values)

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        # return the ndarray, cast to object only when explicitly requested
        if dtype == _dtype_obj:
            return self.values.astype(_dtype_obj)
        return self.values

    def values_for_json(self) -> np.ndarray:
        # the raw ndarray is already JSON-serializer friendly
        return self.values

    def delete(self, loc) -> Block:
        # drop rows at `loc` from the values and the matching placement entries
        values = np.delete(self.values, loc, 0)
        mgr_locs = self._mgr_locs.delete(loc)
        return type(self)(values, placement=mgr_locs, ndim=self.ndim)

1924 

1925 

class NumericBlock(NumpyBlock):
    """NumpyBlock holding numeric (int/float/complex/bool-like) values."""

    __slots__ = ()
    is_numeric = True

1929 

1930 

class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray
    """

    values: NDArrayBackedExtensionArray

    # error: Signature of "is_extension" incompatible with supertype "Block"
    @cache_readonly
    def is_extension(self) -> bool:  # type: ignore[override]
        # i.e. datetime64tz, PeriodDtype
        return not isinstance(self.dtype, np.dtype)

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # check the ndarray values of the DatetimeIndex values
        return self.values._ndarray.base is not None

    def diff(self, n: int, axis: int = 0) -> list[Block]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new Block.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        values = self.values

        # subtracting the shifted values yields NA at the first n positions
        new_values = values - values.shift(n, axis=axis)
        return [self.make_block(new_values)]

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        # delegate to the backing array's own shift implementation
        values = self.values
        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
        return [self.make_block_same_class(new_values)]

1979 

1980 

1981def _catch_deprecated_value_error(err: Exception) -> None: 

1982 """ 

1983 We catch ValueError for now, but only a specific one raised by DatetimeArray 

1984 which will no longer be raised in version.2.0. 

1985 """ 

1986 if isinstance(err, ValueError): 

1987 # TODO(2.0): once DTA._validate_setitem_value deprecation 

1988 # is enforced, stop catching ValueError here altogether 

1989 if isinstance(err, IncompatibleFrequency): 

1990 pass 

1991 elif "'value.closed' is" in str(err): 

1992 # IntervalDtype mismatched 'closed' 

1993 pass 

1994 elif "Timezones don't match" not in str(err): 

1995 raise 

1996 

1997 

class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    is_numeric = False
    values: DatetimeArray | TimedeltaArray

    def values_for_json(self) -> np.ndarray:
        # Expose the raw backing ndarray directly; DatetimeTZBlock overrides
        # this because for tz-aware data it would drop the timezone.
        return self.values._ndarray

2007 

2008 

class DatetimeTZBlock(DatetimeLikeBlock):
    """implement a datetime64 block with a tz attribute"""

    values: DatetimeArray

    __slots__ = ()
    # tz-aware datetime is an extension dtype, declared statically here
    is_extension = True
    _validate_ndim = True
    # tz-aware blocks cannot be consolidated with other datetime blocks
    _can_consolidate = False

    # Don't use values_for_json from DatetimeLikeBlock since it is
    # an invalid optimization here (it would drop the tz)
    values_for_json = NDArrayBackedExtensionBlock.values_for_json

2022 

2023 

class ObjectBlock(NumpyBlock):
    __slots__ = ()
    is_object = True

    @maybe_split
    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        """
        For object-dtype, we operate column-wise.
        """
        assert self.ndim == 2

        try:
            result = func(self.values)
        except TypeError:
            if ignore_failures:
                # Caller asked us to silently drop columns the reduction
                # cannot handle.
                return []
            raise

        assert isinstance(result, np.ndarray)
        assert result.ndim == 1
        return [self.make_block_same_class(result.reshape(1, -1))]

    @maybe_split
    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> list[Block]:
        """
        Attempt to soft-convert object dtype to better dtypes, returning a
        copy of the block if copy=True.  By definition we ARE an ObjectBlock.
        """
        arr = self.values
        if arr.ndim == 2:
            # maybe_split guarantees shape[0] == 1 here; index rather than
            # .ravel() so we do not accidentally force a copy.
            arr = arr[0]

        converted = soft_convert_objects(
            arr,
            datetime=datetime,
            numeric=numeric,
            timedelta=timedelta,
            copy=copy,
        )
        converted = ensure_block_shape(converted, self.ndim)
        return [self.make_block(converted)]

2074 

2075 

class CategoricalBlock(ExtensionBlock):
    # this Block type is kept only for backwards-compatibility; behavior
    # otherwise matches ExtensionBlock
    __slots__ = ()

    # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0,
    # so this cannot be cached (must re-read from the values each time)
    @property
    def dtype(self) -> DtypeObj:
        return self.values.dtype

2085 

2086 

2087# ----------------------------------------------------------------- 

2088# Constructor Helpers 

2089 

2090 

2091def maybe_coerce_values(values: ArrayLike) -> ArrayLike: 

2092 """ 

2093 Input validation for values passed to __init__. Ensure that 

2094 any datetime64/timedelta64 dtypes are in nanoseconds. Ensure 

2095 that we do not have string dtypes. 

2096 

2097 Parameters 

2098 ---------- 

2099 values : np.ndarray or ExtensionArray 

2100 

2101 Returns 

2102 ------- 

2103 values : np.ndarray or ExtensionArray 

2104 """ 

2105 # Caller is responsible for ensuring PandasArray is already extracted. 

2106 

2107 if isinstance(values, np.ndarray): 

2108 values = ensure_wrapped_if_datetimelike(values) 

2109 

2110 if issubclass(values.dtype.type, str): 

2111 values = np.array(values, dtype=object) 

2112 

2113 if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None: 

2114 # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame 

2115 values = values._with_freq(None) 

2116 

2117 return values 

2118 

2119 

def get_block_type(dtype: DtypeObj):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    # dtype.type / dtype.kind checks are much more performant than the
    # is_foo_dtype helpers, so prefer them here.
    cls: type[Block]

    if isinstance(dtype, SparseDtype):
        # Checked first(ish) so that Sparse[datetime] stays sparse
        cls = ExtensionBlock
    elif isinstance(dtype, CategoricalDtype):
        cls = CategoricalBlock
    elif dtype.type is Timestamp:
        # datetime64tz
        cls = DatetimeTZBlock
    elif isinstance(dtype, PeriodDtype):
        cls = NDArrayBackedExtensionBlock
    elif isinstance(dtype, ExtensionDtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        cls = ExtensionBlock
    elif dtype.kind in ["M", "m"]:
        # tz-naive datetime64 / timedelta64
        cls = DatetimeLikeBlock
    elif dtype.kind in ["f", "c", "i", "u", "b"]:
        cls = NumericBlock
    else:
        cls = ObjectBlock
    return cls

2159 

2160 

def new_block_2d(values: ArrayLike, placement: BlockPlacement):
    # Fast path of new_block, specialized to the case where:
    #   - ndim == 2
    #   - placement is already a BlockPlacement
    #   - check_ndim/ensure_block_shape have already been done by the caller
    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=2, placement=placement)

2170 

2171 

def new_block(values, placement, *, ndim: int) -> Block:
    # caller is responsible for ensuring values is NOT a PandasArray

    # Normalize placement up-front so check_ndim can rely on len(placement).
    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    check_ndim(values, placement, ndim)

    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=ndim, placement=placement)

2184 

2185 

def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    if values.ndim > ndim:
        # Applies to both np.ndarray and ExtensionArray
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim > ndim [{values.ndim} > {ndim}]"
        )

    if not is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case not needed with 2D EAs
        if values.ndim != ndim:
            raise ValueError(
                "Wrong number of dimensions. "
                f"values.ndim != ndim [{values.ndim} != {ndim}]"
            )
        if len(placement) != len(values):
            raise ValueError(
                f"Wrong number of items passed {len(values)}, "
                f"placement implies {len(placement)}"
            )
    elif ndim == 2 and len(placement) != 1:
        # 1d-only EAs may sit inside a 2D manager, but only one row at a time.
        # TODO(EA2D): special case unnecessary with 2D EAs
        raise ValueError("need to split")

2226 

2227 

def extract_pandas_array(
    values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int
) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]:
    """
    Ensure that we don't allow PandasArray / PandasDtype in internals.
    """
    # For now, blocks should be backed by plain ndarrays whenever possible.
    if isinstance(values, ABCPandasArray):
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    # Unwrap the numpy-backed wrapper dtype as well.
    out_dtype = dtype.numpy_dtype if isinstance(dtype, PandasDtype) else dtype

    return values, out_dtype

2245 

2246 

2247# ----------------------------------------------------------------- 

2248 

2249 

2250def extend_blocks(result, blocks=None) -> list[Block]: 

2251 """return a new extended blocks, given the result""" 

2252 if blocks is None: 

2253 blocks = [] 

2254 if isinstance(result, list): 

2255 for r in result: 

2256 if isinstance(r, list): 

2257 blocks.extend(r) 

2258 else: 

2259 blocks.append(r) 

2260 else: 

2261 assert isinstance(result, Block), type(result) 

2262 blocks.append(result) 

2263 return blocks 

2264 

2265 

2266def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: 

2267 """ 

2268 Reshape if possible to have values.ndim == ndim. 

2269 """ 

2270 

2271 if values.ndim < ndim: 

2272 if not is_1d_only_ea_dtype(values.dtype): 

2273 # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 

2274 # block.shape is incorrect for "2D" ExtensionArrays 

2275 # We can't, and don't need to, reshape. 

2276 values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) 

2277 values = values.reshape(1, -1) 

2278 

2279 return values 

2280 

2281 

def to_native_types(
    values: ArrayLike,
    *,
    na_rep="nan",
    quoting=None,
    float_format=None,
    decimal=".",
    **kwargs,
) -> np.ndarray:
    """
    Convert to our native types format (object-dtype ndarray of strings,
    ready for e.g. CSV output).

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    na_rep : str, default "nan"
        Representation used for missing values.
    quoting : optional
        csv.QUOTE_* style value; when falsy, values are cast via astype(str),
        otherwise kept as objects so the writer can quote them.
    float_format : optional
        Float formatter; triggers the FloatArrayFormatter path when set.
    decimal : str, default "."
        Decimal separator; a non-default value also triggers the
        FloatArrayFormatter path.
    **kwargs
        Forwarded to ``_format_native_types`` for datetime-like values.

    Returns
    -------
    np.ndarray
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, **kwargs)
            result = result.astype(object, copy=False)
            return result

        # GH#21734 Process every column separately, they might have different formats
        results_converted = []
        for i in range(len(values)):
            result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs)
            results_converted.append(result.astype(object, copy=False))
        return np.vstack(results_converted)

    elif values.dtype.kind == "f" and not is_sparse(values):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    elif isinstance(values, ExtensionArray):
        # Generic EA path: go through object dtype and fill in na_rep.
        mask = isna(values)

        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:

        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge the fixed-width string dtype so na_rep fits
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values

2368 

2369 

2370def external_values(values: ArrayLike) -> ArrayLike: 

2371 """ 

2372 The array that Series.values returns (public attribute). 

2373 

2374 This has some historical constraints, and is overridden in block 

2375 subclasses to return the correct array (e.g. period returns 

2376 object ndarray and datetimetz a datetime64[ns] ndarray instead of 

2377 proper extension array). 

2378 """ 

2379 if isinstance(values, (PeriodArray, IntervalArray)): 

2380 return values.astype(object) 

2381 elif isinstance(values, (DatetimeArray, TimedeltaArray)): 

2382 # NB: for datetime64tz this is different from np.asarray(values), since 

2383 # that returns an object-dtype ndarray of Timestamps. 

2384 # Avoid FutureWarning in .astype in casting from dt64tz to dt64 

2385 return values._data 

2386 else: 

2387 return values