Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/blocks.py: 19%
946 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from functools import wraps
4import re
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Iterable,
10 Sequence,
11 cast,
12 final,
13)
14import warnings
16import numpy as np
18from pandas._libs import (
19 Timestamp,
20 internals as libinternals,
21 lib,
22 writers,
23)
24from pandas._libs.internals import BlockPlacement
25from pandas._libs.tslibs import IncompatibleFrequency
26from pandas._typing import (
27 ArrayLike,
28 DtypeObj,
29 F,
30 IgnoreRaise,
31 Shape,
32 npt,
33)
34from pandas.errors import AbstractMethodError
35from pandas.util._decorators import cache_readonly
36from pandas.util._exceptions import find_stack_level
37from pandas.util._validators import validate_bool_kwarg
39from pandas.core.dtypes.astype import astype_array_safe
40from pandas.core.dtypes.cast import (
41 LossySetitemError,
42 can_hold_element,
43 find_result_type,
44 maybe_downcast_to_dtype,
45 np_can_hold_element,
46 soft_convert_objects,
47)
48from pandas.core.dtypes.common import (
49 ensure_platform_int,
50 is_1d_only_ea_dtype,
51 is_1d_only_ea_obj,
52 is_dtype_equal,
53 is_interval_dtype,
54 is_list_like,
55 is_sparse,
56 is_string_dtype,
57)
58from pandas.core.dtypes.dtypes import (
59 CategoricalDtype,
60 ExtensionDtype,
61 PandasDtype,
62 PeriodDtype,
63)
64from pandas.core.dtypes.generic import (
65 ABCDataFrame,
66 ABCIndex,
67 ABCPandasArray,
68 ABCSeries,
69)
70from pandas.core.dtypes.inference import is_inferred_bool_dtype
71from pandas.core.dtypes.missing import (
72 is_valid_na_for_dtype,
73 isna,
74 na_value_for_dtype,
75)
77import pandas.core.algorithms as algos
78from pandas.core.array_algos.putmask import (
79 extract_bool_array,
80 putmask_inplace,
81 putmask_without_repeat,
82 setitem_datetimelike_compat,
83 validate_putmask,
84)
85from pandas.core.array_algos.quantile import quantile_compat
86from pandas.core.array_algos.replace import (
87 compare_or_regex_search,
88 replace_regex,
89 should_use_regex,
90)
91from pandas.core.array_algos.transforms import shift
92from pandas.core.arrays import (
93 Categorical,
94 DatetimeArray,
95 ExtensionArray,
96 IntervalArray,
97 PandasArray,
98 PeriodArray,
99 TimedeltaArray,
100)
101from pandas.core.arrays.sparse import SparseDtype
102from pandas.core.base import PandasObject
103import pandas.core.common as com
104import pandas.core.computation.expressions as expressions
105from pandas.core.construction import (
106 ensure_wrapped_if_datetimelike,
107 extract_array,
108)
109from pandas.core.indexers import check_setitem_lengths
110import pandas.core.missing as missing
112if TYPE_CHECKING: 112 ↛ 113line 112 didn't jump to line 113, because the condition on line 112 was never true
113 from pandas import (
114 Float64Index,
115 Index,
116 )
117 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
# Module-level singleton for the object dtype: comparing `dtype == _dtype_obj`
# is faster than calling is_object_dtype for each check.
_dtype_obj = np.dtype("object")
def maybe_split(meth: F) -> F:
    """
    Decorator: if the decorated Block method is invoked on a multi-column
    block, split the block and apply the method column-by-column via
    ``split_and_operate``; otherwise call the original method directly.
    """

    @wraps(meth)
    def wrapper(self, *args, **kwargs) -> list[Block]:
        if self.ndim != 1 and self.shape[0] != 1:
            # Multi-column block: split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)
        return meth(self, *args, **kwargs)

    return cast(F, wrapper)
class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure.

    Index-ignorant; let the container take care of that.
    """

    # Underlying data: an ndarray or a 1D ExtensionArray.
    values: np.ndarray | ExtensionArray
    ndim: int
    __init__: Callable

    __slots__ = ()
    # Class-level flags; subclasses override these where appropriate.
    is_numeric = False
    is_object = False
    is_extension = False
    _can_consolidate = True
    _validate_ndim = True
    @final
    @cache_readonly
    def _consolidate_key(self):
        # Key used to group blocks that may be consolidated together:
        # blocks must both be consolidatable and share a dtype name.
        return self._can_consolidate, self.dtype.name
165 @final
166 @cache_readonly
167 def _can_hold_na(self) -> bool:
168 """
169 Can we store NA values in this Block?
170 """
171 dtype = self.dtype
172 if isinstance(dtype, np.dtype):
173 return dtype.kind not in ["b", "i", "u"]
174 return dtype._can_hold_na
    @final
    @cache_readonly
    def is_categorical(self) -> bool:
        # Deprecated accessor kept for backwards compatibility (GH#40226);
        # callers should use isinstance(block.values, Categorical) instead.
        warnings.warn(
            "Block.is_categorical is deprecated and will be removed in a "
            "future version. Use isinstance(block.values, Categorical) "
            "instead. See https://github.com/pandas-dev/pandas/issues/40226",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        return isinstance(self.values, Categorical)
    @final
    @property
    def is_bool(self) -> bool:
        """
        True if this block holds booleans: either bool dtype, or object dtype
        containing only bool objects.
        """
        return is_inferred_bool_dtype(self.values)
    @final
    def external_values(self):
        # Delegates to the module-level external_values helper (defined
        # elsewhere in this file) applied to our underlying values.
        return external_values(self.values)
    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer: the NA value used to fill newly-introduced
        # positions for this block's dtype (compat=False — see
        # na_value_for_dtype for the exact sentinel chosen).
        return na_value_for_dtype(self.dtype, compat=False)
206 @final
207 def _standardize_fill_value(self, value):
208 # if we are passed a scalar None, convert it here
209 if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype):
210 value = self.fill_value
211 return value
    @property
    def mgr_locs(self) -> BlockPlacement:
        # Placement of this block's rows within the owning BlockManager.
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        self._mgr_locs = new_mgr_locs
    @final
    def make_block(self, values, placement=None) -> Block:
        """
        Create a new block, with type inference, propagating any values that
        are not specified.
        """
        if placement is None:
            # default to this block's own placement
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        # TODO: perf by not going through new_block
        # We assume maybe_coerce_values has already been called
        return new_block(values, placement=placement, ndim=self.ndim)
    @final
    def make_block_same_class(
        self, values, placement: BlockPlacement | None = None
    ) -> Block:
        """Wrap given values in a block of same type as self."""
        if placement is None:
            placement = self._mgr_locs

        if values.dtype.kind in ["m", "M"]:
            # Deprecated path: callers should pass DatetimeArray/TimedeltaArray
            # rather than raw datetime64/timedelta64 ndarrays.
            new_values = ensure_wrapped_if_datetimelike(values)
            if new_values is not values:
                # TODO(2.0): remove once fastparquet has stopped relying on it
                warnings.warn(
                    "In a future version, Block.make_block_same_class will "
                    "assume that datetime64 and timedelta64 ndarrays have "
                    "already been cast to DatetimeArray and TimedeltaArray, "
                    "respectively.",
                    DeprecationWarning,
                    stacklevel=find_stack_level(),
                )
                values = new_values

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim)
262 @final
263 def __repr__(self) -> str:
264 # don't want to print out all of the items here
265 name = type(self).__name__
266 if self.ndim == 1:
267 result = f"{name}: {len(self)} dtype: {self.dtype}"
268 else:
270 shape = " x ".join([str(s) for s in self.shape])
271 result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"
273 return result
    @final
    def __len__(self) -> int:
        # Length of the underlying values along their first axis.
        return len(self.values)
    @final
    def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: the only place where we are called with ndarray[intp]
        # is from internals.concat, and we can verify that never happens
        # with 1-column blocks, i.e. never for ExtensionBlock.

        # Invalid index type "Union[slice, ndarray[Any, dtype[signedinteger[Any]]]]"
        # for "BlockPlacement"; expected type "Union[slice, Sequence[int]]"
        new_mgr_locs = self._mgr_locs[slicer]  # type: ignore[index]

        new_values = self._slice(slicer)

        if new_values.ndim != self.values.ndim:
            # dimensionality-changing indexers are not supported here
            raise ValueError("Only same dim slicing is allowed")

        return type(self)(new_values, new_mgr_locs, self.ndim)
301 @final
302 def getitem_block_columns(
303 self, slicer: slice, new_mgr_locs: BlockPlacement
304 ) -> Block:
305 """
306 Perform __getitem__-like, return result as block.
308 Only supports slices that preserve dimensionality.
309 """
310 new_values = self._slice(slicer)
312 if new_values.ndim != self.values.ndim:
313 raise ValueError("Only same dim slicing is allowed")
315 return type(self)(new_values, new_mgr_locs, self.ndim)
317 @final
318 def _can_hold_element(self, element: Any) -> bool:
319 """require the same dtype as ourselves"""
320 element = extract_array(element, extract_numpy=True)
321 return can_hold_element(self.values, element)
323 @final
324 def should_store(self, value: ArrayLike) -> bool:
325 """
326 Should we set self.values[indexer] = value inplace or do we need to cast?
328 Parameters
329 ----------
330 value : np.ndarray or ExtensionArray
332 Returns
333 -------
334 bool
335 """
336 # faster equivalent to is_dtype_equal(value.dtype, self.dtype)
337 try:
338 return value.dtype == self.dtype
339 except TypeError:
340 return False
342 # ---------------------------------------------------------------------
343 # Apply/Reduce and Helpers
345 @final
346 def apply(self, func, **kwargs) -> list[Block]:
347 """
348 apply the function to my values; return a block if we are not
349 one
350 """
351 result = func(self.values, **kwargs)
353 return self._split_op_result(result)
    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        """
        Apply a reduction to our values, returning a single-row Block.

        Parameters
        ----------
        func : callable
            Reduction applied to ``self.values``.
        ignore_failures : bool, default False
            If True, swallow TypeError/NotImplementedError raised by ``func``
            and return an empty list instead of raising.
        """
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        try:
            result = func(self.values)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                return []
            raise

        if self.values.ndim == 1:
            # TODO(EA2D): special case not needed with 2D EAs
            res_values = np.array([[result]])
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]
    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        """Wrap an operation result in Block(s), splitting 2D EAs column-wise."""
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_obj(result):
                    # 2D-capable EA: keep a length-1 leading dimension
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                block = self.make_block(values=vals, placement=loc)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]
397 @final
398 def _split(self) -> list[Block]:
399 """
400 Split a block into a list of single-column blocks.
401 """
402 assert self.ndim == 2
404 new_blocks = []
405 for i, ref_loc in enumerate(self._mgr_locs):
406 vals = self.values[slice(i, i + 1)]
408 bp = BlockPlacement(ref_loc)
409 nb = type(self)(vals, placement=bp, ndim=2)
410 new_blocks.append(nb)
411 return new_blocks
413 @final
414 def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
415 """
416 Split the block and apply func column-by-column.
418 Parameters
419 ----------
420 func : Block method
421 *args
422 **kwargs
424 Returns
425 -------
426 List[Block]
427 """
428 assert self.ndim == 2 and self.shape[0] != 1
430 res_blocks = []
431 for nb in self._split():
432 rbs = func(nb, *args, **kwargs)
433 res_blocks.extend(rbs)
434 return res_blocks
436 # ---------------------------------------------------------------------
437 # Up/Down-casting
439 @final
440 def coerce_to_target_dtype(self, other) -> Block:
441 """
442 coerce the current block to a dtype compat for other
443 we will return a block, possibly object, and not raise
445 we can also safely try to coerce to the same dtype
446 and will receive the same block
447 """
448 new_dtype = find_result_type(self.values, other)
450 return self.astype(new_dtype, copy=False)
    @final
    def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
        # Possibly downcast the given result blocks, depending on our dtype
        # and the requested `downcast` behavior.
        if downcast is False:
            # explicit opt-out
            return blocks

        if self.dtype == _dtype_obj:
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            return extend_blocks(
                [blk.convert(datetime=True, numeric=False) for blk in blocks]
            )

        if downcast is None:
            return blocks

        return extend_blocks([b._downcast_2d(downcast) for b in blocks])
472 @final
473 @maybe_split
474 def _downcast_2d(self, dtype) -> list[Block]:
475 """
476 downcast specialized to 2D case post-validation.
478 Refactored to allow use of maybe_split.
479 """
480 new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
481 return [self.make_block(new_values)]
483 def convert(
484 self,
485 copy: bool = True,
486 datetime: bool = True,
487 numeric: bool = True,
488 timedelta: bool = True,
489 ) -> list[Block]:
490 """
491 attempt to coerce any object types to better types return a copy
492 of the block (if copy = True) by definition we are not an ObjectBlock
493 here!
494 """
495 return [self.copy()] if copy else [self]
497 # ---------------------------------------------------------------------
498 # Array-Like Methods
    @cache_readonly
    def dtype(self) -> DtypeObj:
        # dtype of the underlying values; cached (unlike `shape`, the dtype is
        # presumed stable for the life of the block — TODO confirm vs set_values)
        return self.values.dtype
    @final
    def astype(
        self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise"
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object

        Returns
        -------
        Block

        Raises
        ------
        TypeError
            If the cast changed the block's shape.
        """
        values = self.values

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)
        newb = self.make_block(new_values)
        if newb.shape != self.shape:
            # a shape-changing cast would corrupt the manager's layout
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb
538 @final
539 def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block:
540 """convert to our native types format"""
541 result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
542 return self.make_block(result)
544 @final
545 def copy(self, deep: bool = True) -> Block:
546 """copy constructor"""
547 values = self.values
548 if deep:
549 values = values.copy()
550 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
552 # ---------------------------------------------------------------------
553 # Replace
    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> list[Block]:
        """
        Replace the to_replace value with value; possibly creates new blocks.
        This is essentially a mask computation followed by a putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            blk = self if inplace else self.copy()
            # error: Item "ExtensionArray" of "Union[ndarray[Any, Any],
            # ExtensionArray]" has no attribute "_replace"
            blk.values._replace(  # type: ignore[union-attr]
                to_replace=to_replace, value=value, inplace=True
            )
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # nothing to replace
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # in-dtype replacement via putmask
            blk = self if inplace else self.copy()
            putmask_inplace(blk.values, mask, value)
            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                blocks = blk.convert(numeric=False, copy=False)
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            # single column: upcast once, then recurse (inplace on the cast copy)
            if value is None:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                    )
                )
            return blocks
    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        convert: bool = True,
        mask=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        convert : bool, default True
            If true, try to coerce any object types to better types.
        mask : array-like of bool, optional
            True indicate corresponding element is ignored.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only ObjectBlock, but could in principle include a
            # String ExtensionBlock
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        new_values = self.values if inplace else self.values.copy()
        # regex substitution happens in-place on new_values
        replace_regex(new_values, rx, value, mask)

        block = self.make_block(new_values)
        return block.convert(numeric=False, copy=False)
    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            mask = ~isna(values)
            masks = [
                compare_or_regex_search(values, s[0], regex=regex, mask=mask)
                for s in pairs
            ]
        else:
            # GH#38086 faster if we know we dont need to check for regex
            masks = [missing.mask_missing(values, s[0]) for s in pairs]

        # error: Argument 1 to "extract_bool_array" has incompatible type
        # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray,
        # ndarray]"
        masks = [extract_bool_array(x) for x in masks]  # type: ignore[arg-type]

        rb = [self if inplace else self.copy()]
        for i, (src, dest) in enumerate(pairs):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    # block was not split; use the full mask
                    m = masks[i]
                else:
                    mib = masks[i]
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,  # type: ignore[arg-type]
                    inplace=inplace,
                    regex=regex,
                )
                if convert and blk.is_object and not all(x is None for x in dest_list):
                    # GH#44498 avoid unwanted cast-back
                    result = extend_blocks(
                        [b.convert(numeric=False, copy=True) for b in result]
                    )
                new_rb.extend(result)
            rb = new_rb
        return rb
    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicate corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                convert=False,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    # replacing with None: cast to object first so None fits
                    nb = self.astype(np.dtype(object), copy=False)
                    if nb is self and not inplace:
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace, value=value, inplace=inplace, mask=mask
            )
805 # ---------------------------------------------------------------------
806 # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock
807 # but not ExtensionBlock
    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.

        Base-class no-op; subclasses may override.
        """
        return arg
    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.

        Base-class no-op; subclasses may override.
        """
        return indexer
    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        # Shape of the underlying values array.
        return self.values.shape
    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        """Positionally index into our values."""
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]
    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """Return a slice of my values."""

        return self.values[slicer]
    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values
    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            # no explicit fill: use our dtype's NA and disallow filling
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (axis == 0 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if not is_dtype_equal(new_values.dtype, self.dtype):
            # take may have upcast; re-infer the block type
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)
    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask
939 # ---------------------------------------------------------------------
    def setitem(self, indexer, value) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            # work row-major so `indexer` lines up with user-facing axes
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)
            values[indexer] = casted
            return self
    def putmask(self, mask, new) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block.

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object

        Returns
        -------
        List[Block]
        """
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            # mask selects nothing; nothing to do
            return [self]

        try:
            casted = np_can_hold_element(values.dtype, new)
            putmask_without_repeat(values.T, mask, casted)
            return [self]
        except LossySetitemError:
            # `new` does not fit our dtype; upcast (possibly per-column)

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(new).putmask(mask, new)
                else:
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer])
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks
    def where(self, other, cond, _downcast="infer") -> list[Block]:
        """
        evaluate the block; return result block(s) from the result

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or None, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        orig_other = other
        if transpose:
            values = values.T

        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will re-use below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond)
                return self._maybe_downcast(blocks, downcast=_downcast)

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(oth, submask, _downcast=_downcast)
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                    # If lengths don't match (or len(other)==1), we will raise
                    # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

        if transpose:
            result = result.T

        return [self.make_block(result)]
    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again

        Parameters
        ----------
        value : object
            Scalar or array-like used to fill NA positions.
        limit : int or None, default None
            Maximum number of NA values to fill along the block axis.
        inplace : bool, default False
            If True, fill via putmask on the existing values.
        downcast : optional
            Passed through to ``_maybe_downcast`` on the resulting blocks.

        Returns
        -------
        list[Block]
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy()
                nbs = nb._maybe_downcast([nb], downcast=downcast)
                return nbs

        if limit is not None:
            # turn off mask entries past the first `limit` NAs along the axis
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(mask.T, value)
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [blk._maybe_downcast([blk], downcast=downcast) for blk in nbs]
        )
    def interpolate(
        self,
        method: str = "pad",
        axis: int = 0,
        index: Index | None = None,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: str = "forward",
        limit_area: str | None = None,
        fill_value: Any | None = None,
        downcast: str | None = None,
        **kwargs,
    ) -> list[Block]:
        """
        Interpolate/fill values along ``axis`` with the given ``method``.

        Returns a list of Blocks; no-op cases return ``[self]`` when
        ``inplace`` else ``[self.copy()]``.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            return [self] if inplace else [self.copy()]

        try:
            m = missing.clean_fill_method(method)
        except ValueError:
            # not a fill method (e.g. a true interpolation method); handled below
            m = None
        if m is None and self.dtype.kind != "f":
            # only deal with floats
            # bc we already checked that can_hold_na, we dont have int dtype here
            # test_interp_basic checks that we make a copy here
            return [self] if inplace else [self.copy()]

        if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0:
            # split improves performance in ndarray.copy()
            return self.split_and_operate(
                type(self).interpolate,
                method,
                axis,
                index,
                inplace,
                limit,
                limit_direction,
                limit_area,
                fill_value,
                downcast,
                **kwargs,
            )

        data = self.values if inplace else self.values.copy()
        data = cast(np.ndarray, data)  # bc overridden by ExtensionBlock

        # interpolate_array_2d operates on `data` in place
        missing.interpolate_array_2d(
            data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

        nb = self.make_block_same_class(data)
        return nb._maybe_downcast([nb], downcast)
1273 def diff(self, n: int, axis: int = 1) -> list[Block]:
1274 """return block for the diff of the values"""
1275 new_values = algos.diff(self.values, n, axis=axis)
1276 return [self.make_block(values=new_values)]
    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """
        shift the block by periods, possibly upcast

        If ``fill_value`` cannot be held losslessly by this dtype, the block
        is first coerced to a compatible dtype and shift is retried.
        """
        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also

        # Note: periods is never 0 here, as that is handled at the top of
        # NDFrame.shift. If that ever changes, we can do a check for periods=0
        # and possibly avoid coercing.

        if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
            # with object dtype there is nothing to promote, and the user can
            # pass pretty much any weird fill_value they like
            # see test_shift_object_non_scalar_fill
            raise ValueError("fill_value must be a scalar")

        fill_value = self._standardize_fill_value(fill_value)

        try:
            # error: Argument 1 to "np_can_hold_element" has incompatible type
            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
            casted = np_can_hold_element(
                self.dtype, fill_value  # type: ignore[arg-type]
            )
        except LossySetitemError:
            # fill_value does not fit this dtype; cast to one that can hold it
            nb = self.coerce_to_target_dtype(fill_value)
            return nb.shift(periods, axis=axis, fill_value=fill_value)

        else:
            values = cast(np.ndarray, self.values)
            new_values = shift(values, periods, axis, casted)
            return [self.make_block(new_values)]
    @final
    def quantile(
        self, qs: Float64Index, interpolation="linear", axis: int = 0
    ) -> Block:
        """
        compute the quantiles of the block's values

        Parameters
        ----------
        qs : Float64Index
            List of the quantiles to be computed.
        interpolation : str, default 'linear'
            Type of interpolation.
        axis : int, default 0
            Axis to compute.

        Returns
        -------
        Block
        """
        # We should always have ndim == 2 because Series dispatches to DataFrame
        assert self.ndim == 2
        assert axis == 1  # only ever called this way
        assert is_list_like(qs)  # caller is responsible for this

        result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
        # ensure_block_shape needed for cases where we start with EA and result
        # is ndarray, e.g. IntegerArray, SparseArray
        result = ensure_block_shape(result, ndim=2)
        # reuse this block's row placement for the result block
        return new_block_2d(result, placement=self._mgr_locs)
1341 # ---------------------------------------------------------------------
1342 # Abstract Methods Overridden By EABackedBlock and NumpyBlock
    def delete(self, loc) -> Block:
        """
        Return a new Block with the given loc(s) deleted.

        Abstract here; overridden by EABackedBlock and NumpyBlock.
        """
        raise AbstractMethodError(self)
    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # Abstract: each concrete subclass decides what "view" means for
        # its backing array.
        raise AbstractMethodError(self)
    @property
    def array_values(self) -> ExtensionArray:
        """
        The array that Series.array returns. Always an ExtensionArray.
        """
        # Abstract: implemented by EABackedBlock and NumpyBlock.
        raise AbstractMethodError(self)
    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        # Abstract: subclasses return their values as an ndarray, optionally
        # cast when ``dtype`` is the object dtype.
        raise AbstractMethodError(self)
    def values_for_json(self) -> np.ndarray:
        """ndarray representation consumed by the JSON writer (abstract)."""
        raise AbstractMethodError(self)
class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.
    """

    values: ExtensionArray

    def setitem(self, indexer, value):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            # unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            values[indexer] = value
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            # The EA rejected `value`; retry after casting to a dtype that
            # can hold it.
            if is_interval_dtype(self.dtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            else:
                raise

        else:
            return self

    def where(self, other, cond, _downcast="infer") -> list[Block]:
        """
        Replace entries where ``cond`` is False with ``other``, staying
        EA-backed when possible; falls back to a common dtype when the EA
        rejects ``other``.
        """
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    # isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond)
                    return self._maybe_downcast(nbs, downcast=_downcast)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    def putmask(self, mask, new) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)

        values = self.values
        if values.ndim == 2:
            values = values.T

        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # nothing to set; return self unchanged
            return [self]

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:

                if is_interval_dtype(self.dtype):
                    # Discussion about what we want to support in the general
                    # case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    # isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    def fillna(
        self, value, limit: int | None = None, inplace: bool = False, downcast=None
    ) -> list[Block]:
        """
        EA-aware fillna: timedelta64 fills directly on the array; everything
        else dispatches to Block.fillna.
        """
        # Caller is responsible for validating limit; if int it is strictly positive

        if self.dtype.kind == "m":
            try:
                res_values = self.values.fillna(value, limit=limit)
            except (ValueError, TypeError):
                # GH#45746
                warnings.warn(
                    "The behavior of fillna with timedelta64[ns] dtype and "
                    f"an incompatible value ({type(value)}) is deprecated. "
                    "In a future version, this will cast to a common dtype "
                    "(usually object) instead of raising, matching the "
                    "behavior of other dtypes.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise
            else:
                res_blk = self.make_block(res_values)
                return [res_blk]

        # TODO: since this now dispatches to super, which in turn dispatches
        # to putmask, it may *actually* respect 'inplace=True'. If so, add
        # tests for this.
        return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast)

    def delete(self, loc) -> Block:
        """Return a new Block with the given loc(s) deleted."""
        # This will be unnecessary if/when __array_function__ is implemented
        values = self.values.delete(loc)
        mgr_locs = self._mgr_locs.delete(loc)
        return type(self)(values, placement=mgr_locs, ndim=self.ndim)

    @cache_readonly
    def array_values(self) -> ExtensionArray:
        # already an ExtensionArray; no wrapping needed
        return self.values

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    def values_for_json(self) -> np.ndarray:
        """ndarray representation used by the JSON writer."""
        return np.asarray(self.values)

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        """EA interpolate is implemented as a fillna on the underlying array."""
        values = self.values
        if values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)
class ExtensionBlock(libinternals.Block, EABackedBlock):
    """
    Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    _can_consolidate = False
    _validate_ndim = False
    is_extension = True

    values: ExtensionArray

    @cache_readonly
    def shape(self) -> Shape:
        # TODO(EA2D): override unnecessary with 2D EAs
        if self.ndim == 1:
            return (len(self.values),)
        return len(self._mgr_locs), len(self.values)

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
        """Positional getitem on the (conceptually 2D) block."""
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)

        # We _could_ make the annotation more specific, but mypy would
        # complain about override mismatch:
        # Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

        # Note: only reached with self.ndim == 2

        if isinstance(i, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = i
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            elif isinstance(col, slice):
                # the is_null_slice check above assures that col is slice(None)
                # so what we want is a view on all our columns and row loc
                if loc < 0:
                    loc += len(self.values)
                # Note: loc:loc+1 vs [[loc]] makes a difference when called
                # from fast_xs because we want to get a view back.
                return self.values[loc : loc + 1]
            return self.values[loc]
        else:
            if i != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """Overwrite the block's values in place (the EA holds one column)."""
        # When an ndarray, we should have locs.tolist() == [0]
        # When a BlockPlacement we should have list(locs) == [0]
        if copy:
            self.values = self.values.copy()
        self.values[:] = values

    def _maybe_squeeze_arg(self, arg):
        """
        If necessary, squeeze a (N, 1) ndarray to (N,)
        """
        # e.g. if we are passed a 2D mask for putmask
        if (
            isinstance(arg, (np.ndarray, ExtensionArray))
            and arg.ndim == self.values.ndim + 1
        ):
            # TODO(EA2D): unnecessary with 2D EAs
            assert arg.shape[1] == 1
            # error: No overload variant of "__getitem__" of "ExtensionArray"
            # matches argument type "Tuple[slice, int]"
            arg = arg[:, 0]  # type: ignore[call-overload]
        elif isinstance(arg, ABCDataFrame):
            # 2022-01-06 only reached for setitem
            # TODO: should we avoid getting here with DataFrame?
            assert arg.shape[1] == 1
            arg = arg._ixs(0, axis=1)._values

        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        Adapt a 2D-indexer to our 1D values.

        This is intended for 'setitem', not 'iget' or '_slice'.
        """
        # TODO: ATM this doesn't work for iget/_slice, can we change that?

        if isinstance(indexer, tuple):
            # TODO(EA2D): not needed with 2D EAs
            # Should never have length > 2. Caller is responsible for checking.
            # Length 1 is reached vis setitem_single_block and setitem_single_column
            # each of which pass indexer=(pi,)
            if len(indexer) == 2:

                if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                    # GH#44703 went through indexing.maybe_convert_ix
                    first, second = indexer
                    if not (
                        second.size == 1 and (second == 0).all() and first.shape[1] == 1
                    ):
                        raise NotImplementedError(
                            "This should not be reached. Please report a bug at "
                            "github.com/pandas-dev/pandas/"
                        )
                    indexer = first[:, 0]

                elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                    # reached via setitem_single_block passing the whole indexer
                    indexer = indexer[0]

                elif com.is_null_slice(indexer[1]):
                    indexer = indexer[0]

                elif is_list_like(indexer[1]) and indexer[1][0] == 0:
                    indexer = indexer[0]

                else:
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
        return indexer

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        return False

    @cache_readonly
    def is_numeric(self):
        # delegated to the EA dtype's private flag
        return self.values.dtype._is_numeric

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: int = 0,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        if fill_value is lib.no_default:
            fill_value = None

        # TODO(EA2D): special case not needed with 2D EAs
        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        return self.make_block_same_class(new_values, new_mgr_locs)

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ExtensionArray:
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or ndarray[bool]
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        ExtensionArray
        """
        # Notes: ndarray[bool] is only reachable when via getitem_mgr, which
        # is only for Series, i.e. self.ndim == 1.

        # return same dims as we currently have
        if self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): won't be necessary with 2D EAs

            if not isinstance(slicer, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): won't be necessary with 2D EAs
            # range(1) instead of self._mgr_locs to avoid exception on [::-1]
            # see test_iloc_getitem_slice_negative_step_ea_block
            new_locs = range(1)[slicer]
            if not len(new_locs):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            slicer = slice(None)

        return self.values[slicer]

    @final
    def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
        """
        Perform __getitem__-like specialized to slicing along index.
        """
        # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
        # require subclasses of ExtensionArray to support that form (for now).
        new_values = self.values[slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

    def diff(self, n: int, axis: int = 1) -> list[Block]:
        """n-period difference, adapted for the fact that an EA block is 1D."""
        if axis == 0 and n != 0:
            # n==0 case will be a no-op so let is fall through
            # Since we only have one column, the result will be all-NA.
            # Create this result by shifting along axis=0 past the length of
            # our values.
            return super().diff(len(self.values), axis=0)
        if axis == 1:
            # TODO(EA2D): unnecessary with 2D EAs
            # we are by definition 1D.
            axis = 0
        return super().diff(n, axis)

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        new_values = self.values.shift(periods=periods, fill_value=fill_value)
        return [self.make_block_same_class(new_values)]

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """EA-safe unstack: unstack integer positions, then take on values."""
        # ExtensionArray-safe unstack.
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.

        # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
        new_values, mask = unstacker.arange_result

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        # needs_masking[i] calculated once in BlockManager.unstack tells
        # us if there are any -1s in the relevant indices. When False,
        # that allows us to go through a faster path in 'take', among
        # other things avoiding e.g. Categorical._validate_scalar.
        blocks = [
            # TODO: could cast to object depending on fill_value?
            type(self)(
                self.values.take(
                    indices, allow_fill=needs_masking[i], fill_value=fill_value
                ),
                BlockPlacement(place),
                ndim=2,
            )
            for i, (indices, place) in enumerate(zip(new_values, new_placement))
        ]
        return blocks, mask
class NumpyBlock(libinternals.NumpyBlock, Block):
    """Block backed directly by a plain numpy ndarray."""

    values: np.ndarray

    @property
    def is_view(self) -> bool:
        """True if the ndarray may share memory with another array."""
        # an ndarray with a non-None ``base`` is (or may be) a view
        return self.values.base is not None

    @property
    def array_values(self) -> ExtensionArray:
        """EA wrapper around the ndarray (what Series.array returns)."""
        return PandasArray(self.values)

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """Return the backing ndarray, cast to object dtype if requested."""
        if dtype != _dtype_obj:
            return self.values
        return self.values.astype(_dtype_obj)

    def values_for_json(self) -> np.ndarray:
        """ndarray representation used by the JSON serializer."""
        return self.values

    def delete(self, loc) -> Block:
        """Return a new Block with the entries at ``loc`` removed along axis 0."""
        trimmed = np.delete(self.values, loc, 0)
        new_placement = self._mgr_locs.delete(loc)
        return type(self)(trimmed, placement=new_placement, ndim=self.ndim)
class NumericBlock(NumpyBlock):
    # Block for the numeric numpy dtypes (see get_block_type:
    # kinds "f", "c", "i", "u", "b").
    __slots__ = ()
    is_numeric = True
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray
    """

    values: NDArrayBackedExtensionArray

    # error: Signature of "is_extension" incompatible with supertype "Block"
    @cache_readonly
    def is_extension(self) -> bool:  # type: ignore[override]
        # i.e. datetime64tz, PeriodDtype
        return not isinstance(self.dtype, np.dtype)

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # check the ndarray values of the DatetimeIndex values
        return self.values._ndarray.base is not None

    def diff(self, n: int, axis: int = 0) -> list[Block]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new Block.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        values = self.values

        # difference computed as values minus their shifted counterpart
        new_values = values - values.shift(n, axis=axis)
        return [self.make_block(new_values)]

    def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
        """Shift by ``periods``, delegating to the backing array's shift."""
        values = self.values
        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
        return [self.make_block_same_class(new_values)]
1981def _catch_deprecated_value_error(err: Exception) -> None:
1982 """
1983 We catch ValueError for now, but only a specific one raised by DatetimeArray
1984 which will no longer be raised in version.2.0.
1985 """
1986 if isinstance(err, ValueError):
1987 # TODO(2.0): once DTA._validate_setitem_value deprecation
1988 # is enforced, stop catching ValueError here altogether
1989 if isinstance(err, IncompatibleFrequency):
1990 pass
1991 elif "'value.closed' is" in str(err):
1992 # IntervalDtype mismatched 'closed'
1993 pass
1994 elif "Timezones don't match" not in str(err):
1995 raise
class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    is_numeric = False
    values: DatetimeArray | TimedeltaArray

    def values_for_json(self) -> np.ndarray:
        # expose the raw M8/m8 ndarray backing the array; DatetimeTZBlock
        # overrides this because dropping the tz would be incorrect there
        return self.values._ndarray
class DatetimeTZBlock(DatetimeLikeBlock):
    """implement a datetime64 block with a tz attribute"""

    values: DatetimeArray

    __slots__ = ()
    # tz-aware datetimes behave as an extension dtype within the internals
    is_extension = True
    _validate_ndim = True
    _can_consolidate = False

    # Don't use values_for_json from DatetimeLikeBlock since it is
    # an invalid optimization here(drop the tz)
    values_for_json = NDArrayBackedExtensionBlock.values_for_json
class ObjectBlock(NumpyBlock):
    """Block holding object-dtype values."""

    __slots__ = ()
    is_object = True

    @maybe_split
    def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
        """
        For object-dtype, we operate column-wise.

        Returns an empty list when ``func`` raises TypeError and
        ``ignore_failures`` is set.
        """
        assert self.ndim == 2

        try:
            reduced = func(self.values)
        except TypeError:
            if ignore_failures:
                return []
            raise

        assert isinstance(reduced, np.ndarray)
        assert reduced.ndim == 1
        # box the 1D reduction back up as a single-row 2D block
        return [self.make_block_same_class(reduced.reshape(1, -1))]

    @maybe_split
    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> list[Block]:
        """
        attempt to cast any object types to better types return a copy of
        the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
        """
        vals = self.values
        if vals.ndim == 2:
            # maybe_split ensures we only get here with values.shape[0] == 1;
            # index rather than .ravel so we don't risk a copy
            vals = vals[0]

        converted = soft_convert_objects(
            vals,
            datetime=datetime,
            numeric=numeric,
            timedelta=timedelta,
            copy=copy,
        )
        converted = ensure_block_shape(converted, self.ndim)
        return [self.make_block(converted)]
class CategoricalBlock(ExtensionBlock):
    # this Block type is kept for backwards-compatibility
    __slots__ = ()

    # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0,
    # so this cannot be cached
    @property
    def dtype(self) -> DtypeObj:
        # always read through to the Categorical's (possibly mutated) dtype
        return self.values.dtype
2087# -----------------------------------------------------------------
2088# Constructor Helpers
def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
    """
    Validate/normalize values handed to a Block ``__init__``.

    datetime64/timedelta64 ndarrays are wrapped in their array classes,
    fixed-width string dtypes are converted to object dtype, and any
    ``freq`` carried by a datetime/timedelta array is dropped.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    values : np.ndarray or ExtensionArray
    """
    # Caller is responsible for ensuring PandasArray is already extracted.
    if isinstance(values, np.ndarray):
        values = ensure_wrapped_if_datetimelike(values)

        if issubclass(values.dtype.type, str):
            # fixed-width unicode dtypes are not supported in the internals
            values = np.array(values, dtype=object)

    is_dtlike = isinstance(values, (DatetimeArray, TimedeltaArray))
    if is_dtlike and values.freq is not None:
        # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame
        values = values._with_freq(None)

    return values
def get_block_type(dtype: DtypeObj):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    # isinstance/.type/.kind checks are much more performant than the
    # is_foo_dtype helpers
    if isinstance(dtype, SparseDtype):
        # Need this first(ish) so that Sparse[datetime] is sparse
        return ExtensionBlock
    if isinstance(dtype, CategoricalDtype):
        return CategoricalBlock
    if dtype.type is Timestamp:
        # datetime64tz
        return DatetimeTZBlock
    if isinstance(dtype, PeriodDtype):
        return NDArrayBackedExtensionBlock
    if isinstance(dtype, ExtensionDtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        return ExtensionBlock

    kind = dtype.kind
    if kind in ["M", "m"]:
        return DatetimeLikeBlock
    if kind in ["f", "c", "i", "u", "b"]:
        return NumericBlock
    return ObjectBlock
def new_block_2d(values: ArrayLike, placement: BlockPlacement):
    """
    Fast-path Block constructor specialized to the 2D case.

    Assumes ``placement`` is already a BlockPlacement and that
    check_ndim/ensure_block_shape have been handled by the caller.
    """
    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=2, placement=placement)
def new_block(values, placement, *, ndim: int) -> Block:
    """
    Construct the appropriate Block subclass for ``values``.

    Caller is responsible for ensuring ``values`` is NOT a PandasArray.
    """
    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    check_ndim(values, placement, ndim)

    klass = get_block_type(values.dtype)
    return klass(maybe_coerce_values(values), ndim=ndim, placement=placement)
def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    # Applies to both np.ndarray and ExtensionArray
    if values.ndim > ndim:
        raise ValueError(
            f"Wrong number of dimensions. values.ndim > ndim [{values.ndim} > {ndim}]"
        )

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case unnecessary with 2D EAs
        if ndim == 2 and len(placement) != 1:
            raise ValueError("need to split")
    else:
        # TODO(EA2D): special case not needed with 2D EAs
        if values.ndim != ndim:
            raise ValueError(
                f"Wrong number of dimensions. values.ndim != ndim [{values.ndim} != {ndim}]"
            )
        if len(placement) != len(values):
            raise ValueError(
                f"Wrong number of items passed {len(values)}, "
                f"placement implies {len(placement)}"
            )
def extract_pandas_array(
    values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int
) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]:
    """
    Ensure that we don't allow PandasArray / PandasDtype in internals.
    """
    # For now, blocks should be backed by ndarrays when possible.
    if isinstance(values, ABCPandasArray):
        # unwrap to the underlying ndarray
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        # unwrap to the underlying numpy dtype
        dtype = dtype.numpy_dtype

    return values, dtype
2247# -----------------------------------------------------------------
def extend_blocks(result, blocks=None) -> list[Block]:
    """return a new extended blocks, given the result"""
    # Mutates and returns `blocks` when one is supplied, else a fresh list.
    out = [] if blocks is None else blocks

    if not isinstance(result, list):
        # single Block result
        assert isinstance(result, Block), type(result)
        out.append(result)
        return out

    # flatten one level: result may contain Blocks or lists of Blocks
    for item in result:
        if isinstance(item, list):
            out.extend(item)
        else:
            out.append(item)
    return out
def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
    """
    Reshape if possible to have values.ndim == ndim.
    """
    if values.ndim >= ndim:
        # already at (or above) the requested dimensionality; return unchanged
        return values

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
        # block.shape is incorrect for "2D" ExtensionArrays
        # We can't, and don't need to, reshape.
        return values

    values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
    return values.reshape(1, -1)
def to_native_types(
    values: ArrayLike,
    *,
    na_rep="nan",
    quoting=None,
    float_format=None,
    decimal=".",
    **kwargs,
) -> np.ndarray:
    """convert to our native types format"""
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            formatted = values._format_native_types(na_rep=na_rep, **kwargs)
            return formatted.astype(object, copy=False)

        # GH#21734 Process every column separately, they might have different formats
        rows = [
            values[i, :]
            ._format_native_types(na_rep=na_rep, **kwargs)
            .astype(object, copy=False)
            for i in range(len(values))
        ]
        return np.vstack(rows)

    if values.dtype.kind == "f" and not is_sparse(values):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            na_mask = isna(values)
            if quoting:
                out = np.array(values, dtype="object")
            else:
                out = values.astype(str)
            out[na_mask] = na_rep
            return out.astype(object, copy=False)

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        return formatter.get_result_as_array().astype(object, copy=False)

    if isinstance(values, ExtensionArray):
        na_mask = isna(values)
        out = np.asarray(values.astype(object))
        out[na_mask] = na_rep
        return out

    # remaining case: plain ndarray (object/str/int/...)
    na_mask = isna(values)
    itemsize = writers.word_len(na_rep)

    if values.dtype != _dtype_obj and not quoting and itemsize:
        out = values.astype(str)
        if out.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
            # enlarge so na_rep is not truncated when assigned below
            out = out.astype(f"<U{itemsize}")
    else:
        out = np.array(values, dtype="object")

    out[na_mask] = na_rep
    return out.astype(object, copy=False)
def external_values(values: ArrayLike) -> ArrayLike:
    """
    The array that Series.values returns (public attribute).

    This has some historical constraints, and is overridden in block
    subclasses to return the correct array (e.g. period returns
    object ndarray and datetimetz a datetime64[ns] ndarray instead of
    proper extension array).
    """
    if isinstance(values, (PeriodArray, IntervalArray)):
        # historically exposed as an object-dtype ndarray
        return values.astype(object)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        # NB: for datetime64tz this is different from np.asarray(values), since
        # that returns an object-dtype ndarray of Timestamps.
        # Avoid FutureWarning in .astype in casting from dt64tz to dt64
        return values._data

    return values