Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/internals/array_manager.py: 17%
601 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Experimental manager based on storing a collection of 1D arrays
3"""
4from __future__ import annotations
6from typing import (
7 TYPE_CHECKING,
8 Any,
9 Callable,
10 Hashable,
11 Literal,
12 TypeVar,
13)
15import numpy as np
17from pandas._libs import (
18 NaT,
19 algos as libalgos,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 DtypeObj,
25 npt,
26)
27from pandas.util._validators import validate_bool_kwarg
29from pandas.core.dtypes.astype import astype_array_safe
30from pandas.core.dtypes.cast import (
31 ensure_dtype_can_hold_na,
32 infer_dtype_from_scalar,
33 soft_convert_objects,
34)
35from pandas.core.dtypes.common import (
36 ensure_platform_int,
37 is_datetime64_ns_dtype,
38 is_dtype_equal,
39 is_extension_array_dtype,
40 is_integer,
41 is_numeric_dtype,
42 is_object_dtype,
43 is_timedelta64_ns_dtype,
44)
45from pandas.core.dtypes.dtypes import (
46 ExtensionDtype,
47 PandasDtype,
48)
49from pandas.core.dtypes.generic import (
50 ABCDataFrame,
51 ABCSeries,
52)
53from pandas.core.dtypes.inference import is_inferred_bool_dtype
54from pandas.core.dtypes.missing import (
55 array_equals,
56 isna,
57 na_value_for_dtype,
58)
60import pandas.core.algorithms as algos
61from pandas.core.array_algos.quantile import quantile_compat
62from pandas.core.array_algos.take import take_1d
63from pandas.core.arrays import (
64 DatetimeArray,
65 ExtensionArray,
66 PandasArray,
67 TimedeltaArray,
68)
69from pandas.core.arrays.sparse import SparseDtype
70from pandas.core.construction import (
71 ensure_wrapped_if_datetimelike,
72 extract_array,
73 sanitize_array,
74)
75from pandas.core.indexers import (
76 maybe_convert_indices,
77 validate_indices,
78)
79from pandas.core.indexes.api import (
80 Index,
81 ensure_index,
82)
83from pandas.core.internals.base import (
84 DataManager,
85 SingleDataManager,
86 interleaved_dtype,
87)
88from pandas.core.internals.blocks import (
89 ensure_block_shape,
90 external_values,
91 extract_pandas_array,
92 maybe_coerce_values,
93 new_block,
94 to_native_types,
95)
if TYPE_CHECKING:
    # Import only for static type checking to avoid a runtime circular import.
    from pandas import Float64Index


# TypeVar so manager methods can return the concrete subclass type (Self-style).
T = TypeVar("T", bound="BaseArrayManager")
class BaseArrayManager(DataManager):
    """
    Core internal data structure to implement DataFrame and Series.

    Alternative to the BlockManager, storing a list of 1D arrays instead of
    Blocks.

    This is *not* a public API class

    Parameters
    ----------
    arrays : Sequence of arrays
    axes : Sequence of Index
    verify_integrity : bool, default True
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Abstract base class: concrete constructors live on the subclasses
        # (e.g. ArrayManager below).
        raise NotImplementedError

    def make_empty(self: T, axes=None) -> T:
        """Return an empty ArrayManager with the items axis of len 0 (no columns)"""
        if axes is None:
            # NOTE(review): ``self.axes[1:]`` is a *list* slice, not an Index;
            # presumably only exercised for the 1D manager where the slice is
            # empty — confirm against callers.
            axes = [self.axes[1:], Index([])]

        arrays: list[np.ndarray | ExtensionArray] = []
        return type(self)(arrays, axes)

    @property
    def items(self) -> Index:
        # The column Index is stored last in the internal (rows, columns) order.
        return self._axes[-1]

    @property
    # error: Signature of "axes" incompatible with supertype "DataManager"
    def axes(self) -> list[Index]:  # type: ignore[override]
        # mypy doesn't work to override attribute with property
        # see https://github.com/python/mypy/issues/4125
        """Axes is BlockManager-compatible order (columns, rows)"""
        return [self._axes[1], self._axes[0]]

    @property
    def shape_proper(self) -> tuple[int, ...]:
        # this returns (n_rows, n_columns)
        return tuple(len(ax) for ax in self._axes)

    @staticmethod
    def _normalize_axis(axis: int) -> int:
        # switch axis: translate between BlockManager axis numbering and the
        # internal (rows, columns) numbering of ``_axes``
        axis = 1 if axis == 0 else 0
        return axis

    def set_axis(self, axis: int, new_labels: Index) -> None:
        """Replace the labels of one axis (``axis`` in BlockManager order)."""
        # Caller is responsible for ensuring we have an Index object.
        self._validate_set_axis(axis, new_labels)
        axis = self._normalize_axis(axis)
        self._axes[axis] = new_labels

    def get_dtypes(self) -> np.ndarray:
        """Return an object-dtype ndarray holding each column's dtype."""
        return np.array([arr.dtype for arr in self.arrays], dtype="object")

    def __getstate__(self):
        # Pickle support: the manager is fully described by its arrays + axes.
        return self.arrays, self._axes

    def __setstate__(self, state) -> None:
        self.arrays = state[0]
        self._axes = state[1]

    def __repr__(self) -> str:
        # Summarize the manager: index, columns (2D only) and per-array dtypes.
        output = type(self).__name__
        output += f"\nIndex: {self._axes[0]}"
        if self.ndim == 2:
            output += f"\nColumns: {self._axes[1]}"
        output += f"\n{len(self.arrays)} arrays:"
        for arr in self.arrays:
            output += f"\n{arr.dtype}"
        return output

    def apply(
        self: T,
        f,
        align_keys: list[str] | None = None,
        ignore_failures: bool = False,
        **kwargs,
    ) -> T:
        """
        Iterate over the arrays, collect and create a new ArrayManager.

        Parameters
        ----------
        f : str or callable
            Name of the Array method to apply, or a callable taking the array
            as first argument.
        align_keys: List[str] or None, default None
            Names of keyword arguments that hold Series/DataFrame/array-like
            values which must be aligned per-column before calling ``f``.
        ignore_failures: bool, default False
            If True, columns where ``f`` raises TypeError/NotImplementedError
            are dropped instead of propagating the error.
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        ArrayManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        if f == "apply":
            f = kwargs.pop("func")

        for i, arr in enumerate(self.arrays):

            if aligned_args:

                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[i]
                        else:
                            kwargs[k] = obj.iloc[:, i]._values
                    else:
                        # otherwise we have an array-like
                        kwargs[k] = obj[i]

            try:
                if callable(f):
                    applied = f(arr, **kwargs)
                else:
                    applied = getattr(arr, f)(**kwargs)
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue
            # if not isinstance(applied, ExtensionArray):
            #     # TODO not all EA operations return new EAs (eg astype)
            #     applied = array(applied)
            result_arrays.append(applied)
            result_indices.append(i)

        new_axes: list[Index]
        if ignore_failures:
            # TODO copy?
            # keep only the columns that succeeded
            new_axes = [self._axes[0], self._axes[1][result_indices]]
        else:
            new_axes = self._axes

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays, new_axes)  # type: ignore[arg-type]

    def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
        """
        Apply a Block method column-by-column by wrapping each array in a
        temporary single-column Block.

        Parameters
        ----------
        f : str
            Name of the Block method to call.
        align_keys : list of str, optional
            Keyword names whose Series/DataFrame/ndarray values are aligned
            per-column before each call.
        swap_axis : bool, default True
            Translate an ``axis`` keyword into BlockManager convention.
        **kwargs
            Keywords forwarded to the Block method.
        """
        # switch axis to follow BlockManager logic
        if swap_axis and "axis" in kwargs and self.ndim == 2:
            kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):

            if aligned_args:
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray):
                # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to
                # convert for the Block constructors.
                arr = np.asarray(arr)

            if self.ndim == 2:
                arr = ensure_block_shape(arr, 2)
                block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
            else:
                block = new_block(arr, placement=slice(0, len(self), 1), ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2 and arr.ndim == 2:
                # 2D for np.ndarray or DatetimeArray/TimedeltaArray
                assert len(arr) == 1
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                arr = arr[0, :]  # type: ignore[call-overload]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)

    def where(self: T, other, cond, align: bool) -> T:
        """Blockwise ``where``: replace values where ``cond`` is False."""
        if align:
            align_keys = ["other", "cond"]
        else:
            align_keys = ["cond"]
        other = extract_array(other, extract_numpy=True)

        return self.apply_with_block(
            "where",
            align_keys=align_keys,
            other=other,
            cond=cond,
        )

    def setitem(self: T, indexer, value) -> T:
        """Set ``value`` into the arrays via the Block ``setitem`` machinery."""
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def putmask(self: T, mask, new, align: bool = True) -> T:
        """Blockwise ``putmask``: set ``new`` where ``mask`` is True."""
        if align:
            align_keys = ["new", "mask"]
        else:
            align_keys = ["mask"]
        new = extract_array(new, extract_numpy=True)

        return self.apply_with_block(
            "putmask",
            align_keys=align_keys,
            mask=mask,
            new=new,
        )

    def diff(self: T, n: int, axis: int) -> T:
        """First discrete difference over ``n`` periods, column-wise."""
        if axis == 1:
            # DataFrame only calls this for n=0, in which case performing it
            # with axis=0 is equivalent
            assert n == 0
            axis = 0
        return self.apply(algos.diff, n=n, axis=axis)

    def interpolate(self: T, **kwargs) -> T:
        """Interpolate missing values via the Block implementation."""
        return self.apply_with_block("interpolate", swap_axis=False, **kwargs)

    def shift(self: T, periods: int, axis: int, fill_value) -> T:
        """Shift values by ``periods`` along ``axis``."""
        if fill_value is lib.no_default:
            fill_value = None

        if axis == 1 and self.ndim == 2:
            # TODO column-wise shift
            raise NotImplementedError

        return self.apply_with_block(
            "shift", periods=periods, axis=axis, fill_value=fill_value
        )

    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
        """Fill missing values via the Block ``fillna`` implementation."""
        if limit is not None:
            # Do this validation even if we go through one of the no-op paths
            limit = libalgos.validate_limit(None, limit=limit)

        return self.apply_with_block(
            "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
        )

    def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
        """Cast all arrays to ``dtype`` (with pandas' validating astype)."""
        return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)

    def convert(
        self: T,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> T:
        """Soft-convert object-dtype columns to better dtypes where possible."""
        def _convert(arr):
            if is_object_dtype(arr.dtype):
                # extract PandasArray for tests that patch PandasArray._typ
                arr = np.asarray(arr)
                return soft_convert_objects(
                    arr,
                    datetime=datetime,
                    numeric=numeric,
                    timedelta=timedelta,
                    copy=copy,
                )
            else:
                # non-object columns are returned unchanged (copied if asked)
                return arr.copy() if copy else arr

        return self.apply(_convert)

    def replace_regex(self: T, **kwargs) -> T:
        """Regex-based replace via the Block ``_replace_regex`` implementation."""
        return self.apply_with_block("_replace_regex", **kwargs)

    def replace(self: T, to_replace, value, inplace: bool) -> T:
        """Replace a single scalar ``to_replace`` with scalar ``value``."""
        inplace = validate_bool_kwarg(inplace, "inplace")
        assert np.ndim(value) == 0, value
        # TODO "replace" is right now implemented on the blocks, we should move
        # it to general array algos so it can be reused here
        return self.apply_with_block(
            "replace", value=value, to_replace=to_replace, inplace=inplace
        )

    def replace_list(
        self: T,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> T:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        return self.apply_with_block(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
        )

    def to_native_types(self: T, **kwargs) -> T:
        """Convert arrays to their native-types representation (for output)."""
        return self.apply(to_native_types, **kwargs)

    @property
    def is_mixed_type(self) -> bool:
        # each column is its own array, so treat as always "mixed"
        return True

    @property
    def is_numeric_mixed_type(self) -> bool:
        # True when every column has a numeric dtype
        return all(is_numeric_dtype(t) for t in self.get_dtypes())

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        return False  # any(block.is_extension for block in self.blocks)

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        # TODO what is this used for?
        return False

    @property
    def is_single_block(self) -> bool:
        # single-column manager is the ArrayManager analogue of a single block
        return len(self.arrays) == 1

    def _get_data_subset(self: T, predicate: Callable) -> T:
        """Return a new manager with only the columns for which ``predicate(arr)`` is True."""
        indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
        arrays = [self.arrays[i] for i in indices]
        # TODO copy?
        # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
        # see test_describe_datetime_columns
        taker = np.array(indices, dtype="intp")
        new_cols = self._axes[1].take(taker)
        new_axes = [self._axes[0], new_cols]
        return type(self)(arrays, new_axes, verify_integrity=False)

    def get_bool_data(self: T, copy: bool = False) -> T:
        """
        Select columns that are bool-dtype and object-dtype columns that are all-bool.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(is_inferred_bool_dtype)

    def get_numeric_data(self: T, copy: bool = False) -> T:
        """
        Select columns that have a numeric dtype.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(
            lambda arr: is_numeric_dtype(arr.dtype)
            or getattr(arr.dtype, "_is_numeric", False)
        )

    def copy(self: T, deep=True) -> T:
        """
        Make deep or shallow copy of ArrayManager

        Parameters
        ----------
        deep : bool or string, default True
            If False, return shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        if deep is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            deep = True

        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                return ax.copy(deep=True) if deep == "all" else ax.view()

            new_axes = [copy_func(ax) for ax in self._axes]
        else:
            new_axes = list(self._axes)

        if deep:
            new_arrays = [arr.copy() for arr in self.arrays]
        else:
            new_arrays = list(self.arrays)
        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def reindex_indexer(
        self: T,
        new_axis,
        indexer,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        # ignored keywords
        only_slice: bool = False,
        # ArrayManager specific keywords
        use_na_proxy: bool = False,
    ) -> T:
        """Public reindex entry point; translates ``axis`` then delegates."""
        axis = self._normalize_axis(axis)
        return self._reindex_indexer(
            new_axis,
            indexer,
            axis,
            fill_value,
            allow_dups,
            copy,
            use_na_proxy,
        )

    def _reindex_indexer(
        self: T,
        new_axis,
        indexer: npt.NDArray[np.intp] | None,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        use_na_proxy: bool = False,
    ) -> T:
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool, default True


        pandas-indexer with -1's only.
        """
        if copy is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            copy = True

        if indexer is None:
            # no take needed: just (optionally copy and) relabel the axis
            if new_axis is self._axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result._axes = list(self._axes)
            result._axes[axis] = new_axis
            return result

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self._axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 1:
            # column axis: select/insert whole arrays; -1 means a new NA column
            new_arrays = []
            for i in indexer:
                if i == -1:
                    arr = self._make_na_array(
                        fill_value=fill_value, use_na_proxy=use_na_proxy
                    )
                else:
                    arr = self.arrays[i]
                    if copy:
                        arr = arr.copy()
                new_arrays.append(arr)

        else:
            # row axis: take within each array
            validate_indices(indexer, len(self._axes[0]))
            indexer = ensure_platform_int(indexer)
            mask = indexer == -1
            needs_masking = mask.any()
            new_arrays = [
                take_1d(
                    arr,
                    indexer,
                    allow_fill=needs_masking,
                    fill_value=fill_value,
                    mask=mask,
                    # if fill_value is not None else blk.fill_value
                )
                for arr in self.arrays
            ]

        new_axes = list(self._axes)
        new_axes[axis] = new_axis

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def take(
        self: T,
        indexer,
        axis: int = 1,
        verify: bool = True,
        convert_indices: bool = True,
    ) -> T:
        """
        Take items along any axis.
        """
        axis = self._normalize_axis(axis)

        # normalize slice/array-like input to an int64 ndarray
        indexer = (
            np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
            if isinstance(indexer, slice)
            else np.asanyarray(indexer, dtype="int64")
        )

        if not indexer.ndim == 1:
            raise ValueError("indexer should be 1-dimensional")

        n = self.shape_proper[axis]
        if convert_indices:
            # wrap negative indices and bounds-check
            indexer = maybe_convert_indices(indexer, n, verify=verify)

        new_labels = self._axes[axis].take(indexer)
        return self._reindex_indexer(
            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
        )

    def _make_na_array(self, fill_value=None, use_na_proxy=False):
        """Build a length-n_rows all-NA column (or a lazy NullArrayProxy)."""
        if use_na_proxy:
            assert fill_value is None
            return NullArrayProxy(self.shape_proper[0])

        if fill_value is None:
            fill_value = np.nan

        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any],
        # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
        # _DTypeDict, Tuple[Any, Any]]]"
        values = np.empty(self.shape_proper[0], dtype=dtype)  # type: ignore[arg-type]
        values.fill(fill_value)
        return values

    def _equal_values(self, other) -> bool:
        """
        Used in .equals defined in base class. Only check the column values
        assuming shape and indexes have already been checked.
        """
        for left, right in zip(self.arrays, other.arrays):
            if not array_equals(left, right):
                return False
        else:
            return True

    # TODO
    # to_dict
class ArrayManager(BaseArrayManager):
    """2D manager: one 1D array per column of a DataFrame."""

    @property
    def ndim(self) -> Literal[2]:
        return 2

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Note: we are storing the axes in "_axes" in the (row, columns) order
        # which contrasts the order how it is stored in BlockManager
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            # normalize inputs: ensure Index objects and coerced 1D arrays
            self._axes = [ensure_index(ax) for ax in axes]
            arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays]
            self.arrays = [maybe_coerce_values(arr) for arr in arrays]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        """Validate that arrays match the axes: count, length, type and ndim."""
        n_rows, n_columns = self.shape_proper
        if not len(self.arrays) == n_columns:
            raise ValueError(
                "Number of passed arrays must equal the size of the column Index: "
                f"{len(self.arrays)} arrays vs {n_columns} columns."
            )
        for arr in self.arrays:
            if not len(arr) == n_rows:
                raise ValueError(
                    "Passed arrays should have the same length as the rows Index: "
                    f"{len(arr)} vs {n_rows} rows"
                )
            if not isinstance(arr, (np.ndarray, ExtensionArray)):
                raise ValueError(
                    "Passed arrays should be np.ndarray or ExtensionArray instances, "
                    f"got {type(arr)} instead"
                )
            if not arr.ndim == 1:
                raise ValueError(
                    "Passed arrays should be 1-dimensional, got array with "
                    f"{arr.ndim} dimensions instead."
                )

    # --------------------------------------------------------------------
    # Indexing

    def fast_xs(self, loc: int) -> SingleArrayManager:
        """
        Return the array corresponding to `frame.iloc[loc]`.

        Parameters
        ----------
        loc : int

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # find a common dtype for the row across all column dtypes
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        values = [arr[loc] for arr in self.arrays]
        if isinstance(dtype, ExtensionDtype):
            result = dtype.construct_array_type()._from_sequence(values, dtype=dtype)
        # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
        elif is_datetime64_ns_dtype(dtype):
            result = DatetimeArray._from_sequence(values, dtype=dtype)._data
        elif is_timedelta64_ns_dtype(dtype):
            result = TimedeltaArray._from_sequence(values, dtype=dtype)._data
        else:
            result = np.array(values, dtype=dtype)
        return SingleArrayManager([result], [self._axes[1]])

    def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
        """Slice along one axis (``axis`` in BlockManager order), returning views."""
        axis = self._normalize_axis(axis)

        if axis == 0:
            # row slice: slice each column array
            arrays = [arr[slobj] for arr in self.arrays]
        elif axis == 1:
            # column slice: slice the list of arrays
            arrays = self.arrays[slobj]

        new_axes = list(self._axes)
        new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

        return type(self)(arrays, new_axes, verify_integrity=False)

    def iget(self, i: int) -> SingleArrayManager:
        """
        Return the data as a SingleArrayManager.
        """
        values = self.arrays[i]
        return SingleArrayManager([values], [self._axes[0]])

    def iget_values(self, i: int) -> ArrayLike:
        """
        Return the data for column i as the values (ndarray or ExtensionArray).
        """
        return self.arrays[i]

    @property
    def column_arrays(self) -> list[ArrayLike]:
        """
        Used in the JSON C code to access column arrays.
        """
        # note: materializes each column as an ndarray
        return [np.asarray(arr) for arr in self.arrays]

    def iset(
        self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False
    ) -> None:
        """
        Set new column(s).

        This changes the ArrayManager in-place, but replaces (an) existing
        column(s), not changing column values in-place).

        Parameters
        ----------
        loc : integer, slice or boolean mask
            Positional location (already bounds checked)
        value : np.ndarray or ExtensionArray
        inplace : bool, default False
            Whether overwrite existing array as opposed to replacing it.
        """
        # single column -> single integer index
        if lib.is_integer(loc):

            # TODO can we avoid needing to unpack this here? That means converting
            # DataFrame into 1D array when loc is an integer
            if isinstance(value, np.ndarray) and value.ndim == 2:
                assert value.shape[1] == 1
                value = value[:, 0]

            # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
            # but we should avoid that and pass directly the proper array
            value = maybe_coerce_values(value)

            assert isinstance(value, (np.ndarray, ExtensionArray))
            assert value.ndim == 1
            assert len(value) == len(self._axes[0])
            self.arrays[loc] = value
            return

        # multiple columns -> convert slice or array to integer indices
        elif isinstance(loc, slice):
            indices = range(
                loc.start if loc.start is not None else 0,
                loc.stop if loc.stop is not None else self.shape_proper[1],
                loc.step if loc.step is not None else 1,
            )
        else:
            assert isinstance(loc, np.ndarray)
            assert loc.dtype == "bool"
            # error: Incompatible types in assignment (expression has type "ndarray",
            # variable has type "range")
            indices = np.nonzero(loc)[0]  # type: ignore[assignment]

        assert value.ndim == 2
        assert value.shape[0] == len(self._axes[0])

        for value_idx, mgr_idx in enumerate(indices):
            # error: No overload variant of "__getitem__" of "ExtensionArray" matches
            # argument type "Tuple[slice, int]"
            value_arr = value[:, value_idx]  # type: ignore[call-overload]
            self.arrays[mgr_idx] = value_arr
        return

    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
        """
        Set values ("setitem") into a single column (not setting the full column).

        This is a method on the ArrayManager level, to avoid creating an
        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
        """
        if not is_integer(loc):
            raise TypeError("The column index should be an integer")
        arr = self.arrays[loc]
        mgr = SingleArrayManager([arr], [self._axes[0]])
        new_mgr = mgr.setitem((idx,), value)
        # update existing ArrayManager in-place
        self.arrays[loc] = new_mgr.arrays[0]

    def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : np.ndarray or ExtensionArray
        """
        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        value = extract_array(value, extract_numpy=True)
        if value.ndim == 2:
            if value.shape[0] == 1:
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                value = value[0, :]  # type: ignore[call-overload]
            else:
                raise ValueError(
                    f"Expected a 1D array, got an array with shape {value.shape}"
                )
        value = maybe_coerce_values(value)

        # TODO self.arrays can be empty
        # assert len(value) == len(self.arrays[0])

        # TODO is this copy needed?
        arrays = self.arrays.copy()
        arrays.insert(loc, value)

        self.arrays = arrays
        self._axes[1] = new_axis

    def idelete(self, indexer) -> ArrayManager:
        """
        Delete selected locations in-place (new block and array, same BlockManager)
        """
        # boolean keep-mask over the columns; indexer marks what to drop
        to_keep = np.ones(self.shape[0], dtype=np.bool_)
        to_keep[indexer] = False

        self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
        self._axes = [self._axes[0], self._axes[1][to_keep]]
        return self

    # --------------------------------------------------------------------
    # Array-wise Operation

    def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
        """
        Apply grouped reduction function columnwise, returning a new ArrayManager.

        Parameters
        ----------
        func : grouped reduction function
        ignore_failures : bool, default False
            Whether to drop columns where func raises TypeError.

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []

        for i, arr in enumerate(self.arrays):
            # grouped_reduce functions all expect 2D arrays
            arr = ensure_block_shape(arr, ndim=2)
            try:
                res = func(arr)
            except (TypeError, NotImplementedError):
                if not ignore_failures:
                    raise
                continue

            if res.ndim == 2:
                # reverse of ensure_block_shape
                assert res.shape[0] == 1
                res = res[0]

            result_arrays.append(res)
            result_indices.append(i)

        if len(result_arrays) == 0:
            index = Index([None])  # placeholder
        else:
            index = Index(range(result_arrays[0].shape[0]))

        if ignore_failures:
            columns = self.items[np.array(result_indices, dtype="int64")]
        else:
            columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]

    def reduce(
        self: T, func: Callable, ignore_failures: bool = False
    ) -> tuple[T, np.ndarray]:
        """
        Apply reduction function column-wise, returning a single-row ArrayManager.

        Parameters
        ----------
        func : reduction function
        ignore_failures : bool, default False
            Whether to drop columns where func raises TypeError.

        Returns
        -------
        ArrayManager
        np.ndarray
            Indexer of column indices that are retained.
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []
        for i, arr in enumerate(self.arrays):
            try:
                res = func(arr, axis=0)
            except TypeError:
                if not ignore_failures:
                    raise
            else:
                # TODO NaT doesn't preserve dtype, so we need to ensure to create
                # a timedelta result array if original was timedelta
                # what if datetime results in timedelta? (eg std)
                if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
                    result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
                else:
                    # error: Argument 1 to "append" of "list" has incompatible type
                    # "ExtensionArray"; expected "ndarray"
                    result_arrays.append(
                        sanitize_array([res], None)  # type: ignore[arg-type]
                    )
                result_indices.append(i)

        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
        if ignore_failures:
            indexer = np.array(result_indices)
            columns = self.items[result_indices]
        else:
            indexer = np.arange(self.shape[0])
            columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        new_mgr = type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]
        return new_mgr, indexer

    def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
        """
        Apply array_op blockwise with another (aligned) BlockManager.
        """
        # TODO what if `other` is BlockManager ?
        left_arrays = self.arrays
        right_arrays = other.arrays
        result_arrays = [
            array_op(left, right) for left, right in zip(left_arrays, right_arrays)
        ]
        return type(self)(result_arrays, self._axes)

    def quantile(
        self,
        *,
        qs: Float64Index,
        axis: int = 0,
        transposed: bool = False,
        interpolation="linear",
    ) -> ArrayManager:
        """Compute the quantiles ``qs`` per column; result index is ``qs``."""
        arrs = [ensure_block_shape(x, 2) for x in self.arrays]
        assert axis == 1
        new_arrs = [
            quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs
        ]
        for i, arr in enumerate(new_arrs):
            if arr.ndim == 2:
                assert arr.shape[0] == 1, arr.shape
                new_arrs[i] = arr[0]

        axes = [qs, self._axes[1]]
        return type(self)(new_arrs, axes)

    # ----------------------------------------------------------------

    def unstack(self, unstacker, fill_value) -> ArrayManager:
        """
        Return a BlockManager with all blocks unstacked.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        indexer, _ = unstacker._indexer_and_to_sort
        if unstacker.mask.all():
            # no missing entries introduced by the unstack
            new_indexer = indexer
            allow_fill = False
            new_mask2D = None
            needs_masking = None
        else:
            new_indexer = np.full(unstacker.mask.shape, -1)
            new_indexer[unstacker.mask] = indexer
            allow_fill = True
            # calculating the full mask once and passing it to take_1d is faster
            # than letting take_1d calculate it in each repeated call
            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
            needs_masking = new_mask2D.any(axis=0)
        new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
        new_indexer2D = ensure_platform_int(new_indexer2D)

        new_arrays = []
        for arr in self.arrays:
            for i in range(unstacker.full_shape[1]):
                if allow_fill:
                    # error: Value of type "Optional[Any]" is not indexable  [index]
                    new_arr = take_1d(
                        arr,
                        new_indexer2D[:, i],
                        allow_fill=needs_masking[i],  # type: ignore[index]
                        fill_value=fill_value,
                        mask=new_mask2D[:, i],  # type: ignore[index]
                    )
                else:
                    new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False)
                new_arrays.append(new_arr)

        new_index = unstacker.new_index
        new_columns = unstacker.get_new_columns(self._axes[1])
        new_axes = [new_index, new_columns]

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def as_array(
        self,
        dtype=None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        dtype : object, default None
            Data type of the return array.
        copy : bool, default False
            If True then guarantee that a copy is returned. A value of
            False does not guarantee that the underlying data is not
            copied.
        na_value : object, default lib.no_default
            Value to be used as the missing value sentinel.

        Returns
        -------
        arr : ndarray
        """
        if len(self.arrays) == 0:
            empty_arr = np.empty(self.shape, dtype=float)
            return empty_arr.transpose()

        # We want to copy when na_value is provided to avoid
        # mutating the original object
        copy = copy or na_value is not lib.no_default

        if not dtype:
            dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        # map pandas-specific dtypes to a plain numpy dtype for the output
        if isinstance(dtype, SparseDtype):
            dtype = dtype.subtype
        elif isinstance(dtype, PandasDtype):
            dtype = dtype.numpy_dtype
        elif is_extension_array_dtype(dtype):
            dtype = "object"
        elif is_dtype_equal(dtype, str):
            dtype = "object"

        result = np.empty(self.shape_proper, dtype=dtype)

        for i, arr in enumerate(self.arrays):
            arr = arr.astype(dtype, copy=copy)
            result[:, i] = arr

        if na_value is not lib.no_default:
            result[isna(result)] = na_value

        return result
class SingleArrayManager(BaseArrayManager, SingleDataManager):
    """
    Manager holding a single 1D array; the ArrayManager counterpart of a
    Series' backing store.
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    @property
    def ndim(self) -> Literal[1]:
        # a SingleArrayManager always wraps exactly one 1D array
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            assert len(axes) == 1
            assert len(arrays) == 1
            self._axes = [ensure_index(ax) for ax in self._axes]
            # normalize the stored values (coerce dtype-specific wrappers,
            # unwrap PandasArray) before keeping them
            values = arrays[0]
            values = maybe_coerce_values(values)
            values = extract_pandas_array(values, None, 1)[0]
            self.arrays = [values]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        # check that the single stored array is 1D and matches the index length
        (n_rows,) = self.shape
        assert len(self.arrays) == 1
        values = self.arrays[0]
        assert len(values) == n_rows
        if values.ndim != 1:
            raise ValueError(
                "Passed array should be 1-dimensional, got array with "
                f"{values.ndim} dimensions instead."
            )

    @staticmethod
    def _normalize_axis(axis):
        # only one axis exists, so there is nothing to translate
        return axis

    def make_empty(self, axes=None) -> SingleArrayManager:
        """Return an empty ArrayManager with index/array of length 0"""
        if axes is None:
            axes = [Index([], dtype=object)]
        empty = np.array([], dtype=self.dtype)
        return type(self)([empty], axes)

    @classmethod
    def from_array(cls, array, index) -> SingleArrayManager:
        """Alternate constructor from a bare array and its index."""
        return cls([array], [index])

    @property
    def axes(self):
        return self._axes

    @property
    def index(self) -> Index:
        return self._axes[0]

    @property
    def dtype(self):
        return self.array.dtype

    def external_values(self):
        """The array that Series.values returns"""
        return external_values(self.array)

    def internal_values(self):
        """The array that Series._values returns"""
        return self.array

    def array_values(self):
        """The array that Series.array returns"""
        values = self.array
        if isinstance(values, np.ndarray):
            # plain ndarrays are exposed through the PandasArray wrapper
            values = PandasArray(values)
        return values

    @property
    def _can_hold_na(self) -> bool:
        values = self.array
        if isinstance(values, np.ndarray):
            # bool and (un)signed integer ndarrays cannot represent NA
            return values.dtype.kind not in ["b", "i", "u"]
        # ExtensionArray: the array type declares this itself
        return values._can_hold_na

    @property
    def is_single_block(self) -> bool:
        return True

    def fast_xs(self, loc: int) -> SingleArrayManager:
        raise NotImplementedError("Use series._values[loc] instead")

    def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager:
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        sliced_values = self.array[slobj]
        sliced_index = self.index._getitem_slice(slobj)
        return type(self)([sliced_values], [sliced_index], verify_integrity=False)

    def getitem_mgr(self, indexer) -> SingleArrayManager:
        # take along both the values and the index with the same indexer
        return type(self)([self.array[indexer]], [self.index[indexer]])

    def apply(self, func, **kwargs):
        # func may be a callable or the name of a method on the array
        values = self.array
        if callable(func):
            result = func(values, **kwargs)
        else:
            result = getattr(values, func)(**kwargs)
        return type(self)([result], self._axes)

    def setitem(self, indexer, value) -> SingleArrayManager:
        """
        Set values with indexer.

        For SingleArrayManager, this backs s[indexer] = value

        See `setitem_inplace` for a version that works inplace and doesn't
        return a new Manager.
        """
        if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def idelete(self, indexer) -> SingleArrayManager:
        """
        Delete selected locations in-place (new array, same ArrayManager)
        """
        keep = np.ones(self.shape[0], dtype=np.bool_)
        keep[indexer] = False

        self.arrays = [self.arrays[0][keep]]
        self._axes = [self._axes[0][keep]]
        return self

    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
        # used in get_numeric_data / get_bool_data
        if not predicate(self.array):
            return self.make_empty()
        return type(self)(self.arrays, self._axes, verify_integrity=False)

    def set_values(self, values: ArrayLike) -> None:
        """
        Set (replace) the values of the SingleArrayManager in place.

        Use at your own risk! This does not check if the passed values are
        valid for the current SingleArrayManager (length, dtype, etc).
        """
        self.arrays[0] = values

    def to_2d_mgr(self, columns: Index) -> ArrayManager:
        """
        Manager analogue of Series.to_frame
        """
        return ArrayManager(
            [self.arrays[0]], [self.axes[0], columns], verify_integrity=False
        )
class NullArrayProxy:
    """
    Proxy object for an all-NA array.

    Only stores the length of the array, and not the dtype. The dtype
    will only be known when actually concatenating (after determining the
    common dtype, for which this proxy is ignored).
    Using this object avoids that the internals/concat.py needs to determine
    the proper dtype and array type.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        # only the length is recorded; the dtype is decided later
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Helper function to create the actual all-NA array from the NullArrayProxy
        object.

        Parameters
        ----------
        arr : NullArrayProxy
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if isinstance(dtype, ExtensionDtype):
            # taking with -1 and allow_fill=True from an empty EA yields an
            # array whose every position is the EA's NA value
            empty = dtype.construct_array_type()._from_sequence([], dtype=dtype)
            fill_indexer = -np.ones(self.n, dtype=np.intp)
            return empty.take(fill_indexer, allow_fill=True)

        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        na = na_value_for_dtype(dtype)
        out = np.empty(self.n, dtype=dtype)
        out.fill(na)
        # wrap datetime64/timedelta64 results in their array classes
        return ensure_wrapped_if_datetimelike(out)