Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/base.py: 29%
388 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2An interface for extending pandas with custom arrays.
4.. warning::
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
11import inspect
12import operator
13from typing import (
14 TYPE_CHECKING,
15 Any,
16 Callable,
17 ClassVar,
18 Iterator,
19 Literal,
20 Sequence,
21 TypeVar,
22 cast,
23 overload,
24)
25import warnings
27import numpy as np
29from pandas._libs import lib
30from pandas._typing import (
31 ArrayLike,
32 AstypeArg,
33 Dtype,
34 FillnaOptions,
35 PositionalIndexer,
36 ScalarIndexer,
37 SequenceIndexer,
38 Shape,
39 TakeIndexer,
40 npt,
41)
42from pandas.compat import set_function_name
43from pandas.compat.numpy import function as nv
44from pandas.errors import AbstractMethodError
45from pandas.util._decorators import (
46 Appender,
47 Substitution,
48 cache_readonly,
49 deprecate_nonkeyword_arguments,
50)
51from pandas.util._exceptions import find_stack_level
52from pandas.util._validators import (
53 validate_bool_kwarg,
54 validate_fillna_kwargs,
55 validate_insert_loc,
56)
58from pandas.core.dtypes.cast import maybe_cast_to_extension_array
59from pandas.core.dtypes.common import (
60 is_dtype_equal,
61 is_list_like,
62 is_scalar,
63 pandas_dtype,
64)
65from pandas.core.dtypes.dtypes import ExtensionDtype
66from pandas.core.dtypes.generic import (
67 ABCDataFrame,
68 ABCIndex,
69 ABCSeries,
70)
71from pandas.core.dtypes.missing import isna
73from pandas.core import (
74 arraylike,
75 missing,
76 roperator,
77)
78from pandas.core.algorithms import (
79 factorize_array,
80 isin,
81 mode,
82 rank,
83 resolve_na_sentinel,
84 unique,
85)
86from pandas.core.array_algos.quantile import quantile_with_mask
87from pandas.core.sorting import (
88 nargminmax,
89 nargsort,
90)
if TYPE_CHECKING:
    # Static-typing-only definitions; this branch is never executed at
    # runtime (TYPE_CHECKING is False), so the string base class is safe.

    class ExtensionArraySupportsAnyAll("ExtensionArray"):
        # Typing stub: an ExtensionArray whose any()/all() are guaranteed
        # to exist (used as the return type of ``isna``).
        def any(self, *, skipna: bool = True) -> bool:
            pass

        def all(self, *, skipna: bool = True) -> bool:
            pass

    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
    )
# Registry of docstring templates shared between ExtensionArray methods and
# their Series/Index counterparts; entries are filled in later in the module.
_extension_array_shared_docs: dict[str, str] = {}

# TypeVar so that methods returning "an array like self" preserve the
# concrete subclass type for static checkers.
ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
112class ExtensionArray:
113 """
114 Abstract base class for custom 1-D array types.
116 pandas will recognize instances of this class as proper arrays
117 with a custom type and will not attempt to coerce them to objects. They
118 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
120 Attributes
121 ----------
122 dtype
123 nbytes
124 ndim
125 shape
127 Methods
128 -------
129 argsort
130 astype
131 copy
132 dropna
133 factorize
134 fillna
135 equals
136 insert
137 isin
138 isna
139 ravel
140 repeat
141 searchsorted
142 shift
143 take
144 tolist
145 unique
146 view
147 _concat_same_type
148 _formatter
149 _from_factorized
150 _from_sequence
151 _from_sequence_of_strings
152 _reduce
153 _values_for_argsort
154 _values_for_factorize
156 Notes
157 -----
158 The interface includes the following abstract methods that must be
159 implemented by subclasses:
161 * _from_sequence
162 * _from_factorized
163 * __getitem__
164 * __len__
165 * __eq__
166 * dtype
167 * nbytes
168 * isna
169 * take
170 * copy
171 * _concat_same_type
173 A default repr displaying the type, (truncated) data, length,
174 and dtype is provided. It can be customized or replaced by
175 by overriding:
177 * __repr__ : A default repr for the ExtensionArray.
178 * _formatter : Print scalars inside a Series or DataFrame.
180 Some methods require casting the ExtensionArray to an ndarray of Python
181 objects with ``self.astype(object)``, which may be expensive. When
182 performance is a concern, we highly recommend overriding the following
183 methods:
185 * fillna
186 * dropna
187 * unique
188 * factorize / _values_for_factorize
189 * argsort, argmax, argmin / _values_for_argsort
190 * searchsorted
192 The remaining methods implemented on this class should be performant,
193 as they only compose abstract methods. Still, a more efficient
194 implementation may be available, and these methods can be overridden.
196 One can implement methods to handle array reductions.
198 * _reduce
200 One can implement methods to handle parsing from strings that will be used
201 in methods such as ``pandas.io.parsers.read_csv``.
203 * _from_sequence_of_strings
205 This class does not inherit from 'abc.ABCMeta' for performance reasons.
206 Methods and properties required by the interface raise
207 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
208 provided for registering virtual subclasses.
210 ExtensionArrays are limited to 1 dimension.
212 They may be backed by none, one, or many NumPy arrays. For example,
213 ``pandas.Categorical`` is an extension array backed by two arrays,
214 one for codes and one for categories. An array of IPv6 address may
215 be backed by a NumPy structured array with two fields, one for the
216 lower 64 bits and one for the upper 64 bits. Or they may be backed
217 by some other storage type, like Python lists. Pandas makes no
218 assumptions on how the data are stored, just that it can be converted
219 to a NumPy array.
220 The ExtensionArray interface does not impose any rules on how this data
221 is stored. However, currently, the backing data cannot be stored in
222 attributes called ``.values`` or ``._values`` to ensure full compatibility
223 with pandas internals. But other names as ``.data``, ``._data``,
224 ``._items``, ... can be freely used.
226 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
227 that
229 1. You defer by returning ``NotImplemented`` when any Series are present
230 in `inputs`. Pandas will extract the arrays and call the ufunc again.
231 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
232 Pandas inspect this to determine whether the ufunc is valid for the
233 types present.
235 See :ref:`extending.extension.ufunc` for more.
237 By default, ExtensionArrays are not hashable. Immutable subclasses may
238 override this behavior.
239 """
241 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
242 # Don't override this.
243 _typ = "extension"
245 # ------------------------------------------------------------------------
246 # Constructors
247 # ------------------------------------------------------------------------
249 @classmethod
250 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
251 """
252 Construct a new ExtensionArray from a sequence of scalars.
254 Parameters
255 ----------
256 scalars : Sequence
257 Each element will be an instance of the scalar type for this
258 array, ``cls.dtype.type`` or be converted into this type in this method.
259 dtype : dtype, optional
260 Construct for this particular dtype. This should be a Dtype
261 compatible with the ExtensionArray.
262 copy : bool, default False
263 If True, copy the underlying data.
265 Returns
266 -------
267 ExtensionArray
268 """
269 raise AbstractMethodError(cls)
271 @classmethod
272 def _from_sequence_of_strings(
273 cls, strings, *, dtype: Dtype | None = None, copy=False
274 ):
275 """
276 Construct a new ExtensionArray from a sequence of strings.
278 Parameters
279 ----------
280 strings : Sequence
281 Each element will be an instance of the scalar type for this
282 array, ``cls.dtype.type``.
283 dtype : dtype, optional
284 Construct for this particular dtype. This should be a Dtype
285 compatible with the ExtensionArray.
286 copy : bool, default False
287 If True, copy the underlying data.
289 Returns
290 -------
291 ExtensionArray
292 """
293 raise AbstractMethodError(cls)
295 @classmethod
296 def _from_factorized(cls, values, original):
297 """
298 Reconstruct an ExtensionArray after factorization.
300 Parameters
301 ----------
302 values : ndarray
303 An integer ndarray with the factorized values.
304 original : ExtensionArray
305 The original ExtensionArray that factorize was called on.
307 See Also
308 --------
309 factorize : Top-level factorize method that dispatches here.
310 ExtensionArray.factorize : Encode the extension array as an enumerated type.
311 """
312 raise AbstractMethodError(cls)
314 # ------------------------------------------------------------------------
315 # Must be a Sequence
316 # ------------------------------------------------------------------------
317 @overload
318 def __getitem__(self, item: ScalarIndexer) -> Any:
319 ...
321 @overload
322 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
323 ...
325 def __getitem__(
326 self: ExtensionArrayT, item: PositionalIndexer
327 ) -> ExtensionArrayT | Any:
328 """
329 Select a subset of self.
331 Parameters
332 ----------
333 item : int, slice, or ndarray
334 * int: The position in 'self' to get.
336 * slice: A slice object, where 'start', 'stop', and 'step' are
337 integers or None
339 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
341 * list[int]: A list of int
343 Returns
344 -------
345 item : scalar or ExtensionArray
347 Notes
348 -----
349 For scalar ``item``, return a scalar value suitable for the array's
350 type. This should be an instance of ``self.dtype.type``.
352 For slice ``key``, return an instance of ``ExtensionArray``, even
353 if the slice is length 0 or 1.
355 For a boolean mask, return an instance of ``ExtensionArray``, filtered
356 to the values where ``item`` is True.
357 """
358 raise AbstractMethodError(self)
360 def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
361 """
362 Set one or more values inplace.
364 This method is not required to satisfy the pandas extension array
365 interface.
367 Parameters
368 ----------
369 key : int, ndarray, or slice
370 When called from, e.g. ``Series.__setitem__``, ``key`` will be
371 one of
373 * scalar int
374 * ndarray of integers.
375 * boolean ndarray
376 * slice object
378 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
379 value or values to be set of ``key``.
381 Returns
382 -------
383 None
384 """
385 # Some notes to the ExtensionArray implementor who may have ended up
386 # here. While this method is not required for the interface, if you
387 # *do* choose to implement __setitem__, then some semantics should be
388 # observed:
389 #
390 # * Setting multiple values : ExtensionArrays should support setting
391 # multiple values at once, 'key' will be a sequence of integers and
392 # 'value' will be a same-length sequence.
393 #
394 # * Broadcasting : For a sequence 'key' and a scalar 'value',
395 # each position in 'key' should be set to 'value'.
396 #
397 # * Coercion : Most users will expect basic coercion to work. For
398 # example, a string like '2018-01-01' is coerced to a datetime
399 # when setting on a datetime64ns array. In general, if the
400 # __init__ method coerces that value, then so should __setitem__
401 # Note, also, that Series/DataFrame.where internally use __setitem__
402 # on a copy of the data.
403 raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
405 def __len__(self) -> int:
406 """
407 Length of this array
409 Returns
410 -------
411 length : int
412 """
413 raise AbstractMethodError(self)
415 def __iter__(self) -> Iterator[Any]:
416 """
417 Iterate over elements of the array.
418 """
419 # This needs to be implemented so that pandas recognizes extension
420 # arrays as list-like. The default implementation makes successive
421 # calls to ``__getitem__``, which may be slower than necessary.
422 for i in range(len(self)):
423 yield self[i]
425 def __contains__(self, item: object) -> bool | np.bool_:
426 """
427 Return for `item in self`.
428 """
429 # GH37867
430 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
431 # would raise a TypeError. The implementation below works around that.
432 if is_scalar(item) and isna(item):
433 if not self._can_hold_na:
434 return False
435 elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
436 return self._hasna
437 else:
438 return False
439 else:
440 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
441 # attribute "any"
442 return (item == self).any() # type: ignore[union-attr]
444 # error: Signature of "__eq__" incompatible with supertype "object"
445 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override]
446 """
447 Return for `self == other` (element-wise equality).
448 """
449 # Implementer note: this should return a boolean numpy ndarray or
450 # a boolean ExtensionArray.
451 # When `other` is one of Series, Index, or DataFrame, this method should
452 # return NotImplemented (to ensure that those objects are responsible for
453 # first unpacking the arrays, and then dispatch the operation to the
454 # underlying arrays)
455 raise AbstractMethodError(self)
457 # error: Signature of "__ne__" incompatible with supertype "object"
458 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
459 """
460 Return for `self != other` (element-wise in-equality).
461 """
462 return ~(self == other)
464 def __init_subclass__(cls, **kwargs) -> None:
465 factorize = getattr(cls, "factorize")
466 if ( 466 ↛ 472line 466 didn't jump to line 472
467 "use_na_sentinel" not in inspect.signature(factorize).parameters
468 # TimelikeOps uses old factorize args to ensure we don't break things
469 and cls.__name__ not in ("TimelikeOps", "DatetimeArray", "TimedeltaArray")
470 ):
471 # See GH#46910 for details on the deprecation
472 name = cls.__name__
473 warnings.warn(
474 f"The `na_sentinel` argument of `{name}.factorize` is deprecated. "
475 f"In the future, pandas will use the `use_na_sentinel` argument "
476 f"instead. Add this argument to `{name}.factorize` to be compatible "
477 f"with future versions of pandas and silence this warning.",
478 DeprecationWarning,
479 stacklevel=find_stack_level(),
480 )
482 def to_numpy(
483 self,
484 dtype: npt.DTypeLike | None = None,
485 copy: bool = False,
486 na_value: object = lib.no_default,
487 ) -> np.ndarray:
488 """
489 Convert to a NumPy ndarray.
491 .. versionadded:: 1.0.0
493 This is similar to :meth:`numpy.asarray`, but may provide additional control
494 over how the conversion is done.
496 Parameters
497 ----------
498 dtype : str or numpy.dtype, optional
499 The dtype to pass to :meth:`numpy.asarray`.
500 copy : bool, default False
501 Whether to ensure that the returned value is a not a view on
502 another array. Note that ``copy=False`` does not *ensure* that
503 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
504 a copy is made, even if not strictly necessary.
505 na_value : Any, optional
506 The value to use for missing values. The default value depends
507 on `dtype` and the type of the array.
509 Returns
510 -------
511 numpy.ndarray
512 """
513 result = np.asarray(self, dtype=dtype)
514 if copy or na_value is not lib.no_default:
515 result = result.copy()
516 if na_value is not lib.no_default:
517 result[self.isna()] = na_value
518 return result
520 # ------------------------------------------------------------------------
521 # Required attributes
522 # ------------------------------------------------------------------------
524 @property
525 def dtype(self) -> ExtensionDtype:
526 """
527 An instance of 'ExtensionDtype'.
528 """
529 raise AbstractMethodError(self)
531 @property
532 def shape(self) -> Shape:
533 """
534 Return a tuple of the array dimensions.
535 """
536 return (len(self),)
538 @property
539 def size(self) -> int:
540 """
541 The number of elements in the array.
542 """
543 # error: Incompatible return value type (got "signedinteger[_64Bit]",
544 # expected "int") [return-value]
545 return np.prod(self.shape) # type: ignore[return-value]
547 @property
548 def ndim(self) -> int:
549 """
550 Extension Arrays are only allowed to be 1-dimensional.
551 """
552 return 1
554 @property
555 def nbytes(self) -> int:
556 """
557 The number of bytes needed to store this object in memory.
558 """
559 # If this is expensive to compute, return an approximate lower bound
560 # on the number of bytes needed.
561 raise AbstractMethodError(self)
563 # ------------------------------------------------------------------------
564 # Additional Methods
565 # ------------------------------------------------------------------------
567 @overload
568 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
569 ...
571 @overload
572 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
573 ...
575 @overload
576 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
577 ...
579 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
580 """
581 Cast to a NumPy array or ExtensionArray with 'dtype'.
583 Parameters
584 ----------
585 dtype : str or dtype
586 Typecode or data-type to which the array is cast.
587 copy : bool, default True
588 Whether to copy the data, even if not necessary. If False,
589 a copy is made only if the old dtype does not match the
590 new dtype.
592 Returns
593 -------
594 array : np.ndarray or ExtensionArray
595 An ExtensionArray if dtype is ExtensionDtype,
596 Otherwise a NumPy ndarray with 'dtype' for its dtype.
597 """
599 dtype = pandas_dtype(dtype)
600 if is_dtype_equal(dtype, self.dtype):
601 if not copy:
602 return self
603 else:
604 return self.copy()
606 if isinstance(dtype, ExtensionDtype):
607 cls = dtype.construct_array_type()
608 return cls._from_sequence(self, dtype=dtype, copy=copy)
610 return np.array(self, dtype=dtype, copy=copy)
612 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
613 """
614 A 1-D array indicating if each value is missing.
616 Returns
617 -------
618 na_values : Union[np.ndarray, ExtensionArray]
619 In most cases, this should return a NumPy ndarray. For
620 exceptional cases like ``SparseArray``, where returning
621 an ndarray would be expensive, an ExtensionArray may be
622 returned.
624 Notes
625 -----
626 If returning an ExtensionArray, then
628 * ``na_values._is_boolean`` should be True
629 * `na_values` should implement :func:`ExtensionArray._reduce`
630 * ``na_values.any`` and ``na_values.all`` should be implemented
631 """
632 raise AbstractMethodError(self)
634 @property
635 def _hasna(self) -> bool:
636 # GH#22680
637 """
638 Equivalent to `self.isna().any()`.
640 Some ExtensionArray subclasses may be able to optimize this check.
641 """
642 return bool(self.isna().any())
644 def _values_for_argsort(self) -> np.ndarray:
645 """
646 Return values for sorting.
648 Returns
649 -------
650 ndarray
651 The transformed values should maintain the ordering between values
652 within the array.
654 See Also
655 --------
656 ExtensionArray.argsort : Return the indices that would sort this array.
658 Notes
659 -----
660 The caller is responsible for *not* modifying these values in-place, so
661 it is safe for implementors to give views on `self`.
663 Functions that use this (e.g. ExtensionArray.argsort) should ignore
664 entries with missing values in the original array (according to `self.isna()`).
665 This means that the corresponding entries in the returned array don't need to
666 be modified to sort correctly.
667 """
668 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
669 return np.array(self)
671 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
672 def argsort(
673 self,
674 ascending: bool = True,
675 kind: str = "quicksort",
676 na_position: str = "last",
677 *args,
678 **kwargs,
679 ) -> np.ndarray:
680 """
681 Return the indices that would sort this array.
683 Parameters
684 ----------
685 ascending : bool, default True
686 Whether the indices should result in an ascending
687 or descending sort.
688 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
689 Sorting algorithm.
690 *args, **kwargs:
691 Passed through to :func:`numpy.argsort`.
693 Returns
694 -------
695 np.ndarray[np.intp]
696 Array of indices that sort ``self``. If NaN values are contained,
697 NaN values are placed at the end.
699 See Also
700 --------
701 numpy.argsort : Sorting implementation used internally.
702 """
703 # Implementor note: You have two places to override the behavior of
704 # argsort.
705 # 1. _values_for_argsort : construct the values passed to np.argsort
706 # 2. argsort : total control over sorting. In case of overriding this,
707 # it is recommended to also override argmax/argmin
708 ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
710 values = self._values_for_argsort()
711 return nargsort(
712 values,
713 kind=kind,
714 ascending=ascending,
715 na_position=na_position,
716 mask=np.asarray(self.isna()),
717 )
719 def argmin(self, skipna: bool = True) -> int:
720 """
721 Return the index of minimum value.
723 In case of multiple occurrences of the minimum value, the index
724 corresponding to the first occurrence is returned.
726 Parameters
727 ----------
728 skipna : bool, default True
730 Returns
731 -------
732 int
734 See Also
735 --------
736 ExtensionArray.argmax
737 """
738 # Implementor note: You have two places to override the behavior of
739 # argmin.
740 # 1. _values_for_argsort : construct the values used in nargminmax
741 # 2. argmin itself : total control over sorting.
742 validate_bool_kwarg(skipna, "skipna")
743 if not skipna and self._hasna:
744 raise NotImplementedError
745 return nargminmax(self, "argmin")
747 def argmax(self, skipna: bool = True) -> int:
748 """
749 Return the index of maximum value.
751 In case of multiple occurrences of the maximum value, the index
752 corresponding to the first occurrence is returned.
754 Parameters
755 ----------
756 skipna : bool, default True
758 Returns
759 -------
760 int
762 See Also
763 --------
764 ExtensionArray.argmin
765 """
766 # Implementor note: You have two places to override the behavior of
767 # argmax.
768 # 1. _values_for_argsort : construct the values used in nargminmax
769 # 2. argmax itself : total control over sorting.
770 validate_bool_kwarg(skipna, "skipna")
771 if not skipna and self._hasna:
772 raise NotImplementedError
773 return nargminmax(self, "argmax")
775 def fillna(
776 self: ExtensionArrayT,
777 value: object | ArrayLike | None = None,
778 method: FillnaOptions | None = None,
779 limit: int | None = None,
780 ) -> ExtensionArrayT:
781 """
782 Fill NA/NaN values using the specified method.
784 Parameters
785 ----------
786 value : scalar, array-like
787 If a scalar value is passed it is used to fill all missing values.
788 Alternatively, an array-like 'value' can be given. It's expected
789 that the array-like have the same length as 'self'.
790 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
791 Method to use for filling holes in reindexed Series
792 pad / ffill: propagate last valid observation forward to next valid
793 backfill / bfill: use NEXT valid observation to fill gap.
794 limit : int, default None
795 If method is specified, this is the maximum number of consecutive
796 NaN values to forward/backward fill. In other words, if there is
797 a gap with more than this number of consecutive NaNs, it will only
798 be partially filled. If method is not specified, this is the
799 maximum number of entries along the entire axis where NaNs will be
800 filled.
802 Returns
803 -------
804 ExtensionArray
805 With NA/NaN filled.
806 """
807 value, method = validate_fillna_kwargs(value, method)
809 mask = self.isna()
810 # error: Argument 2 to "check_value_size" has incompatible type
811 # "ExtensionArray"; expected "ndarray"
812 value = missing.check_value_size(
813 value, mask, len(self) # type: ignore[arg-type]
814 )
816 if mask.any():
817 if method is not None:
818 func = missing.get_fill_func(method)
819 npvalues = self.astype(object)
820 func(npvalues, limit=limit, mask=mask)
821 new_values = self._from_sequence(npvalues, dtype=self.dtype)
822 else:
823 # fill with value
824 new_values = self.copy()
825 new_values[mask] = value
826 else:
827 new_values = self.copy()
828 return new_values
830 def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
831 """
832 Return ExtensionArray without NA values.
834 Returns
835 -------
836 valid : ExtensionArray
837 """
838 # error: Unsupported operand type for ~ ("ExtensionArray")
839 return self[~self.isna()] # type: ignore[operator]
841 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
842 """
843 Shift values by desired number.
845 Newly introduced missing values are filled with
846 ``self.dtype.na_value``.
848 Parameters
849 ----------
850 periods : int, default 1
851 The number of periods to shift. Negative values are allowed
852 for shifting backwards.
854 fill_value : object, optional
855 The scalar value to use for newly introduced missing values.
856 The default is ``self.dtype.na_value``.
858 Returns
859 -------
860 ExtensionArray
861 Shifted.
863 Notes
864 -----
865 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
866 returned.
868 If ``periods > len(self)``, then an array of size
869 len(self) is returned, with all values filled with
870 ``self.dtype.na_value``.
871 """
872 # Note: this implementation assumes that `self.dtype.na_value` can be
873 # stored in an instance of your ExtensionArray with `self.dtype`.
874 if not len(self) or periods == 0:
875 return self.copy()
877 if isna(fill_value):
878 fill_value = self.dtype.na_value
880 empty = self._from_sequence(
881 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
882 )
883 if periods > 0:
884 a = empty
885 b = self[:-periods]
886 else:
887 a = self[abs(periods) :]
888 b = empty
889 return self._concat_same_type([a, b])
891 def unique(self: ExtensionArrayT) -> ExtensionArrayT:
892 """
893 Compute the ExtensionArray of unique values.
895 Returns
896 -------
897 uniques : ExtensionArray
898 """
899 uniques = unique(self.astype(object))
900 return self._from_sequence(uniques, dtype=self.dtype)
902 def searchsorted(
903 self,
904 value: NumpyValueArrayLike | ExtensionArray,
905 side: Literal["left", "right"] = "left",
906 sorter: NumpySorter = None,
907 ) -> npt.NDArray[np.intp] | np.intp:
908 """
909 Find indices where elements should be inserted to maintain order.
911 Find the indices into a sorted array `self` (a) such that, if the
912 corresponding elements in `value` were inserted before the indices,
913 the order of `self` would be preserved.
915 Assuming that `self` is sorted:
917 ====== ================================
918 `side` returned index `i` satisfies
919 ====== ================================
920 left ``self[i-1] < value <= self[i]``
921 right ``self[i-1] <= value < self[i]``
922 ====== ================================
924 Parameters
925 ----------
926 value : array-like, list or scalar
927 Value(s) to insert into `self`.
928 side : {'left', 'right'}, optional
929 If 'left', the index of the first suitable location found is given.
930 If 'right', return the last such index. If there is no suitable
931 index, return either 0 or N (where N is the length of `self`).
932 sorter : 1-D array-like, optional
933 Optional array of integer indices that sort array a into ascending
934 order. They are typically the result of argsort.
936 Returns
937 -------
938 array of ints or int
939 If value is array-like, array of insertion points.
940 If value is scalar, a single integer.
942 See Also
943 --------
944 numpy.searchsorted : Similar method from NumPy.
945 """
946 # Note: the base tests provided by pandas only test the basics.
947 # We do not test
948 # 1. Values outside the range of the `data_for_sorting` fixture
949 # 2. Values between the values in the `data_for_sorting` fixture
950 # 3. Missing values.
951 arr = self.astype(object)
952 if isinstance(value, ExtensionArray):
953 value = value.astype(object)
954 return arr.searchsorted(value, side=side, sorter=sorter)
956 def equals(self, other: object) -> bool:
957 """
958 Return if another array is equivalent to this array.
960 Equivalent means that both arrays have the same shape and dtype, and
961 all values compare equal. Missing values in the same location are
962 considered equal (in contrast with normal equality).
964 Parameters
965 ----------
966 other : ExtensionArray
967 Array to compare to this Array.
969 Returns
970 -------
971 boolean
972 Whether the arrays are equivalent.
973 """
974 if type(self) != type(other):
975 return False
976 other = cast(ExtensionArray, other)
977 if not is_dtype_equal(self.dtype, other.dtype):
978 return False
979 elif len(self) != len(other):
980 return False
981 else:
982 equal_values = self == other
983 if isinstance(equal_values, ExtensionArray):
984 # boolean array with NA -> fill with False
985 equal_values = equal_values.fillna(False)
986 # error: Unsupported left operand type for & ("ExtensionArray")
987 equal_na = self.isna() & other.isna() # type: ignore[operator]
988 return bool((equal_values | equal_na).all())
990 def isin(self, values) -> npt.NDArray[np.bool_]:
991 """
992 Pointwise comparison for set containment in the given values.
994 Roughly equivalent to `np.array([x in values for x in self])`
996 Parameters
997 ----------
998 values : Sequence
1000 Returns
1001 -------
1002 np.ndarray[bool]
1003 """
1004 return isin(np.asarray(self), values)
1006 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
1007 """
1008 Return an array and missing value suitable for factorization.
1010 Returns
1011 -------
1012 values : ndarray
1014 An array suitable for factorization. This should maintain order
1015 and be a supported dtype (Float64, Int64, UInt64, String, Object).
1016 By default, the extension array is cast to object dtype.
1017 na_value : object
1018 The value in `values` to consider missing. This will be treated
1019 as NA in the factorization routines, so it will be coded as
1020 `na_sentinel` and not included in `uniques`. By default,
1021 ``np.nan`` is used.
1023 Notes
1024 -----
1025 The values returned by this method are also used in
1026 :func:`pandas.util.hash_pandas_object`.
1027 """
1028 return self.astype(object), np.nan
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    na_sentinel : int, default -1
        Value placed in the `codes` array to mark missing values.

        .. deprecated:: 1.5.0
            The na_sentinel argument is deprecated and
            will be removed in a future version of pandas. Specify use_na_sentinel
            as either True or False.

    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0

    Returns
    -------
    codes : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.
    """
    # Subclasses can customize factorization in one of two ways:
    #   1. _values_for_factorize / _from_factorized: control the raw values
    #      handed to pandas' internal routines and the conversion back to
    #      an ExtensionArray.
    #   2. Overriding this method entirely.
    #
    # Resolve the deprecated na_sentinel vs. the new use_na_sentinel first
    # (this may emit the deprecation warning).
    sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel)
    raw_values, raw_na_value = self._values_for_factorize()

    codes, raw_uniques = factorize_array(
        raw_values, na_sentinel=sentinel, na_value=raw_na_value
    )

    # Convert the factorized uniques back into an array of our own type.
    return codes, self._from_factorized(raw_uniques, self)
# Shared docstring template for ``repeat``; it is rendered onto the concrete
# implementations (e.g. ExtensionArray.repeat below) via the @Substitution /
# @Appender decorators, with %(klass)s substituted per class.
_extension_array_shared_docs[
    "repeat"
] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    repeated_array : %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.
    ExtensionArray.take : Take arbitrary positions.

    Examples
    --------
    >>> cat = pd.Categorical(['a', 'b', 'c'])
    >>> cat
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat(2)
    ['a', 'a', 'b', 'b', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat([1, 2, 3])
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    """
@Substitution(klass="ExtensionArray")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(
    self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None
) -> ExtensionArrayT:
    # numpy compatibility: only axis=None is accepted.
    nv.validate_repeat((), {"axis": axis})
    # Build the repeated positional indices and let ``take`` materialize
    # the result (np.repeat(a, r) is equivalent to a.repeat(r)).
    indices = np.repeat(np.arange(len(self)), repeats)
    return self.take(indices)
1148 # ------------------------------------------------------------------------
1149 # Indexing methods
1150 # ------------------------------------------------------------------------
def take(
    self: ExtensionArrayT,
    indices: TakeIndexer,
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
) -> ExtensionArrayT:
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.
    api.extensions.take : Take elements from an array.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: The `fill_value` parameter should be a user-facing
    # value, an instance of self.dtype.type. When passed `fill_value=None`,
    # the default of `self.dtype.na_value` should be used.
    # This may differ from the physical storage type your ExtensionArray
    # uses. In this case, your implementation is responsible for casting
    # the user-facing type to the storage type, before using
    # pandas.api.extensions.take
    raise AbstractMethodError(self)
def copy(self: ExtensionArrayT) -> ExtensionArrayT:
    """
    Return a copy of the array.

    Returns
    -------
    ExtensionArray
        A new array holding the same data as ``self``.
    """
    # Abstract in the base class: each subclass defines its own copy
    # semantics for its backing storage.
    raise AbstractMethodError(self)
def view(self, dtype: Dtype | None = None) -> ArrayLike:
    """
    Return a view on the array.

    Parameters
    ----------
    dtype : str, np.dtype, or ExtensionDtype, optional
        Default None.

    Returns
    -------
    ExtensionArray or np.ndarray
        A view on the :class:`ExtensionArray`'s data.
    """
    # NB: must hand back a *new* object over the same underlying data, not
    # ``self`` itself. The base class is only required to support
    # dtype=None (a same-dtype view); anything else is up to subclasses.
    if dtype is None:
        return self[:]
    raise NotImplementedError(dtype)
1279 # ------------------------------------------------------------------------
1280 # Printing
1281 # ------------------------------------------------------------------------
def __repr__(self) -> str:
    # 2-D arrays get a dedicated multi-row rendering.
    if self.ndim > 1:
        return self._repr_2d()

    from pandas.io.formats.printing import format_object_summary

    # format_object_summary's truncated repr carries a trailing newline
    # while the short repr does not; normalize by stripping ", \n" and
    # re-adding our own newline via the template below.
    body = format_object_summary(
        self, self._formatter(), indent_for_name=False
    ).rstrip(", \n")
    header = f"<{type(self).__name__}>\n"
    footer = f"\nLength: {len(self)}, dtype: {self.dtype}"
    return f"{header}{body}{footer}"
def _repr_2d(self) -> str:
    from pandas.io.formats.printing import format_object_summary

    # Render each row with the same 1-D summary used by __repr__ (trailing
    # ", \n" stripped), then stack the rows inside brackets.
    rendered = []
    for row in self:
        summary = format_object_summary(
            row, self._formatter(), indent_for_name=False
        )
        rendered.append(summary.rstrip(", \n"))
    body = ",\n".join(rendered)
    name = f"<{type(self).__name__}>"
    return f"{name}\n[\n{body}\n]\nShape: {self.shape}, dtype: {self.dtype}"
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
    """
    Formatting function for scalar values.

    This is used in the default '__repr__'. The returned formatting
    function receives instances of your scalar type.

    Parameters
    ----------
    boxed : bool, default False
        Whether the array is being printed inside a Series, DataFrame, or
        Index (True) or on its own (False). Useful if scalar values should
        render differently in a container versus standalone (e.g. quoted
        or not).

    Returns
    -------
    Callable[[Any], str]
        A callable that gets instances of the scalar type and
        returns a string. By default, :func:`repr` is used
        when ``boxed=False`` and :func:`str` is used when
        ``boxed=True``.
    """
    return str if boxed else repr
1342 # ------------------------------------------------------------------------
1343 # Reshaping
1344 # ------------------------------------------------------------------------
def transpose(self, *axes: int) -> ExtensionArray:
    """
    Return a transposed view on this array.

    ExtensionArrays are always 1D, so transposition is a no-op; the method
    exists only for compatibility with np.ndarray.
    """
    # A fresh slice keeps "view" semantics: new object, same data.
    return self[:]
@property
def T(self) -> ExtensionArray:
    # numpy-style transpose accessor; a no-op for 1-D extension arrays.
    return self.transpose()
1359 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
1360 """
1361 Return a flattened view on this array.
1363 Parameters
1364 ----------
1365 order : {None, 'C', 'F', 'A', 'K'}, default 'C'
1367 Returns
1368 -------
1369 ExtensionArray
1371 Notes
1372 -----
1373 - Because ExtensionArrays are 1D-only, this is a no-op.
1374 - The "order" argument is ignored, is for compatibility with NumPy.
1375 """
1376 return self
@classmethod
def _concat_same_type(
    cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
) -> ExtensionArrayT:
    """
    Concatenate multiple array of this dtype.

    Parameters
    ----------
    to_concat : sequence of this type

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base class; concrete subclasses must override.
    """
    # Implementer note: this method will only be called with a sequence of
    # ExtensionArrays of this class and with the same dtype as self. This
    # should allow "easy" concatenation (no upcasting needed), and result
    # in a new ExtensionArray of the same dtype.
    # Note: this strict behaviour is only guaranteed starting with pandas 1.1
    raise AbstractMethodError(cls)
# The _can_hold_na attribute is set to True so that pandas internals
# will use the ExtensionDtype.na_value as the NA value in operations
# such as take(), reindex(), shift(), etc. In addition, those results
# will then be of the ExtensionArray subclass rather than an array
# of objects
@cache_readonly
def _can_hold_na(self) -> bool:
    # Delegates to the dtype; cached since the answer cannot change for
    # a given array.
    return self.dtype._can_hold_na
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
    """
    Return a scalar result of performing the reduction operation.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        { any, all, min, max, sum, mean, median, prod,
        std, var, sem, kurt, skew }.
    skipna : bool, default True
        If True, skip NaN values.
    **kwargs
        Additional keyword arguments passed to the reduction function.
        Currently, `ddof` is the only supported kwarg.

    Returns
    -------
    scalar

    Raises
    ------
    TypeError : subclass does not define reductions
    """
    # Look up a same-named method on self; its absence means this dtype
    # does not support the requested reduction.
    reducer = getattr(self, name, None)
    if reducer is None:
        raise TypeError(
            f"'{type(self).__name__}' with dtype {self.dtype} "
            f"does not support reduction '{name}'"
        )
    return reducer(skipna=skipna, **kwargs)
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
# NOTE: annotating __hash__ as ClassVar[None] marks instances of this class
# as unhashable.
__hash__: ClassVar[None]  # type: ignore[assignment]
1446 # ------------------------------------------------------------------------
1447 # Non-Optimized Default Methods; in the case of the private methods here,
1448 # these are not guaranteed to be stable across pandas versions.
def tolist(self) -> list:
    """
    Return a list of the values.

    These are each a scalar type, which is a Python scalar
    (for str, int, float) or a pandas scalar
    (for Timestamp/Timedelta/Interval/Period)

    Returns
    -------
    list
    """
    # 2-D arrays recurse row-by-row so the result nests like
    # ndarray.tolist().
    if self.ndim > 1:
        return [row.tolist() for row in self]
    return list(self)
def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
    # Keep every position except ``loc`` and materialize via ``take``.
    keep = np.delete(np.arange(len(self)), loc)
    return self.take(keep)
def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT:
    """
    Insert an item at the given position.

    Parameters
    ----------
    loc : int
    item : scalar-like

    Returns
    -------
    same type as self

    Notes
    -----
    This method should be both type and dtype-preserving. If the item
    cannot be held in an array of this type/dtype, either ValueError or
    TypeError should be raised.

    The default implementation relies on _from_sequence to raise on invalid
    items.
    """
    # Normalize negative positions and bounds-check against len(self).
    loc = validate_insert_loc(loc, len(self))

    # Wrap the scalar in a length-1 array of our dtype; _from_sequence is
    # responsible for rejecting incompatible items.
    wrapped = type(self)._from_sequence([item], dtype=self.dtype)

    # Stitch together prefix + item + suffix.
    return type(self)._concat_same_type([self[:loc], wrapped, self[loc:]])
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Analogue to np.putmask(self, mask, value)

    Parameters
    ----------
    mask : np.ndarray[bool]
    value : scalar or listlike
        If listlike, must be arraylike with same length as self.

    Returns
    -------
    None

    Notes
    -----
    Unlike np.putmask, we do not repeat listlike values with mismatched
    length: 'value' must be a scalar or an arraylike with the same length
    as self.
    """
    # Listlike values are aligned by selecting the masked positions first;
    # scalars are assigned directly.
    self[mask] = value[mask] if is_list_like(value) else value
def _where(
    self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value
) -> ExtensionArrayT:
    """
    Analogue to np.where(mask, self, value)

    Parameters
    ----------
    mask : np.ndarray[bool]
    value : scalar or listlike

    Returns
    -------
    same type as self
    """
    # Start from a copy: positions where the mask is False are overwritten
    # with (the aligned slice of) ``value``.
    result = self.copy()
    replacement = value[~mask] if is_list_like(value) else value
    result[~mask] = replacement
    return result
def _fill_mask_inplace(
    self, method: str, limit, mask: npt.NDArray[np.bool_]
) -> None:
    """
    Replace values in locations specified by 'mask' using pad or backfill.

    See also
    --------
    ExtensionArray.fillna
    """
    fill = missing.get_fill_func(method)
    as_object = self.astype(object)
    # Pass a *copy* of the mask: the fill func may mutate it in place,
    # which would corrupt the ``self[mask] = ...`` assignment below.
    fill(as_object, limit=limit, mask=mask.copy())
    filled = self._from_sequence(as_object, dtype=self.dtype)
    self[mask] = filled[mask]
def _rank(
    self,
    *,
    axis: int = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    See Series.rank.__doc__.
    """
    # Extension arrays are 1-D; only axis=0 is meaningful.
    if axis != 0:
        raise NotImplementedError

    # Delegate to the shared ranking implementation.
    # TODO: we only have tests that get here with dt64 and td64
    # TODO: all tests that get here use the defaults for all the kwds
    options = {
        "method": method,
        "na_option": na_option,
        "ascending": ascending,
        "pct": pct,
    }
    return rank(self, axis=axis, **options)
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Create an ExtensionArray with the given shape and dtype.

    See also
    --------
    ExtensionDtype.empty
    ExtensionDtype.empty is the 'official' public version of this API.
    """
    # Implementer note: ExtensionDtype.empty is the public entry point,
    # but subclasses must still implement this method (pandas calls it
    # internally).
    backing = cls._from_sequence([], dtype=dtype)

    # take(-1, allow_fill=True) broadcast over ``shape`` produces an
    # all-NA array of the requested shape.
    filler = np.broadcast_to(np.intp(-1), shape)
    result = backing.take(filler, allow_fill=True)
    if not isinstance(result, cls) or dtype != result.dtype:
        raise NotImplementedError(
            f"Default 'empty' implementation is invalid for dtype='{dtype}'"
        )
    return result
def _quantile(
    self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArrayT:
    """
    Compute the quantiles of self for each quantile in `qs`.

    Parameters
    ----------
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    same type as self
    """
    # Work on an ndarray view with an explicit NA mask; np.nan stands in
    # for missing entries during the computation.
    na_mask = np.asarray(self.isna())
    values = np.asarray(self)

    quantiles = quantile_with_mask(values, na_mask, np.nan, qs, interpolation)
    return type(self)._from_sequence(quantiles)
def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
    """
    Returns the mode(s) of the ExtensionArray.

    Always returns `ExtensionArray` even if only one value.

    Parameters
    ----------
    dropna : bool, default True
        Don't consider counts of NA values.

    Returns
    -------
    same type as self
        Sorted, if possible.
    """
    # Delegates to the shared pandas.core.algorithms.mode implementation.
    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "ExtensionArrayT")
    return mode(self, dropna=dropna)  # type: ignore[return-value]
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    # NumPy ufunc protocol hook. The dispatch order below is significant.

    # 1) Defer to pandas containers: if a Series/Index/DataFrame is among
    #    the inputs, return NotImplemented so the container can unbox and
    #    re-dispatch to us.
    if any(
        isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
    ):
        return NotImplemented

    # 2) Prefer our own dunder ops (e.g. __add__) when the ufunc maps to one.
    result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # 3) ``out=`` arguments are handled by a shared helper.
    if "out" in kwargs:
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    # 4) Reductions (e.g. np.add.reduce) go through the shared reduction
    #    dispatcher; fall through if it declines.
    if method == "reduce":
        result = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

    # 5) Final fallback: the default elementwise application.
    return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Subclasses must provide the factory that turns an operator
        # function into a bound dunder method.
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls):
        # Attach every arithmetic dunder; each forward operator is paired
        # with its reflected counterpart from ``roperator``.
        cls.__add__ = cls._create_arithmetic_method(operator.add)
        cls.__radd__ = cls._create_arithmetic_method(roperator.radd)
        cls.__sub__ = cls._create_arithmetic_method(operator.sub)
        cls.__rsub__ = cls._create_arithmetic_method(roperator.rsub)
        cls.__mul__ = cls._create_arithmetic_method(operator.mul)
        cls.__rmul__ = cls._create_arithmetic_method(roperator.rmul)
        cls.__pow__ = cls._create_arithmetic_method(operator.pow)
        cls.__rpow__ = cls._create_arithmetic_method(roperator.rpow)
        cls.__mod__ = cls._create_arithmetic_method(operator.mod)
        cls.__rmod__ = cls._create_arithmetic_method(roperator.rmod)
        cls.__floordiv__ = cls._create_arithmetic_method(operator.floordiv)
        cls.__rfloordiv__ = cls._create_arithmetic_method(roperator.rfloordiv)
        cls.__truediv__ = cls._create_arithmetic_method(operator.truediv)
        cls.__rtruediv__ = cls._create_arithmetic_method(roperator.rtruediv)
        cls.__divmod__ = cls._create_arithmetic_method(divmod)
        cls.__rdivmod__ = cls._create_arithmetic_method(roperator.rdivmod)

    @classmethod
    def _create_comparison_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls):
        # Attach the six rich-comparison dunders.
        cls.__eq__ = cls._create_comparison_method(operator.eq)
        cls.__ne__ = cls._create_comparison_method(operator.ne)
        cls.__lt__ = cls._create_comparison_method(operator.lt)
        cls.__gt__ = cls._create_comparison_method(operator.gt)
        cls.__le__ = cls._create_comparison_method(operator.le)
        cls.__ge__ = cls._create_comparison_method(operator.ge)

    @classmethod
    def _create_logical_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls):
        # Attach the bitwise/logical dunders and their reflected versions.
        cls.__and__ = cls._create_logical_method(operator.and_)
        cls.__rand__ = cls._create_logical_method(roperator.rand_)
        cls.__or__ = cls._create_logical_method(operator.or_)
        cls.__ror__ = cls._create_logical_method(roperator.ror_)
        cls.__xor__ = cls._create_logical_method(operator.xor)
        cls.__rxor__ = cls._create_logical_method(roperator.rxor)
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators. After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : dtype-like, optional
            dtype passed to ``np.asarray`` when ``coerce_to_dtype`` is
            False (e.g. ``bool`` for the comparison methods).

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

        Examples
        --------
        Given an ExtensionArray subclass called MyExtensionArray, use

            __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            def convert_values(param):
                # Pass listlikes through unchanged; broadcast a scalar to a
                # list matching len(self).
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume its an object
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # Apply the operator elementwise. If the operator is not defined
            # for the underlying objects, a TypeError should be raised.
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    res = maybe_cast_to_extension_array(type(self), arr)
                    if not isinstance(res, type(self)):
                        # exception raised in _from_sequence; ensure we have ndarray
                        res = np.asarray(arr)
                else:
                    res = np.asarray(arr, dtype=result_dtype)
                return res

            if op.__name__ in {"divmod", "rdivmod"}:
                # divmod yields (quotient, remainder) pairs: unzip into two
                # sequences and convert each separately.
                a, b = zip(*res)
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        # Name the generated function like the dunder it will be bound to
        # (e.g. "__add__").
        op_name = f"__{op.__name__}__"
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Arithmetic results are coerced back to the ExtensionArray dtype
        # when possible (coerce_to_dtype defaults to True).
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        # Comparisons always produce a plain boolean ndarray.
        return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)