Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/base.py: 29%
388 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2An interface for extending pandas with custom arrays.
4.. warning::
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
11import inspect
12import operator
13from typing import (
14 TYPE_CHECKING,
15 Any,
16 Callable,
17 ClassVar,
18 Iterator,
19 Literal,
20 Sequence,
21 TypeVar,
22 cast,
23 overload,
24)
25import warnings
27import numpy as np
29from pandas._libs import lib
30from pandas._typing import (
31 ArrayLike,
32 AstypeArg,
33 Dtype,
34 FillnaOptions,
35 PositionalIndexer,
36 ScalarIndexer,
37 SequenceIndexer,
38 Shape,
39 TakeIndexer,
40 npt,
41)
42from pandas.compat import set_function_name
43from pandas.compat.numpy import function as nv
44from pandas.errors import AbstractMethodError
45from pandas.util._decorators import (
46 Appender,
47 Substitution,
48 cache_readonly,
49 deprecate_nonkeyword_arguments,
50)
51from pandas.util._exceptions import find_stack_level
52from pandas.util._validators import (
53 validate_bool_kwarg,
54 validate_fillna_kwargs,
55 validate_insert_loc,
56)
58from pandas.core.dtypes.cast import maybe_cast_to_extension_array
59from pandas.core.dtypes.common import (
60 is_dtype_equal,
61 is_list_like,
62 is_scalar,
63 pandas_dtype,
64)
65from pandas.core.dtypes.dtypes import ExtensionDtype
66from pandas.core.dtypes.generic import (
67 ABCDataFrame,
68 ABCIndex,
69 ABCSeries,
70)
71from pandas.core.dtypes.missing import isna
73from pandas.core import (
74 arraylike,
75 missing,
76 roperator,
77)
78from pandas.core.algorithms import (
79 factorize_array,
80 isin,
81 mode,
82 rank,
83 resolve_na_sentinel,
84 unique,
85)
86from pandas.core.array_algos.quantile import quantile_with_mask
87from pandas.core.sorting import (
88 nargminmax,
89 nargsort,
90)
if TYPE_CHECKING:
    # Static-typing-only definitions; this branch is never executed at
    # runtime (TYPE_CHECKING is False), so the string base class is safe.

    class ExtensionArraySupportsAnyAll("ExtensionArray"):
        # Typing stub: an ExtensionArray whose any()/all() are guaranteed
        # to exist (used as the return type of ``isna``).
        def any(self, *, skipna: bool = True) -> bool:
            pass

        def all(self, *, skipna: bool = True) -> bool:
            pass

    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
    )
# Registry of docstring templates shared between ExtensionArray methods and
# their Series/Index counterparts; entries are filled in later in the module.
_extension_array_shared_docs: dict[str, str] = {}

# TypeVar so that methods returning "an array like self" preserve the
# concrete subclass type for static checkers.
ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
112class ExtensionArray:
113 """
114 Abstract base class for custom 1-D array types.
116 pandas will recognize instances of this class as proper arrays
117 with a custom type and will not attempt to coerce them to objects. They
118 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
120 Attributes
121 ----------
122 dtype
123 nbytes
124 ndim
125 shape
127 Methods
128 -------
129 argsort
130 astype
131 copy
132 dropna
133 factorize
134 fillna
135 equals
136 insert
137 isin
138 isna
139 ravel
140 repeat
141 searchsorted
142 shift
143 take
144 tolist
145 unique
146 view
147 _concat_same_type
148 _formatter
149 _from_factorized
150 _from_sequence
151 _from_sequence_of_strings
152 _reduce
153 _values_for_argsort
154 _values_for_factorize
156 Notes
157 -----
158 The interface includes the following abstract methods that must be
159 implemented by subclasses:
161 * _from_sequence
162 * _from_factorized
163 * __getitem__
164 * __len__
165 * __eq__
166 * dtype
167 * nbytes
168 * isna
169 * take
170 * copy
171 * _concat_same_type
173 A default repr displaying the type, (truncated) data, length,
174 and dtype is provided. It can be customized or replaced by
175 by overriding:
177 * __repr__ : A default repr for the ExtensionArray.
178 * _formatter : Print scalars inside a Series or DataFrame.
180 Some methods require casting the ExtensionArray to an ndarray of Python
181 objects with ``self.astype(object)``, which may be expensive. When
182 performance is a concern, we highly recommend overriding the following
183 methods:
185 * fillna
186 * dropna
187 * unique
188 * factorize / _values_for_factorize
189 * argsort, argmax, argmin / _values_for_argsort
190 * searchsorted
192 The remaining methods implemented on this class should be performant,
193 as they only compose abstract methods. Still, a more efficient
194 implementation may be available, and these methods can be overridden.
196 One can implement methods to handle array reductions.
198 * _reduce
200 One can implement methods to handle parsing from strings that will be used
201 in methods such as ``pandas.io.parsers.read_csv``.
203 * _from_sequence_of_strings
205 This class does not inherit from 'abc.ABCMeta' for performance reasons.
206 Methods and properties required by the interface raise
207 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
208 provided for registering virtual subclasses.
210 ExtensionArrays are limited to 1 dimension.
212 They may be backed by none, one, or many NumPy arrays. For example,
213 ``pandas.Categorical`` is an extension array backed by two arrays,
214 one for codes and one for categories. An array of IPv6 address may
215 be backed by a NumPy structured array with two fields, one for the
216 lower 64 bits and one for the upper 64 bits. Or they may be backed
217 by some other storage type, like Python lists. Pandas makes no
218 assumptions on how the data are stored, just that it can be converted
219 to a NumPy array.
220 The ExtensionArray interface does not impose any rules on how this data
221 is stored. However, currently, the backing data cannot be stored in
222 attributes called ``.values`` or ``._values`` to ensure full compatibility
223 with pandas internals. But other names as ``.data``, ``._data``,
224 ``._items``, ... can be freely used.
226 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
227 that
229 1. You defer by returning ``NotImplemented`` when any Series are present
230 in `inputs`. Pandas will extract the arrays and call the ufunc again.
231 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
232 Pandas inspect this to determine whether the ufunc is valid for the
233 types present.
235 See :ref:`extending.extension.ufunc` for more.
237 By default, ExtensionArrays are not hashable. Immutable subclasses may
238 override this behavior.
239 """
241 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
242 # Don't override this.
243 _typ = "extension"
245 # ------------------------------------------------------------------------
246 # Constructors
247 # ------------------------------------------------------------------------
249 @classmethod
250 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
251 """
252 Construct a new ExtensionArray from a sequence of scalars.
254 Parameters
255 ----------
256 scalars : Sequence
257 Each element will be an instance of the scalar type for this
258 array, ``cls.dtype.type`` or be converted into this type in this method.
259 dtype : dtype, optional
260 Construct for this particular dtype. This should be a Dtype
261 compatible with the ExtensionArray.
262 copy : bool, default False
263 If True, copy the underlying data.
265 Returns
266 -------
267 ExtensionArray
268 """
269 raise AbstractMethodError(cls)
271 @classmethod
272 def _from_sequence_of_strings(
273 cls, strings, *, dtype: Dtype | None = None, copy=False
274 ):
275 """
276 Construct a new ExtensionArray from a sequence of strings.
278 Parameters
279 ----------
280 strings : Sequence
281 Each element will be an instance of the scalar type for this
282 array, ``cls.dtype.type``.
283 dtype : dtype, optional
284 Construct for this particular dtype. This should be a Dtype
285 compatible with the ExtensionArray.
286 copy : bool, default False
287 If True, copy the underlying data.
289 Returns
290 -------
291 ExtensionArray
292 """
293 raise AbstractMethodError(cls)
295 @classmethod
296 def _from_factorized(cls, values, original):
297 """
298 Reconstruct an ExtensionArray after factorization.
300 Parameters
301 ----------
302 values : ndarray
303 An integer ndarray with the factorized values.
304 original : ExtensionArray
305 The original ExtensionArray that factorize was called on.
307 See Also
308 --------
309 factorize : Top-level factorize method that dispatches here.
310 ExtensionArray.factorize : Encode the extension array as an enumerated type.
311 """
312 raise AbstractMethodError(cls)
314 # ------------------------------------------------------------------------
315 # Must be a Sequence
316 # ------------------------------------------------------------------------
317 @overload
318 def __getitem__(self, item: ScalarIndexer) -> Any:
319 ...
321 @overload
322 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
323 ...
325 def __getitem__(
326 self: ExtensionArrayT, item: PositionalIndexer
327 ) -> ExtensionArrayT | Any:
328 """
329 Select a subset of self.
331 Parameters
332 ----------
333 item : int, slice, or ndarray
334 * int: The position in 'self' to get.
336 * slice: A slice object, where 'start', 'stop', and 'step' are
337 integers or None
339 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
341 * list[int]: A list of int
343 Returns
344 -------
345 item : scalar or ExtensionArray
347 Notes
348 -----
349 For scalar ``item``, return a scalar value suitable for the array's
350 type. This should be an instance of ``self.dtype.type``.
352 For slice ``key``, return an instance of ``ExtensionArray``, even
353 if the slice is length 0 or 1.
355 For a boolean mask, return an instance of ``ExtensionArray``, filtered
356 to the values where ``item`` is True.
357 """
358 raise AbstractMethodError(self)
360 def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
361 """
362 Set one or more values inplace.
364 This method is not required to satisfy the pandas extension array
365 interface.
367 Parameters
368 ----------
369 key : int, ndarray, or slice
370 When called from, e.g. ``Series.__setitem__``, ``key`` will be
371 one of
373 * scalar int
374 * ndarray of integers.
375 * boolean ndarray
376 * slice object
378 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
379 value or values to be set of ``key``.
381 Returns
382 -------
383 None
384 """
385 # Some notes to the ExtensionArray implementor who may have ended up
386 # here. While this method is not required for the interface, if you
387 # *do* choose to implement __setitem__, then some semantics should be
388 # observed:
389 #
390 # * Setting multiple values : ExtensionArrays should support setting
391 # multiple values at once, 'key' will be a sequence of integers and
392 # 'value' will be a same-length sequence.
393 #
394 # * Broadcasting : For a sequence 'key' and a scalar 'value',
395 # each position in 'key' should be set to 'value'.
396 #
397 # * Coercion : Most users will expect basic coercion to work. For
398 # example, a string like '2018-01-01' is coerced to a datetime
399 # when setting on a datetime64ns array. In general, if the
400 # __init__ method coerces that value, then so should __setitem__
401 # Note, also, that Series/DataFrame.where internally use __setitem__
402 # on a copy of the data.
403 raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
405 def __len__(self) -> int:
406 """
407 Length of this array
409 Returns
410 -------
411 length : int
412 """
413 raise AbstractMethodError(self)
415 def __iter__(self) -> Iterator[Any]:
416 """
417 Iterate over elements of the array.
418 """
419 # This needs to be implemented so that pandas recognizes extension
420 # arrays as list-like. The default implementation makes successive
421 # calls to ``__getitem__``, which may be slower than necessary.
422 for i in range(len(self)):
423 yield self[i]
425 def __contains__(self, item: object) -> bool | np.bool_:
426 """
427 Return for `item in self`.
428 """
429 # GH37867
430 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
431 # would raise a TypeError. The implementation below works around that.
432 if is_scalar(item) and isna(item):
433 if not self._can_hold_na:
434 return False
435 elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
436 return self._hasna
437 else:
438 return False
439 else:
440 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
441 # attribute "any"
442 return (item == self).any() # type: ignore[union-attr]
444 # error: Signature of "__eq__" incompatible with supertype "object"
445 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override]
446 """
447 Return for `self == other` (element-wise equality).
448 """
449 # Implementer note: this should return a boolean numpy ndarray or
450 # a boolean ExtensionArray.
451 # When `other` is one of Series, Index, or DataFrame, this method should
452 # return NotImplemented (to ensure that those objects are responsible for
453 # first unpacking the arrays, and then dispatch the operation to the
454 # underlying arrays)
455 raise AbstractMethodError(self)
457 # error: Signature of "__ne__" incompatible with supertype "object"
458 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
459 """
460 Return for `self != other` (element-wise in-equality).
461 """
462 return ~(self == other)
464 def __init_subclass__(cls, **kwargs) -> None:
465 factorize = getattr(cls, "factorize")
466 if ( 466 ↛ 472line 466 didn't jump to line 472
467 "use_na_sentinel" not in inspect.signature(factorize).parameters
468 # TimelikeOps uses old factorize args to ensure we don't break things
469 and cls.__name__ not in ("TimelikeOps", "DatetimeArray", "TimedeltaArray")
470 ):
471 # See GH#46910 for details on the deprecation
472 name = cls.__name__
473 warnings.warn(
474 f"The `na_sentinel` argument of `{name}.factorize` is deprecated. "
475 f"In the future, pandas will use the `use_na_sentinel` argument "
476 f"instead. Add this argument to `{name}.factorize` to be compatible "
477 f"with future versions of pandas and silence this warning.",
478 DeprecationWarning,
479 stacklevel=find_stack_level(),
480 )
482 def to_numpy(
483 self,
484 dtype: npt.DTypeLike | None = None,
485 copy: bool = False,
486 na_value: object = lib.no_default,
487 ) -> np.ndarray:
488 """
489 Convert to a NumPy ndarray.
491 .. versionadded:: 1.0.0
493 This is similar to :meth:`numpy.asarray`, but may provide additional control
494 over how the conversion is done.
496 Parameters
497 ----------
498 dtype : str or numpy.dtype, optional
499 The dtype to pass to :meth:`numpy.asarray`.
500 copy : bool, default False
501 Whether to ensure that the returned value is a not a view on
502 another array. Note that ``copy=False`` does not *ensure* that
503 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
504 a copy is made, even if not strictly necessary.
505 na_value : Any, optional
506 The value to use for missing values. The default value depends
507 on `dtype` and the type of the array.
509 Returns
510 -------
511 numpy.ndarray
512 """
513 result = np.asarray(self, dtype=dtype)
514 if copy or na_value is not lib.no_default:
515 result = result.copy()
516 if na_value is not lib.no_default:
517 result[self.isna()] = na_value
518 return result
520 # ------------------------------------------------------------------------
521 # Required attributes
522 # ------------------------------------------------------------------------
524 @property
525 def dtype(self) -> ExtensionDtype:
526 """
527 An instance of 'ExtensionDtype'.
528 """
529 raise AbstractMethodError(self)
531 @property
532 def shape(self) -> Shape:
533 """
534 Return a tuple of the array dimensions.
535 """
536 return (len(self),)
538 @property
539 def size(self) -> int:
540 """
541 The number of elements in the array.
542 """
543 # error: Incompatible return value type (got "signedinteger[_64Bit]",
544 # expected "int") [return-value]
545 return np.prod(self.shape) # type: ignore[return-value]
547 @property
548 def ndim(self) -> int:
549 """
550 Extension Arrays are only allowed to be 1-dimensional.
551 """
552 return 1
554 @property
555 def nbytes(self) -> int:
556 """
557 The number of bytes needed to store this object in memory.
558 """
559 # If this is expensive to compute, return an approximate lower bound
560 # on the number of bytes needed.
561 raise AbstractMethodError(self)
563 # ------------------------------------------------------------------------
564 # Additional Methods
565 # ------------------------------------------------------------------------
567 @overload
568 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
569 ...
571 @overload
572 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
573 ...
575 @overload
576 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
577 ...
579 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
580 """
581 Cast to a NumPy array or ExtensionArray with 'dtype'.
583 Parameters
584 ----------
585 dtype : str or dtype
586 Typecode or data-type to which the array is cast.
587 copy : bool, default True
588 Whether to copy the data, even if not necessary. If False,
589 a copy is made only if the old dtype does not match the
590 new dtype.
592 Returns
593 -------
594 array : np.ndarray or ExtensionArray
595 An ExtensionArray if dtype is ExtensionDtype,
596 Otherwise a NumPy ndarray with 'dtype' for its dtype.
597 """
599 dtype = pandas_dtype(dtype)
600 if is_dtype_equal(dtype, self.dtype):
601 if not copy:
602 return self
603 else:
604 return self.copy()
606 if isinstance(dtype, ExtensionDtype):
607 cls = dtype.construct_array_type()
608 return cls._from_sequence(self, dtype=dtype, copy=copy)
610 return np.array(self, dtype=dtype, copy=copy)
612 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
613 """
614 A 1-D array indicating if each value is missing.
616 Returns
617 -------
618 na_values : Union[np.ndarray, ExtensionArray]
619 In most cases, this should return a NumPy ndarray. For
620 exceptional cases like ``SparseArray``, where returning
621 an ndarray would be expensive, an ExtensionArray may be
622 returned.
624 Notes
625 -----
626 If returning an ExtensionArray, then
628 * ``na_values._is_boolean`` should be True
629 * `na_values` should implement :func:`ExtensionArray._reduce`
630 * ``na_values.any`` and ``na_values.all`` should be implemented
631 """
632 raise AbstractMethodError(self)
634 @property
635 def _hasna(self) -> bool:
636 # GH#22680
637 """
638 Equivalent to `self.isna().any()`.
640 Some ExtensionArray subclasses may be able to optimize this check.
641 """
642 return bool(self.isna().any())
644 def _values_for_argsort(self) -> np.ndarray:
645 """
646 Return values for sorting.
648 Returns
649 -------
650 ndarray
651 The transformed values should maintain the ordering between values
652 within the array.
654 See Also
655 --------
656 ExtensionArray.argsort : Return the indices that would sort this array.
658 Notes
659 -----
660 The caller is responsible for *not* modifying these values in-place, so
661 it is safe for implementors to give views on `self`.
663 Functions that use this (e.g. ExtensionArray.argsort) should ignore
664 entries with missing values in the original array (according to `self.isna()`).
665 This means that the corresponding entries in the returned array don't need to
666 be modified to sort correctly.
667 """
668 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
669 return np.array(self)
671 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
672 def argsort(
673 self,
674 ascending: bool = True,
675 kind: str = "quicksort",
676 na_position: str = "last",
677 *args,
678 **kwargs,
679 ) -> np.ndarray:
680 """
681 Return the indices that would sort this array.
683 Parameters
684 ----------
685 ascending : bool, default True
686 Whether the indices should result in an ascending
687 or descending sort.
688 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
689 Sorting algorithm.
690 *args, **kwargs:
691 Passed through to :func:`numpy.argsort`.
693 Returns
694 -------
695 np.ndarray[np.intp]
696 Array of indices that sort ``self``. If NaN values are contained,
697 NaN values are placed at the end.
699 See Also
700 --------
701 numpy.argsort : Sorting implementation used internally.
702 """
703 # Implementor note: You have two places to override the behavior of
704 # argsort.
705 # 1. _values_for_argsort : construct the values passed to np.argsort
706 # 2. argsort : total control over sorting. In case of overriding this,
707 # it is recommended to also override argmax/argmin
708 ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
710 values = self._values_for_argsort()
711 return nargsort(
712 values,
713 kind=kind,
714 ascending=ascending,
715 na_position=na_position,
716 mask=np.asarray(self.isna()),
717 )
719 def argmin(self, skipna: bool = True) -> int:
720 """
721 Return the index of minimum value.
723 In case of multiple occurrences of the minimum value, the index
724 corresponding to the first occurrence is returned.
726 Parameters
727 ----------
728 skipna : bool, default True
730 Returns
731 -------
732 int
734 See Also
735 --------
736 ExtensionArray.argmax
737 """
738 # Implementor note: You have two places to override the behavior of
739 # argmin.
740 # 1. _values_for_argsort : construct the values used in nargminmax
741 # 2. argmin itself : total control over sorting.
742 validate_bool_kwarg(skipna, "skipna")
743 if not skipna and self._hasna:
744 raise NotImplementedError
745 return nargminmax(self, "argmin")
747 def argmax(self, skipna: bool = True) -> int:
748 """
749 Return the index of maximum value.
751 In case of multiple occurrences of the maximum value, the index
752 corresponding to the first occurrence is returned.
754 Parameters
755 ----------
756 skipna : bool, default True
758 Returns
759 -------
760 int
762 See Also
763 --------
764 ExtensionArray.argmin
765 """
766 # Implementor note: You have two places to override the behavior of
767 # argmax.
768 # 1. _values_for_argsort : construct the values used in nargminmax
769 # 2. argmax itself : total control over sorting.
770 validate_bool_kwarg(skipna, "skipna")
771 if not skipna and self._hasna:
772 raise NotImplementedError
773 return nargminmax(self, "argmax")
775 def fillna(
776 self: ExtensionArrayT,
777 value: object | ArrayLike | None = None,
778 method: FillnaOptions | None = None,
779 limit: int | None = None,
780 ) -> ExtensionArrayT:
781 """
782 Fill NA/NaN values using the specified method.
784 Parameters
785 ----------
786 value : scalar, array-like
787 If a scalar value is passed it is used to fill all missing values.
788 Alternatively, an array-like 'value' can be given. It's expected
789 that the array-like have the same length as 'self'.
790 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
791 Method to use for filling holes in reindexed Series
792 pad / ffill: propagate last valid observation forward to next valid
793 backfill / bfill: use NEXT valid observation to fill gap.
794 limit : int, default None
795 If method is specified, this is the maximum number of consecutive
796 NaN values to forward/backward fill. In other words, if there is
797 a gap with more than this number of consecutive NaNs, it will only
798 be partially filled. If method is not specified, this is the
799 maximum number of entries along the entire axis where NaNs will be
800 filled.
802 Returns
803 -------
804 ExtensionArray
805 With NA/NaN filled.
806 """
807 value, method = validate_fillna_kwargs(value, method)
809 mask = self.isna()
810 # error: Argument 2 to "check_value_size" has incompatible type
811 # "ExtensionArray"; expected "ndarray"
812 value = missing.check_value_size(
813 value, mask, len(self) # type: ignore[arg-type]
814 )
816 if mask.any():
817 if method is not None:
818 func = missing.get_fill_func(method)
819 npvalues = self.astype(object)
820 func(npvalues, limit=limit, mask=mask)
821 new_values = self._from_sequence(npvalues, dtype=self.dtype)
822 else:
823 # fill with value
824 new_values = self.copy()
825 new_values[mask] = value
826 else:
827 new_values = self.copy()
828 return new_values
830 def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
831 """
832 Return ExtensionArray without NA values.
834 Returns
835 -------
836 valid : ExtensionArray
837 """
838 # error: Unsupported operand type for ~ ("ExtensionArray")
839 return self[~self.isna()] # type: ignore[operator]
841 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
842 """
843 Shift values by desired number.
845 Newly introduced missing values are filled with
846 ``self.dtype.na_value``.
848 Parameters
849 ----------
850 periods : int, default 1
851 The number of periods to shift. Negative values are allowed
852 for shifting backwards.
854 fill_value : object, optional
855 The scalar value to use for newly introduced missing values.
856 The default is ``self.dtype.na_value``.
858 Returns
859 -------
860 ExtensionArray
861 Shifted.
863 Notes
864 -----
865 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
866 returned.
868 If ``periods > len(self)``, then an array of size
869 len(self) is returned, with all values filled with
870 ``self.dtype.na_value``.
871 """
872 # Note: this implementation assumes that `self.dtype.na_value` can be
873 # stored in an instance of your ExtensionArray with `self.dtype`.
874 if not len(self) or periods == 0:
875 return self.copy()
877 if isna(fill_value):
878 fill_value = self.dtype.na_value
880 empty = self._from_sequence(
881 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
882 )
883 if periods > 0:
884 a = empty
885 b = self[:-periods]
886 else:
887 a = self[abs(periods) :]
888 b = empty
889 return self._concat_same_type([a, b])
891 def unique(self: ExtensionArrayT) -> ExtensionArrayT:
892 """
893 Compute the ExtensionArray of unique values.
895 Returns
896 -------
897 uniques : ExtensionArray
898 """
899 uniques = unique(self.astype(object))
900 return self._from_sequence(uniques, dtype=self.dtype)
902 def searchsorted(
903 self,
904 value: NumpyValueArrayLike | ExtensionArray,
905 side: Literal["left", "right"] = "left",
906 sorter: NumpySorter = None,
907 ) -> npt.NDArray[np.intp] | np.intp:
908 """
909 Find indices where elements should be inserted to maintain order.
911 Find the indices into a sorted array `self` (a) such that, if the
912 corresponding elements in `value` were inserted before the indices,
913 the order of `self` would be preserved.
915 Assuming that `self` is sorted:
917 ====== ================================
918 `side` returned index `i` satisfies
919 ====== ================================
920 left ``self[i-1] < value <= self[i]``
921 right ``self[i-1] <= value < self[i]``
922 ====== ================================
924 Parameters
925 ----------
926 value : array-like, list or scalar
927 Value(s) to insert into `self`.
928 side : {'left', 'right'}, optional
929 If 'left', the index of the first suitable location found is given.
930 If 'right', return the last such index. If there is no suitable
931 index, return either 0 or N (where N is the length of `self`).
932 sorter : 1-D array-like, optional
933 Optional array of integer indices that sort array a into ascending
934 order. They are typically the result of argsort.
936 Returns
937 -------
938 array of ints or int
939 If value is array-like, array of insertion points.
940 If value is scalar, a single integer.
942 See Also
943 --------
944 numpy.searchsorted : Similar method from NumPy.
945 """
946 # Note: the base tests provided by pandas only test the basics.
947 # We do not test
948 # 1. Values outside the range of the `data_for_sorting` fixture
949 # 2. Values between the values in the `data_for_sorting` fixture
950 # 3. Missing values.
951 arr = self.astype(object)
952 if isinstance(value, ExtensionArray):
953 value = value.astype(object)
954 return arr.searchsorted(value, side=side, sorter=sorter)
956 def equals(self, other: object) -> bool:
957 """
958 Return if another array is equivalent to this array.
960 Equivalent means that both arrays have the same shape and dtype, and
961 all values compare equal. Missing values in the same location are
962 considered equal (in contrast with normal equality).
964 Parameters
965 ----------
966 other : ExtensionArray
967 Array to compare to this Array.
969 Returns
970 -------
971 boolean
972 Whether the arrays are equivalent.
973 """
974 if type(self) != type(other):
975 return False
976 other = cast(ExtensionArray, other)
977 if not is_dtype_equal(self.dtype, other.dtype):
978 return False
979 elif len(self) != len(other):
980 return False
981 else:
982 equal_values = self == other
983 if isinstance(equal_values, ExtensionArray):
984 # boolean array with NA -> fill with False
985 equal_values = equal_values.fillna(False)
986 # error: Unsupported left operand type for & ("ExtensionArray")
987 equal_na = self.isna() & other.isna() # type: ignore[operator]
988 return bool((equal_values | equal_na).all())
990 def isin(self, values) -> npt.NDArray[np.bool_]:
991 """
992 Pointwise comparison for set containment in the given values.
994 Roughly equivalent to `np.array([x in values for x in self])`
996 Parameters
997 ----------
998 values : Sequence
1000 Returns
1001 -------
1002 np.ndarray[bool]
1003 """
1004 return isin(np.asarray(self), values)
1006 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
1007 """
1008 Return an array and missing value suitable for factorization.
1010 Returns
1011 -------
1012 values : ndarray
1014 An array suitable for factorization. This should maintain order
1015 and be a supported dtype (Float64, Int64, UInt64, String, Object).
1016 By default, the extension array is cast to object dtype.
1017 na_value : object
1018 The value in `values` to consider missing. This will be treated
1019 as NA in the factorization routines, so it will be coded as
1020 `na_sentinel` and not included in `uniques`. By default,
1021 ``np.nan`` is used.
1023 Notes
1024 -----
1025 The values returned by this method are also used in
1026 :func:`pandas.util.hash_pandas_object`.
1027 """
1028 return self.astype(object), np.nan
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    na_sentinel : int, default -1
        Value placed in the `codes` array to mark missing values.

        .. deprecated:: 1.5.0
            The na_sentinel argument is deprecated and
            will be removed in a future version of pandas. Specify use_na_sentinel
            as either True or False.

    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0

    Returns
    -------
    codes : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.
    """
    # Subclasses can customize factorization in one of two ways:
    #   1. _values_for_factorize / _from_factorized: control the raw values
    #      handed to pandas' internal routines and the conversion back to
    #      an ExtensionArray.
    #   2. Overriding this method entirely.
    #
    # Resolve the deprecated na_sentinel vs. the new use_na_sentinel first
    # (this may emit the deprecation warning).
    sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel)
    raw_values, raw_na_value = self._values_for_factorize()

    codes, raw_uniques = factorize_array(
        raw_values, na_sentinel=sentinel, na_value=raw_na_value
    )

    # Convert the factorized uniques back into an array of our own type.
    return codes, self._from_factorized(raw_uniques, self)
# Shared docstring template for ``repeat``; it is rendered onto the concrete
# implementations (e.g. ExtensionArray.repeat below) via the @Substitution /
# @Appender decorators, with %(klass)s substituted per class.
_extension_array_shared_docs[
    "repeat"
] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    repeated_array : %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.
    ExtensionArray.take : Take arbitrary positions.

    Examples
    --------
    >>> cat = pd.Categorical(['a', 'b', 'c'])
    >>> cat
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat(2)
    ['a', 'a', 'b', 'b', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat([1, 2, 3])
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    """
@Substitution(klass="ExtensionArray")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(
    self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None
) -> ExtensionArrayT:
    # numpy compatibility: only axis=None is accepted.
    nv.validate_repeat((), {"axis": axis})
    # Build the repeated positional indices and let ``take`` materialize
    # the result (np.repeat(a, r) is equivalent to a.repeat(r)).
    indices = np.repeat(np.arange(len(self)), repeats)
    return self.take(indices)
1148 # ------------------------------------------------------------------------
1149 # Indexing methods
1150 # ------------------------------------------------------------------------
def take(
    self: ExtensionArrayT,
    indices: TakeIndexer,
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
) -> ExtensionArrayT:
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.
    api.extensions.take : Take elements from an array.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: The `fill_value` parameter should be a user-facing
    # value, an instance of self.dtype.type. When passed `fill_value=None`,
    # the default of `self.dtype.na_value` should be used.
    # This may differ from the physical storage type your ExtensionArray
    # uses. In this case, your implementation is responsible for casting
    # the user-facing type to the storage type, before using
    # pandas.api.extensions.take
    raise AbstractMethodError(self)
def copy(self: ExtensionArrayT) -> ExtensionArrayT:
    """
    Return a copy of the array.

    Returns
    -------
    ExtensionArray
        A new array holding the same data as ``self``.
    """
    # Abstract in the base class: each subclass defines its own copy
    # semantics for its backing storage.
    raise AbstractMethodError(self)
def view(self, dtype: Dtype | None = None) -> ArrayLike:
    """
    Return a view on the array.

    Parameters
    ----------
    dtype : str, np.dtype, or ExtensionDtype, optional
        Default None.

    Returns
    -------
    ExtensionArray or np.ndarray
        A view on the :class:`ExtensionArray`'s data.
    """
    # NB: must hand back a *new* object over the same underlying data, not
    # ``self`` itself. The base class is only required to support
    # dtype=None (a same-dtype view); anything else is up to subclasses.
    if dtype is None:
        return self[:]
    raise NotImplementedError(dtype)
1279 # ------------------------------------------------------------------------
1280 # Printing
1281 # ------------------------------------------------------------------------
def __repr__(self) -> str:
    # 2-D arrays get a dedicated multi-row rendering.
    if self.ndim > 1:
        return self._repr_2d()

    from pandas.io.formats.printing import format_object_summary

    # format_object_summary's truncated repr carries a trailing newline
    # while the short repr does not; normalize by stripping ", \n" and
    # re-adding our own newline via the template below.
    body = format_object_summary(
        self, self._formatter(), indent_for_name=False
    ).rstrip(", \n")
    header = f"<{type(self).__name__}>\n"
    footer = f"\nLength: {len(self)}, dtype: {self.dtype}"
    return f"{header}{body}{footer}"
def _repr_2d(self) -> str:
    from pandas.io.formats.printing import format_object_summary

    # Render each row with the same 1-D summary used by __repr__ (trailing
    # ", \n" stripped), then stack the rows inside brackets.
    rendered = []
    for row in self:
        summary = format_object_summary(
            row, self._formatter(), indent_for_name=False
        )
        rendered.append(summary.rstrip(", \n"))
    body = ",\n".join(rendered)
    name = f"<{type(self).__name__}>"
    return f"{name}\n[\n{body}\n]\nShape: {self.shape}, dtype: {self.dtype}"
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
    """
    Formatting function for scalar values.

    This is used in the default '__repr__'. The returned formatting
    function receives instances of your scalar type.

    Parameters
    ----------
    boxed : bool, default False
        Whether the array is being printed inside a Series, DataFrame, or
        Index (True) or on its own (False). Useful if scalar values should
        render differently in a container versus standalone (e.g. quoted
        or not).

    Returns
    -------
    Callable[[Any], str]
        A callable that gets instances of the scalar type and
        returns a string. By default, :func:`repr` is used
        when ``boxed=False`` and :func:`str` is used when
        ``boxed=True``.
    """
    return str if boxed else repr
1342 # ------------------------------------------------------------------------
1343 # Reshaping
1344 # ------------------------------------------------------------------------
def transpose(self, *axes: int) -> ExtensionArray:
    """
    Return a transposed view on this array.

    ExtensionArrays are always 1D, so transposition is a no-op; the method
    exists only for compatibility with np.ndarray.
    """
    # A fresh slice keeps "view" semantics: new object, same data.
    return self[:]
@property
def T(self) -> ExtensionArray:
    # numpy-style transpose accessor; a no-op for 1-D extension arrays.
    return self.transpose()
1359 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
1360 """
1361 Return a flattened view on this array.
1363 Parameters
1364 ----------
1365 order : {None, 'C', 'F', 'A', 'K'}, default 'C'
1367 Returns
1368 -------
1369 ExtensionArray
1371 Notes
1372 -----
1373 - Because ExtensionArrays are 1D-only, this is a no-op.
1374 - The "order" argument is ignored, is for compatibility with NumPy.
1375 """
1376 return self
@classmethod
def _concat_same_type(
    cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
) -> ExtensionArrayT:
    """
    Concatenate multiple array of this dtype.

    Parameters
    ----------
    to_concat : sequence of this type

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base class; concrete subclasses must override.
    """
    # Implementer note: this method will only be called with a sequence of
    # ExtensionArrays of this class and with the same dtype as self. This
    # should allow "easy" concatenation (no upcasting needed), and result
    # in a new ExtensionArray of the same dtype.
    # Note: this strict behaviour is only guaranteed starting with pandas 1.1
    raise AbstractMethodError(cls)
# The _can_hold_na attribute is set to True so that pandas internals
# will use the ExtensionDtype.na_value as the NA value in operations
# such as take(), reindex(), shift(), etc. In addition, those results
# will then be of the ExtensionArray subclass rather than an array
# of objects
@cache_readonly
def _can_hold_na(self) -> bool:
    # Delegates to the dtype; cached since the answer cannot change for
    # a given array.
    return self.dtype._can_hold_na
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
    """
    Return a scalar result of performing the reduction operation.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        { any, all, min, max, sum, mean, median, prod,
        std, var, sem, kurt, skew }.
    skipna : bool, default True
        If True, skip NaN values.
    **kwargs
        Additional keyword arguments passed to the reduction function.
        Currently, `ddof` is the only supported kwarg.

    Returns
    -------
    scalar

    Raises
    ------
    TypeError : subclass does not define reductions
    """
    # Look up a same-named method on self; its absence means this dtype
    # does not support the requested reduction.
    reducer = getattr(self, name, None)
    if reducer is None:
        raise TypeError(
            f"'{type(self).__name__}' with dtype {self.dtype} "
            f"does not support reduction '{name}'"
        )
    return reducer(skipna=skipna, **kwargs)
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
# NOTE: annotating __hash__ as ClassVar[None] marks instances of this class
# as unhashable.
__hash__: ClassVar[None]  # type: ignore[assignment]
1446 # ------------------------------------------------------------------------
1447 # Non-Optimized Default Methods; in the case of the private methods here,
1448 # these are not guaranteed to be stable across pandas versions.
def tolist(self) -> list:
    """
    Return a list of the values.

    These are each a scalar type, which is a Python scalar
    (for str, int, float) or a pandas scalar
    (for Timestamp/Timedelta/Interval/Period)

    Returns
    -------
    list
    """
    # 2-D arrays recurse row-by-row so the result nests like
    # ndarray.tolist().
    if self.ndim > 1:
        return [row.tolist() for row in self]
    return list(self)
def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
    # Keep every position except ``loc`` and materialize via ``take``.
    keep = np.delete(np.arange(len(self)), loc)
    return self.take(keep)
def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT:
    """
    Insert an item at the given position.

    Parameters
    ----------
    loc : int
    item : scalar-like

    Returns
    -------
    same type as self

    Notes
    -----
    This method should be both type and dtype-preserving. If the item
    cannot be held in an array of this type/dtype, either ValueError or
    TypeError should be raised.

    The default implementation relies on _from_sequence to raise on invalid
    items.
    """
    # Normalize negative positions and bounds-check against len(self).
    loc = validate_insert_loc(loc, len(self))

    # Wrap the scalar in a length-1 array of our dtype; _from_sequence is
    # responsible for rejecting incompatible items.
    wrapped = type(self)._from_sequence([item], dtype=self.dtype)

    # Stitch together prefix + item + suffix.
    return type(self)._concat_same_type([self[:loc], wrapped, self[loc:]])
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Analogue to np.putmask(self, mask, value)

    Parameters
    ----------
    mask : np.ndarray[bool]
    value : scalar or listlike
        If listlike, must be arraylike with same length as self.

    Returns
    -------
    None

    Notes
    -----
    Unlike np.putmask, we do not repeat listlike values with mismatched
    length: 'value' must be a scalar or an arraylike with the same length
    as self.
    """
    # Listlike values are aligned by selecting the masked positions first;
    # scalars are assigned directly.
    self[mask] = value[mask] if is_list_like(value) else value
def _where(
    self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value
) -> ExtensionArrayT:
    """
    Analogue to np.where(mask, self, value)

    Parameters
    ----------
    mask : np.ndarray[bool]
    value : scalar or listlike

    Returns
    -------
    same type as self
    """
    # Start from a copy: positions where the mask is False are overwritten
    # with (the aligned slice of) ``value``.
    result = self.copy()
    replacement = value[~mask] if is_list_like(value) else value
    result[~mask] = replacement
    return result
def _fill_mask_inplace(
    self, method: str, limit, mask: npt.NDArray[np.bool_]
) -> None:
    """
    Replace values in locations specified by 'mask' using pad or backfill.

    See also
    --------
    ExtensionArray.fillna
    """
    fill = missing.get_fill_func(method)
    as_object = self.astype(object)
    # Pass a *copy* of the mask: the fill func may mutate it in place,
    # which would corrupt the ``self[mask] = ...`` assignment below.
    fill(as_object, limit=limit, mask=mask.copy())
    filled = self._from_sequence(as_object, dtype=self.dtype)
    self[mask] = filled[mask]
def _rank(
    self,
    *,
    axis: int = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    See Series.rank.__doc__.
    """
    # Extension arrays are 1-D; only axis=0 is meaningful.
    if axis != 0:
        raise NotImplementedError

    # Delegate to the shared ranking implementation.
    # TODO: we only have tests that get here with dt64 and td64
    # TODO: all tests that get here use the defaults for all the kwds
    options = {
        "method": method,
        "na_option": na_option,
        "ascending": ascending,
        "pct": pct,
    }
    return rank(self, axis=axis, **options)
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Create an ExtensionArray with the given shape and dtype.

    See also
    --------
    ExtensionDtype.empty
    ExtensionDtype.empty is the 'official' public version of this API.
    """
    # Implementer note: ExtensionDtype.empty is the public entry point,
    # but subclasses must still implement this method (pandas calls it
    # internally).
    backing = cls._from_sequence([], dtype=dtype)

    # take(-1, allow_fill=True) broadcast over ``shape`` produces an
    # all-NA array of the requested shape.
    filler = np.broadcast_to(np.intp(-1), shape)
    result = backing.take(filler, allow_fill=True)
    if not isinstance(result, cls) or dtype != result.dtype:
        raise NotImplementedError(
            f"Default 'empty' implementation is invalid for dtype='{dtype}'"
        )
    return result
def _quantile(
    self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArrayT:
    """
    Compute the quantiles of self for each quantile in `qs`.

    Parameters
    ----------
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    same type as self
    """
    # Work on an ndarray view with an explicit NA mask; np.nan stands in
    # for missing entries during the computation.
    na_mask = np.asarray(self.isna())
    values = np.asarray(self)

    quantiles = quantile_with_mask(values, na_mask, np.nan, qs, interpolation)
    return type(self)._from_sequence(quantiles)
def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
    """
    Returns the mode(s) of the ExtensionArray.

    Always returns `ExtensionArray` even if only one value.

    Parameters
    ----------
    dropna : bool, default True
        Don't consider counts of NA values.

    Returns
    -------
    same type as self
        Sorted, if possible.
    """
    # Delegates to the shared pandas.core.algorithms.mode implementation.
    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "ExtensionArrayT")
    return mode(self, dropna=dropna)  # type: ignore[return-value]
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    # NumPy ufunc protocol hook. The dispatch order below is significant.

    # 1) Defer to pandas containers: if a Series/Index/DataFrame is among
    #    the inputs, return NotImplemented so the container can unbox and
    #    re-dispatch to us.
    if any(
        isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
    ):
        return NotImplemented

    # 2) Prefer our own dunder ops (e.g. __add__) when the ufunc maps to one.
    result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # 3) ``out=`` arguments are handled by a shared helper.
    if "out" in kwargs:
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    # 4) Reductions (e.g. np.add.reduce) go through the shared reduction
    #    dispatcher; fall through if it declines.
    if method == "reduce":
        result = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

    # 5) Final fallback: the default elementwise application.
    return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Subclasses must provide the factory that turns an operator
        # function into a bound dunder method.
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls):
        # Attach every arithmetic dunder; each forward operator is paired
        # with its reflected counterpart from ``roperator``.
        cls.__add__ = cls._create_arithmetic_method(operator.add)
        cls.__radd__ = cls._create_arithmetic_method(roperator.radd)
        cls.__sub__ = cls._create_arithmetic_method(operator.sub)
        cls.__rsub__ = cls._create_arithmetic_method(roperator.rsub)
        cls.__mul__ = cls._create_arithmetic_method(operator.mul)
        cls.__rmul__ = cls._create_arithmetic_method(roperator.rmul)
        cls.__pow__ = cls._create_arithmetic_method(operator.pow)
        cls.__rpow__ = cls._create_arithmetic_method(roperator.rpow)
        cls.__mod__ = cls._create_arithmetic_method(operator.mod)
        cls.__rmod__ = cls._create_arithmetic_method(roperator.rmod)
        cls.__floordiv__ = cls._create_arithmetic_method(operator.floordiv)
        cls.__rfloordiv__ = cls._create_arithmetic_method(roperator.rfloordiv)
        cls.__truediv__ = cls._create_arithmetic_method(operator.truediv)
        cls.__rtruediv__ = cls._create_arithmetic_method(roperator.rtruediv)
        cls.__divmod__ = cls._create_arithmetic_method(divmod)
        cls.__rdivmod__ = cls._create_arithmetic_method(roperator.rdivmod)

    @classmethod
    def _create_comparison_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls):
        # Attach the six rich-comparison dunders.
        cls.__eq__ = cls._create_comparison_method(operator.eq)
        cls.__ne__ = cls._create_comparison_method(operator.ne)
        cls.__lt__ = cls._create_comparison_method(operator.lt)
        cls.__gt__ = cls._create_comparison_method(operator.gt)
        cls.__le__ = cls._create_comparison_method(operator.le)
        cls.__ge__ = cls._create_comparison_method(operator.ge)

    @classmethod
    def _create_logical_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls):
        # Attach the bitwise/logical dunders and their reflected versions.
        cls.__and__ = cls._create_logical_method(operator.and_)
        cls.__rand__ = cls._create_logical_method(roperator.rand_)
        cls.__or__ = cls._create_logical_method(operator.or_)
        cls.__ror__ = cls._create_logical_method(roperator.ror_)
        cls.__xor__ = cls._create_logical_method(operator.xor)
        cls.__rxor__ = cls._create_logical_method(roperator.rxor)
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators. After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : dtype-like, optional
            dtype passed to ``np.asarray`` when ``coerce_to_dtype`` is
            False (e.g. ``bool`` for the comparison methods).

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

        Examples
        --------
        Given an ExtensionArray subclass called MyExtensionArray, use

            __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            def convert_values(param):
                # Pass listlikes through unchanged; broadcast a scalar to a
                # list matching len(self).
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume its an object
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # Apply the operator elementwise. If the operator is not defined
            # for the underlying objects, a TypeError should be raised.
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    res = maybe_cast_to_extension_array(type(self), arr)
                    if not isinstance(res, type(self)):
                        # exception raised in _from_sequence; ensure we have ndarray
                        res = np.asarray(arr)
                else:
                    res = np.asarray(arr, dtype=result_dtype)
                return res

            if op.__name__ in {"divmod", "rdivmod"}:
                # divmod yields (quotient, remainder) pairs: unzip into two
                # sequences and convert each separately.
                a, b = zip(*res)
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        # Name the generated function like the dunder it will be bound to
        # (e.g. "__add__").
        op_name = f"__{op.__name__}__"
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Arithmetic results are coerced back to the ExtensionArray dtype
        # when possible (coerce_to_dtype defaults to True).
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        # Comparisons always produce a plain boolean ndarray.
        return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)