Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/base.py: 29%

388 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2An interface for extending pandas with custom arrays. 

3 

4.. warning:: 

5 

6 This is an experimental API and subject to breaking changes 

7 without warning. 

8""" 

9from __future__ import annotations 

10 

11import inspect 

12import operator 

13from typing import ( 

14 TYPE_CHECKING, 

15 Any, 

16 Callable, 

17 ClassVar, 

18 Iterator, 

19 Literal, 

20 Sequence, 

21 TypeVar, 

22 cast, 

23 overload, 

24) 

25import warnings 

26 

27import numpy as np 

28 

29from pandas._libs import lib 

30from pandas._typing import ( 

31 ArrayLike, 

32 AstypeArg, 

33 Dtype, 

34 FillnaOptions, 

35 PositionalIndexer, 

36 ScalarIndexer, 

37 SequenceIndexer, 

38 Shape, 

39 TakeIndexer, 

40 npt, 

41) 

42from pandas.compat import set_function_name 

43from pandas.compat.numpy import function as nv 

44from pandas.errors import AbstractMethodError 

45from pandas.util._decorators import ( 

46 Appender, 

47 Substitution, 

48 cache_readonly, 

49 deprecate_nonkeyword_arguments, 

50) 

51from pandas.util._exceptions import find_stack_level 

52from pandas.util._validators import ( 

53 validate_bool_kwarg, 

54 validate_fillna_kwargs, 

55 validate_insert_loc, 

56) 

57 

58from pandas.core.dtypes.cast import maybe_cast_to_extension_array 

59from pandas.core.dtypes.common import ( 

60 is_dtype_equal, 

61 is_list_like, 

62 is_scalar, 

63 pandas_dtype, 

64) 

65from pandas.core.dtypes.dtypes import ExtensionDtype 

66from pandas.core.dtypes.generic import ( 

67 ABCDataFrame, 

68 ABCIndex, 

69 ABCSeries, 

70) 

71from pandas.core.dtypes.missing import isna 

72 

73from pandas.core import ( 

74 arraylike, 

75 missing, 

76 roperator, 

77) 

78from pandas.core.algorithms import ( 

79 factorize_array, 

80 isin, 

81 mode, 

82 rank, 

83 resolve_na_sentinel, 

84 unique, 

85) 

86from pandas.core.array_algos.quantile import quantile_with_mask 

87from pandas.core.sorting import ( 

88 nargminmax, 

89 nargsort, 

90) 

91 

92if TYPE_CHECKING: 

93 

94 class ExtensionArraySupportsAnyAll("ExtensionArray"): 

95 def any(self, *, skipna: bool = True) -> bool: 

96 pass 

97 

98 def all(self, *, skipna: bool = True) -> bool: 

99 pass 

100 

101 from pandas._typing import ( 

102 NumpySorter, 

103 NumpyValueArrayLike, 

104 ) 

105 

106 

107_extension_array_shared_docs: dict[str, str] = {} 

108 

109ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") 

110 

111 

112class ExtensionArray: 

113 """ 

114 Abstract base class for custom 1-D array types. 

115 

116 pandas will recognize instances of this class as proper arrays 

117 with a custom type and will not attempt to coerce them to objects. They 

118 may be stored directly inside a :class:`DataFrame` or :class:`Series`. 

119 

120 Attributes 

121 ---------- 

122 dtype 

123 nbytes 

124 ndim 

125 shape 

126 

127 Methods 

128 ------- 

129 argsort 

130 astype 

131 copy 

132 dropna 

133 factorize 

134 fillna 

135 equals 

136 insert 

137 isin 

138 isna 

139 ravel 

140 repeat 

141 searchsorted 

142 shift 

143 take 

144 tolist 

145 unique 

146 view 

147 _concat_same_type 

148 _formatter 

149 _from_factorized 

150 _from_sequence 

151 _from_sequence_of_strings 

152 _reduce 

153 _values_for_argsort 

154 _values_for_factorize 

155 

156 Notes 

157 ----- 

158 The interface includes the following abstract methods that must be 

159 implemented by subclasses: 

160 

161 * _from_sequence 

162 * _from_factorized 

163 * __getitem__ 

164 * __len__ 

165 * __eq__ 

166 * dtype 

167 * nbytes 

168 * isna 

169 * take 

170 * copy 

171 * _concat_same_type 

172 

173 A default repr displaying the type, (truncated) data, length, 

174 and dtype is provided. It can be customized or replaced by 

175 overriding: 

176 

177 * __repr__ : A default repr for the ExtensionArray. 

178 * _formatter : Print scalars inside a Series or DataFrame. 

179 

180 Some methods require casting the ExtensionArray to an ndarray of Python 

181 objects with ``self.astype(object)``, which may be expensive. When 

182 performance is a concern, we highly recommend overriding the following 

183 methods: 

184 

185 * fillna 

186 * dropna 

187 * unique 

188 * factorize / _values_for_factorize 

189 * argsort, argmax, argmin / _values_for_argsort 

190 * searchsorted 

191 

192 The remaining methods implemented on this class should be performant, 

193 as they only compose abstract methods. Still, a more efficient 

194 implementation may be available, and these methods can be overridden. 

195 

196 One can implement methods to handle array reductions. 

197 

198 * _reduce 

199 

200 One can implement methods to handle parsing from strings that will be used 

201 in methods such as ``pandas.io.parsers.read_csv``. 

202 

203 * _from_sequence_of_strings 

204 

205 This class does not inherit from 'abc.ABCMeta' for performance reasons. 

206 Methods and properties required by the interface raise 

207 ``pandas.errors.AbstractMethodError`` and no ``register`` method is 

208 provided for registering virtual subclasses. 

209 

210 ExtensionArrays are limited to 1 dimension. 

211 

212 They may be backed by none, one, or many NumPy arrays. For example, 

213 ``pandas.Categorical`` is an extension array backed by two arrays, 

214 one for codes and one for categories. An array of IPv6 addresses may 

215 be backed by a NumPy structured array with two fields, one for the 

216 lower 64 bits and one for the upper 64 bits. Or they may be backed 

217 by some other storage type, like Python lists. Pandas makes no 

218 assumptions on how the data are stored, just that it can be converted 

219 to a NumPy array. 

220 The ExtensionArray interface does not impose any rules on how this data 

221 is stored. However, currently, the backing data cannot be stored in 

222 attributes called ``.values`` or ``._values`` to ensure full compatibility 

223 with pandas internals. But other names such as ``.data``, ``._data``, 

224 ``._items``, ... can be freely used. 

225 

226 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects 

227 that 

228 

229 1. You defer by returning ``NotImplemented`` when any Series are present 

230 in `inputs`. Pandas will extract the arrays and call the ufunc again. 

231 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. 

232 Pandas inspects this to determine whether the ufunc is valid for the 

233 types present. 

234 

235 See :ref:`extending.extension.ufunc` for more. 

236 

237 By default, ExtensionArrays are not hashable. Immutable subclasses may 

238 override this behavior. 

239 """ 

240 

241 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. 

242 # Don't override this. 

243 _typ = "extension" 

244 

245 # ------------------------------------------------------------------------ 

246 # Constructors 

247 # ------------------------------------------------------------------------ 

248 

249 @classmethod 

250 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): 

251 """ 

252 Construct a new ExtensionArray from a sequence of scalars. 

253 

254 Parameters 

255 ---------- 

256 scalars : Sequence 

257 Each element will be an instance of the scalar type for this 

258 array, ``cls.dtype.type`` or be converted into this type in this method. 

259 dtype : dtype, optional 

260 Construct for this particular dtype. This should be a Dtype 

261 compatible with the ExtensionArray. 

262 copy : bool, default False 

263 If True, copy the underlying data. 

264 

265 Returns 

266 ------- 

267 ExtensionArray 

268 """ 

269 raise AbstractMethodError(cls) 

270 

271 @classmethod 

272 def _from_sequence_of_strings( 

273 cls, strings, *, dtype: Dtype | None = None, copy=False 

274 ): 

275 """ 

276 Construct a new ExtensionArray from a sequence of strings. 

277 

278 Parameters 

279 ---------- 

280 strings : Sequence 

281 Each element will be an instance of the scalar type for this 

282 array, ``cls.dtype.type``. 

283 dtype : dtype, optional 

284 Construct for this particular dtype. This should be a Dtype 

285 compatible with the ExtensionArray. 

286 copy : bool, default False 

287 If True, copy the underlying data. 

288 

289 Returns 

290 ------- 

291 ExtensionArray 

292 """ 

293 raise AbstractMethodError(cls) 

294 

295 @classmethod 

296 def _from_factorized(cls, values, original): 

297 """ 

298 Reconstruct an ExtensionArray after factorization. 

299 

300 Parameters 

301 ---------- 

302 values : ndarray 

303 An integer ndarray with the factorized values. 

304 original : ExtensionArray 

305 The original ExtensionArray that factorize was called on. 

306 

307 See Also 

308 -------- 

309 factorize : Top-level factorize method that dispatches here. 

310 ExtensionArray.factorize : Encode the extension array as an enumerated type. 

311 """ 

312 raise AbstractMethodError(cls) 

313 

314 # ------------------------------------------------------------------------ 

315 # Must be a Sequence 

316 # ------------------------------------------------------------------------ 

317 @overload 

318 def __getitem__(self, item: ScalarIndexer) -> Any: 

319 ... 

320 

321 @overload 

322 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT: 

323 ... 

324 

325 def __getitem__( 

326 self: ExtensionArrayT, item: PositionalIndexer 

327 ) -> ExtensionArrayT | Any: 

328 """ 

329 Select a subset of self. 

330 

331 Parameters 

332 ---------- 

333 item : int, slice, or ndarray 

334 * int: The position in 'self' to get. 

335 

336 * slice: A slice object, where 'start', 'stop', and 'step' are 

337 integers or None 

338 

339 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' 

340 

341 * list[int]: A list of int 

342 

343 Returns 

344 ------- 

345 item : scalar or ExtensionArray 

346 

347 Notes 

348 ----- 

349 For scalar ``item``, return a scalar value suitable for the array's 

350 type. This should be an instance of ``self.dtype.type``. 

351 

352 For slice ``key``, return an instance of ``ExtensionArray``, even 

353 if the slice is length 0 or 1. 

354 

355 For a boolean mask, return an instance of ``ExtensionArray``, filtered 

356 to the values where ``item`` is True. 

357 """ 

358 raise AbstractMethodError(self) 

359 

360 def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: 

361 """ 

362 Set one or more values inplace. 

363 

364 This method is not required to satisfy the pandas extension array 

365 interface. 

366 

367 Parameters 

368 ---------- 

369 key : int, ndarray, or slice 

370 When called from, e.g. ``Series.__setitem__``, ``key`` will be 

371 one of 

372 

373 * scalar int 

374 * ndarray of integers. 

375 * boolean ndarray 

376 * slice object 

377 

378 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object 

379 value or values to be set at ``key``. 

380 

381 Returns 

382 ------- 

383 None 

384 """ 

385 # Some notes to the ExtensionArray implementor who may have ended up 

386 # here. While this method is not required for the interface, if you 

387 # *do* choose to implement __setitem__, then some semantics should be 

388 # observed: 

389 # 

390 # * Setting multiple values : ExtensionArrays should support setting 

391 # multiple values at once, 'key' will be a sequence of integers and 

392 # 'value' will be a same-length sequence. 

393 # 

394 # * Broadcasting : For a sequence 'key' and a scalar 'value', 

395 # each position in 'key' should be set to 'value'. 

396 # 

397 # * Coercion : Most users will expect basic coercion to work. For 

398 # example, a string like '2018-01-01' is coerced to a datetime 

399 # when setting on a datetime64ns array. In general, if the 

400 # __init__ method coerces that value, then so should __setitem__ 

401 # Note, also, that Series/DataFrame.where internally use __setitem__ 

402 # on a copy of the data. 

403 raise NotImplementedError(f"{type(self)} does not implement __setitem__.") 

404 

405 def __len__(self) -> int: 

406 """ 

407 Length of this array 

408 

409 Returns 

410 ------- 

411 length : int 

412 """ 

413 raise AbstractMethodError(self) 

414 

415 def __iter__(self) -> Iterator[Any]: 

416 """ 

417 Iterate over elements of the array. 

418 """ 

419 # This needs to be implemented so that pandas recognizes extension 

420 # arrays as list-like. The default implementation makes successive 

421 # calls to ``__getitem__``, which may be slower than necessary. 

422 for i in range(len(self)): 

423 yield self[i] 

424 

425 def __contains__(self, item: object) -> bool | np.bool_: 

426 """ 

427 Return for `item in self`. 

428 """ 

429 # GH37867 

430 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] 

431 # would raise a TypeError. The implementation below works around that. 

432 if is_scalar(item) and isna(item): 

433 if not self._can_hold_na: 

434 return False 

435 elif item is self.dtype.na_value or isinstance(item, self.dtype.type): 

436 return self._hasna 

437 else: 

438 return False 

439 else: 

440 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no 

441 # attribute "any" 

442 return (item == self).any() # type: ignore[union-attr] 
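A short sketch, not part of base.py, of the NA-aware branch above on a nullable
array, assuming pandas is imported as ``pd``.

import pandas as pd

arr = pd.array([1, None], dtype="Int64")
1 in arr             # True  -> falls through to (item == self).any()
2 in arr             # False
pd.NA in arr         # True  -> item is the dtype's na_value and the array has NAs
float("nan") in arr  # False -> NA-like, but neither this dtype's na_value nor its type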

443 

444 # error: Signature of "__eq__" incompatible with supertype "object" 

445 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] 

446 """ 

447 Return for `self == other` (element-wise equality). 

448 """ 

449 # Implementer note: this should return a boolean numpy ndarray or 

450 # a boolean ExtensionArray. 

451 # When `other` is one of Series, Index, or DataFrame, this method should 

452 # return NotImplemented (to ensure that those objects are responsible for 

453 # first unpacking the arrays, and then dispatch the operation to the 

454 # underlying arrays) 

455 raise AbstractMethodError(self) 

456 

457 # error: Signature of "__ne__" incompatible with supertype "object" 

458 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] 

459 """ 

460 Return for `self != other` (element-wise inequality). 

461 """ 

462 return ~(self == other) 

463 

464 def __init_subclass__(cls, **kwargs) -> None: 

465 factorize = getattr(cls, "factorize") 

466 if ( 

467 "use_na_sentinel" not in inspect.signature(factorize).parameters 

468 # TimelikeOps uses old factorize args to ensure we don't break things 

469 and cls.__name__ not in ("TimelikeOps", "DatetimeArray", "TimedeltaArray") 

470 ): 

471 # See GH#46910 for details on the deprecation 

472 name = cls.__name__ 

473 warnings.warn( 

474 f"The `na_sentinel` argument of `{name}.factorize` is deprecated. " 

475 f"In the future, pandas will use the `use_na_sentinel` argument " 

476 f"instead. Add this argument to `{name}.factorize` to be compatible " 

477 f"with future versions of pandas and silence this warning.", 

478 DeprecationWarning, 

479 stacklevel=find_stack_level(), 

480 ) 

481 

482 def to_numpy( 

483 self, 

484 dtype: npt.DTypeLike | None = None, 

485 copy: bool = False, 

486 na_value: object = lib.no_default, 

487 ) -> np.ndarray: 

488 """ 

489 Convert to a NumPy ndarray. 

490 

491 .. versionadded:: 1.0.0 

492 

493 This is similar to :meth:`numpy.asarray`, but may provide additional control 

494 over how the conversion is done. 

495 

496 Parameters 

497 ---------- 

498 dtype : str or numpy.dtype, optional 

499 The dtype to pass to :meth:`numpy.asarray`. 

500 copy : bool, default False 

501 Whether to ensure that the returned value is not a view on 

502 another array. Note that ``copy=False`` does not *ensure* that 

503 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that 

504 a copy is made, even if not strictly necessary. 

505 na_value : Any, optional 

506 The value to use for missing values. The default value depends 

507 on `dtype` and the type of the array. 

508 

509 Returns 

510 ------- 

511 numpy.ndarray 

512 """ 

513 result = np.asarray(self, dtype=dtype) 

514 if copy or na_value is not lib.no_default: 

515 result = result.copy() 

516 if na_value is not lib.no_default: 

517 result[self.isna()] = na_value 

518 return result 
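A short sketch, not part of base.py, of typical ``to_numpy`` calls on a nullable
extension array, assuming pandas is imported as ``pd`` (masked arrays override this
method, but the keyword semantics documented above are the same).

import pandas as pd

arr = pd.array([1, 2, None], dtype="Int64")
arr.to_numpy(dtype=object)                            # array([1, 2, <NA>], dtype=object)
arr.to_numpy(dtype="float64", na_value=float("nan"))  # array([ 1.,  2., nan])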

519 

520 # ------------------------------------------------------------------------ 

521 # Required attributes 

522 # ------------------------------------------------------------------------ 

523 

524 @property 

525 def dtype(self) -> ExtensionDtype: 

526 """ 

527 An instance of 'ExtensionDtype'. 

528 """ 

529 raise AbstractMethodError(self) 

530 

531 @property 

532 def shape(self) -> Shape: 

533 """ 

534 Return a tuple of the array dimensions. 

535 """ 

536 return (len(self),) 

537 

538 @property 

539 def size(self) -> int: 

540 """ 

541 The number of elements in the array. 

542 """ 

543 # error: Incompatible return value type (got "signedinteger[_64Bit]", 

544 # expected "int") [return-value] 

545 return np.prod(self.shape) # type: ignore[return-value] 

546 

547 @property 

548 def ndim(self) -> int: 

549 """ 

550 Extension Arrays are only allowed to be 1-dimensional. 

551 """ 

552 return 1 

553 

554 @property 

555 def nbytes(self) -> int: 

556 """ 

557 The number of bytes needed to store this object in memory. 

558 """ 

559 # If this is expensive to compute, return an approximate lower bound 

560 # on the number of bytes needed. 

561 raise AbstractMethodError(self) 

562 

563 # ------------------------------------------------------------------------ 

564 # Additional Methods 

565 # ------------------------------------------------------------------------ 

566 

567 @overload 

568 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: 

569 ... 

570 

571 @overload 

572 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: 

573 ... 

574 

575 @overload 

576 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: 

577 ... 

578 

579 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: 

580 """ 

581 Cast to a NumPy array or ExtensionArray with 'dtype'. 

582 

583 Parameters 

584 ---------- 

585 dtype : str or dtype 

586 Typecode or data-type to which the array is cast. 

587 copy : bool, default True 

588 Whether to copy the data, even if not necessary. If False, 

589 a copy is made only if the old dtype does not match the 

590 new dtype. 

591 

592 Returns 

593 ------- 

594 array : np.ndarray or ExtensionArray 

595 An ExtensionArray if dtype is ExtensionDtype, 

596 Otherwise a NumPy ndarray with 'dtype' for its dtype. 

597 """ 

598 

599 dtype = pandas_dtype(dtype) 

600 if is_dtype_equal(dtype, self.dtype): 

601 if not copy: 

602 return self 

603 else: 

604 return self.copy() 

605 

606 if isinstance(dtype, ExtensionDtype): 

607 cls = dtype.construct_array_type() 

608 return cls._from_sequence(self, dtype=dtype, copy=copy) 

609 

610 return np.array(self, dtype=dtype, copy=copy) 

611 

612 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: 

613 """ 

614 A 1-D array indicating if each value is missing. 

615 

616 Returns 

617 ------- 

618 na_values : Union[np.ndarray, ExtensionArray] 

619 In most cases, this should return a NumPy ndarray. For 

620 exceptional cases like ``SparseArray``, where returning 

621 an ndarray would be expensive, an ExtensionArray may be 

622 returned. 

623 

624 Notes 

625 ----- 

626 If returning an ExtensionArray, then 

627 

628 * ``na_values._is_boolean`` should be True 

629 * `na_values` should implement :func:`ExtensionArray._reduce` 

630 * ``na_values.any`` and ``na_values.all`` should be implemented 

631 """ 

632 raise AbstractMethodError(self) 

633 

634 @property 

635 def _hasna(self) -> bool: 

636 # GH#22680 

637 """ 

638 Equivalent to `self.isna().any()`. 

639 

640 Some ExtensionArray subclasses may be able to optimize this check. 

641 """ 

642 return bool(self.isna().any()) 

643 

644 def _values_for_argsort(self) -> np.ndarray: 

645 """ 

646 Return values for sorting. 

647 

648 Returns 

649 ------- 

650 ndarray 

651 The transformed values should maintain the ordering between values 

652 within the array. 

653 

654 See Also 

655 -------- 

656 ExtensionArray.argsort : Return the indices that would sort this array. 

657 

658 Notes 

659 ----- 

660 The caller is responsible for *not* modifying these values in-place, so 

661 it is safe for implementors to give views on `self`. 

662 

663 Functions that use this (e.g. ExtensionArray.argsort) should ignore 

664 entries with missing values in the original array (according to `self.isna()`). 

665 This means that the corresponding entries in the returned array don't need to 

666 be modified to sort correctly. 

667 """ 

668 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. 

669 return np.array(self) 

670 

671 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

672 def argsort( 

673 self, 

674 ascending: bool = True, 

675 kind: str = "quicksort", 

676 na_position: str = "last", 

677 *args, 

678 **kwargs, 

679 ) -> np.ndarray: 

680 """ 

681 Return the indices that would sort this array. 

682 

683 Parameters 

684 ---------- 

685 ascending : bool, default True 

686 Whether the indices should result in an ascending 

687 or descending sort. 

688 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional 

689 Sorting algorithm. 

690 *args, **kwargs: 

691 Passed through to :func:`numpy.argsort`. 

692 

693 Returns 

694 ------- 

695 np.ndarray[np.intp] 

696 Array of indices that sort ``self``. If NaN values are contained, 

697 NaN values are placed at the end. 

698 

699 See Also 

700 -------- 

701 numpy.argsort : Sorting implementation used internally. 

702 """ 

703 # Implementor note: You have two places to override the behavior of 

704 # argsort. 

705 # 1. _values_for_argsort : construct the values passed to np.argsort 

706 # 2. argsort : total control over sorting. In case of overriding this, 

707 # it is recommended to also override argmax/argmin 

708 ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) 

709 

710 values = self._values_for_argsort() 

711 return nargsort( 

712 values, 

713 kind=kind, 

714 ascending=ascending, 

715 na_position=na_position, 

716 mask=np.asarray(self.isna()), 

717 ) 

718 

719 def argmin(self, skipna: bool = True) -> int: 

720 """ 

721 Return the index of minimum value. 

722 

723 In case of multiple occurrences of the minimum value, the index 

724 corresponding to the first occurrence is returned. 

725 

726 Parameters 

727 ---------- 

728 skipna : bool, default True 

729 

730 Returns 

731 ------- 

732 int 

733 

734 See Also 

735 -------- 

736 ExtensionArray.argmax 

737 """ 

738 # Implementor note: You have two places to override the behavior of 

739 # argmin. 

740 # 1. _values_for_argsort : construct the values used in nargminmax 

741 # 2. argmin itself : total control over sorting. 

742 validate_bool_kwarg(skipna, "skipna") 

743 if not skipna and self._hasna: 

744 raise NotImplementedError 

745 return nargminmax(self, "argmin") 

746 

747 def argmax(self, skipna: bool = True) -> int: 

748 """ 

749 Return the index of maximum value. 

750 

751 In case of multiple occurrences of the maximum value, the index 

752 corresponding to the first occurrence is returned. 

753 

754 Parameters 

755 ---------- 

756 skipna : bool, default True 

757 

758 Returns 

759 ------- 

760 int 

761 

762 See Also 

763 -------- 

764 ExtensionArray.argmin 

765 """ 

766 # Implementor note: You have two places to override the behavior of 

767 # argmax. 

768 # 1. _values_for_argsort : construct the values used in nargminmax 

769 # 2. argmax itself : total control over sorting. 

770 validate_bool_kwarg(skipna, "skipna") 

771 if not skipna and self._hasna: 

772 raise NotImplementedError 

773 return nargminmax(self, "argmax") 

774 

775 def fillna( 

776 self: ExtensionArrayT, 

777 value: object | ArrayLike | None = None, 

778 method: FillnaOptions | None = None, 

779 limit: int | None = None, 

780 ) -> ExtensionArrayT: 

781 """ 

782 Fill NA/NaN values using the specified method. 

783 

784 Parameters 

785 ---------- 

786 value : scalar, array-like 

787 If a scalar value is passed it is used to fill all missing values. 

788 Alternatively, an array-like 'value' can be given. It's expected 

789 that the array-like have the same length as 'self'. 

790 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

791 Method to use for filling holes in reindexed Series 

792 pad / ffill: propagate last valid observation forward to next valid 

793 backfill / bfill: use NEXT valid observation to fill gap. 

794 limit : int, default None 

795 If method is specified, this is the maximum number of consecutive 

796 NaN values to forward/backward fill. In other words, if there is 

797 a gap with more than this number of consecutive NaNs, it will only 

798 be partially filled. If method is not specified, this is the 

799 maximum number of entries along the entire axis where NaNs will be 

800 filled. 

801 

802 Returns 

803 ------- 

804 ExtensionArray 

805 With NA/NaN filled. 

806 """ 

807 value, method = validate_fillna_kwargs(value, method) 

808 

809 mask = self.isna() 

810 # error: Argument 2 to "check_value_size" has incompatible type 

811 # "ExtensionArray"; expected "ndarray" 

812 value = missing.check_value_size( 

813 value, mask, len(self) # type: ignore[arg-type] 

814 ) 

815 

816 if mask.any(): 

817 if method is not None: 

818 func = missing.get_fill_func(method) 

819 npvalues = self.astype(object) 

820 func(npvalues, limit=limit, mask=mask) 

821 new_values = self._from_sequence(npvalues, dtype=self.dtype) 

822 else: 

823 # fill with value 

824 new_values = self.copy() 

825 new_values[mask] = value 

826 else: 

827 new_values = self.copy() 

828 return new_values 
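A short sketch, not part of base.py, of scalar vs. method-based filling with the
``fillna`` signature above, assuming pandas is imported as ``pd``.

import pandas as pd

arr = pd.array([1, None, None, 4], dtype="Int64")
arr.fillna(0)               # <IntegerArray> [1, 0, 0, 4]
arr.fillna(method="ffill")  # <IntegerArray> [1, 1, 1, 4]
arr.fillna(method="bfill")  # <IntegerArray> [1, 4, 4, 4]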

829 

830 def dropna(self: ExtensionArrayT) -> ExtensionArrayT: 

831 """ 

832 Return ExtensionArray without NA values. 

833 

834 Returns 

835 ------- 

836 valid : ExtensionArray 

837 """ 

838 # error: Unsupported operand type for ~ ("ExtensionArray") 

839 return self[~self.isna()] # type: ignore[operator] 

840 

841 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: 

842 """ 

843 Shift values by desired number. 

844 

845 Newly introduced missing values are filled with 

846 ``self.dtype.na_value``. 

847 

848 Parameters 

849 ---------- 

850 periods : int, default 1 

851 The number of periods to shift. Negative values are allowed 

852 for shifting backwards. 

853 

854 fill_value : object, optional 

855 The scalar value to use for newly introduced missing values. 

856 The default is ``self.dtype.na_value``. 

857 

858 Returns 

859 ------- 

860 ExtensionArray 

861 Shifted. 

862 

863 Notes 

864 ----- 

865 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is 

866 returned. 

867 

868 If ``periods > len(self)``, then an array of size 

869 len(self) is returned, with all values filled with 

870 ``self.dtype.na_value``. 

871 """ 

872 # Note: this implementation assumes that `self.dtype.na_value` can be 

873 # stored in an instance of your ExtensionArray with `self.dtype`. 

874 if not len(self) or periods == 0: 

875 return self.copy() 

876 

877 if isna(fill_value): 

878 fill_value = self.dtype.na_value 

879 

880 empty = self._from_sequence( 

881 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype 

882 ) 

883 if periods > 0: 

884 a = empty 

885 b = self[:-periods] 

886 else: 

887 a = self[abs(periods) :] 

888 b = empty 

889 return self._concat_same_type([a, b]) 
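A short sketch, not part of base.py, of the concat-with-an-NA-block strategy
implemented above, assuming pandas is imported as ``pd``.

import pandas as pd

arr = pd.array([1, 2, 3], dtype="Int64")
arr.shift(1)    # [<NA>, 1, 2]      -> NA block prepended, tail dropped
arr.shift(-2)   # [3, <NA>, <NA>]   -> head dropped, NA block appended
arr.shift(5)    # [<NA>, <NA>, <NA>] -> periods > len(self): all NA, same length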

890 

891 def unique(self: ExtensionArrayT) -> ExtensionArrayT: 

892 """ 

893 Compute the ExtensionArray of unique values. 

894 

895 Returns 

896 ------- 

897 uniques : ExtensionArray 

898 """ 

899 uniques = unique(self.astype(object)) 

900 return self._from_sequence(uniques, dtype=self.dtype) 

901 

902 def searchsorted( 

903 self, 

904 value: NumpyValueArrayLike | ExtensionArray, 

905 side: Literal["left", "right"] = "left", 

906 sorter: NumpySorter = None, 

907 ) -> npt.NDArray[np.intp] | np.intp: 

908 """ 

909 Find indices where elements should be inserted to maintain order. 

910 

911 Find the indices into a sorted array `self` (a) such that, if the 

912 corresponding elements in `value` were inserted before the indices, 

913 the order of `self` would be preserved. 

914 

915 Assuming that `self` is sorted: 

916 

917 ====== ================================ 

918 `side` returned index `i` satisfies 

919 ====== ================================ 

920 left ``self[i-1] < value <= self[i]`` 

921 right ``self[i-1] <= value < self[i]`` 

922 ====== ================================ 

923 

924 Parameters 

925 ---------- 

926 value : array-like, list or scalar 

927 Value(s) to insert into `self`. 

928 side : {'left', 'right'}, optional 

929 If 'left', the index of the first suitable location found is given. 

930 If 'right', return the last such index. If there is no suitable 

931 index, return either 0 or N (where N is the length of `self`). 

932 sorter : 1-D array-like, optional 

933 Optional array of integer indices that sort array a into ascending 

934 order. They are typically the result of argsort. 

935 

936 Returns 

937 ------- 

938 array of ints or int 

939 If value is array-like, array of insertion points. 

940 If value is scalar, a single integer. 

941 

942 See Also 

943 -------- 

944 numpy.searchsorted : Similar method from NumPy. 

945 """ 

946 # Note: the base tests provided by pandas only test the basics. 

947 # We do not test 

948 # 1. Values outside the range of the `data_for_sorting` fixture 

949 # 2. Values between the values in the `data_for_sorting` fixture 

950 # 3. Missing values. 

951 arr = self.astype(object) 

952 if isinstance(value, ExtensionArray): 

953 value = value.astype(object) 

954 return arr.searchsorted(value, side=side, sorter=sorter) 
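A short sketch, not part of base.py, of the ``side="left"`` / ``side="right"``
contract from the table above, assuming pandas is imported as ``pd``.

import pandas as pd

arr = pd.array([1, 2, 2, 3], dtype="Int64")
arr.searchsorted(2, side="left")   # 1 -> first position where 2 could be inserted
arr.searchsorted(2, side="right")  # 3 -> position just after the existing 2s
arr.searchsorted([0, 4])           # array([0, 4]) -> array in, array out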

955 

956 def equals(self, other: object) -> bool: 

957 """ 

958 Return if another array is equivalent to this array. 

959 

960 Equivalent means that both arrays have the same shape and dtype, and 

961 all values compare equal. Missing values in the same location are 

962 considered equal (in contrast with normal equality). 

963 

964 Parameters 

965 ---------- 

966 other : ExtensionArray 

967 Array to compare to this Array. 

968 

969 Returns 

970 ------- 

971 boolean 

972 Whether the arrays are equivalent. 

973 """ 

974 if type(self) != type(other): 

975 return False 

976 other = cast(ExtensionArray, other) 

977 if not is_dtype_equal(self.dtype, other.dtype): 

978 return False 

979 elif len(self) != len(other): 

980 return False 

981 else: 

982 equal_values = self == other 

983 if isinstance(equal_values, ExtensionArray): 

984 # boolean array with NA -> fill with False 

985 equal_values = equal_values.fillna(False) 

986 # error: Unsupported left operand type for & ("ExtensionArray") 

987 equal_na = self.isna() & other.isna() # type: ignore[operator] 

988 return bool((equal_values | equal_na).all()) 

989 

990 def isin(self, values) -> npt.NDArray[np.bool_]: 

991 """ 

992 Pointwise comparison for set containment in the given values. 

993 

994 Roughly equivalent to `np.array([x in values for x in self])` 

995 

996 Parameters 

997 ---------- 

998 values : Sequence 

999 

1000 Returns 

1001 ------- 

1002 np.ndarray[bool] 

1003 """ 

1004 return isin(np.asarray(self), values) 

1005 

1006 def _values_for_factorize(self) -> tuple[np.ndarray, Any]: 

1007 """ 

1008 Return an array and missing value suitable for factorization. 

1009 

1010 Returns 

1011 ------- 

1012 values : ndarray 

1013 

1014 An array suitable for factorization. This should maintain order 

1015 and be a supported dtype (Float64, Int64, UInt64, String, Object). 

1016 By default, the extension array is cast to object dtype. 

1017 na_value : object 

1018 The value in `values` to consider missing. This will be treated 

1019 as NA in the factorization routines, so it will be coded as 

1020 `na_sentinel` and not included in `uniques`. By default, 

1021 ``np.nan`` is used. 

1022 

1023 Notes 

1024 ----- 

1025 The values returned by this method are also used in 

1026 :func:`pandas.util.hash_pandas_object`. 

1027 """ 

1028 return self.astype(object), np.nan 

1029 

1030 def factorize( 

1031 self, 

1032 na_sentinel: int | lib.NoDefault = lib.no_default, 

1033 use_na_sentinel: bool | lib.NoDefault = lib.no_default, 

1034 ) -> tuple[np.ndarray, ExtensionArray]: 

1035 """ 

1036 Encode the extension array as an enumerated type. 

1037 

1038 Parameters 

1039 ---------- 

1040 na_sentinel : int, default -1 

1041 Value to use in the `codes` array to indicate missing values. 

1042 

1043 .. deprecated:: 1.5.0 

1044 The na_sentinel argument is deprecated and 

1045 will be removed in a future version of pandas. Specify use_na_sentinel 

1046 as either True or False. 

1047 

1048 use_na_sentinel : bool, default True 

1049 If True, the sentinel -1 will be used for NaN values. If False, 

1050 NaN values will be encoded as non-negative integers and NaN will not be 

1051 dropped from the uniques of the values. 

1052 

1053 .. versionadded:: 1.5.0 

1054 

1055 Returns 

1056 ------- 

1057 codes : ndarray 

1058 An integer NumPy array that's an indexer into `uniques` 

1059 (missing values are coded with the NA sentinel, -1 by default). 

1060 uniques : ExtensionArray 

1061 An ExtensionArray containing the unique values of `self`. 

1062 

1063 .. note:: 

1064 

1065 uniques will *not* contain an entry for the NA value of 

1066 the ExtensionArray if there are any missing values present 

1067 in `self`. 

1068 

1069 See Also 

1070 -------- 

1071 factorize : Top-level factorize method that dispatches here. 

1072 

1073 Notes 

1074 ----- 

1075 :meth:`pandas.factorize` offers a `sort` keyword as well. 

1076 """ 

1077 # Implementer note: There are two ways to override the behavior of 

1078 # pandas.factorize 

1079 # 1. _values_for_factorize and _from_factorize. 

1080 # Specify the values passed to pandas' internal factorization 

1081 # routines, and how to convert from those values back to the 

1082 # original ExtensionArray. 

1083 # 2. ExtensionArray.factorize. 

1084 # Complete control over factorization. 

1085 resolved_na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel) 

1086 arr, na_value = self._values_for_factorize() 

1087 

1088 codes, uniques = factorize_array( 

1089 arr, na_sentinel=resolved_na_sentinel, na_value=na_value 

1090 ) 

1091 

1092 uniques_ea = self._from_factorized(uniques, self) 

1093 return codes, uniques_ea 
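A short sketch, not part of base.py, of the codes/uniques pair returned by
``factorize``, assuming pandas is imported as ``pd``.

import pandas as pd

arr = pd.array([1, None, 2, 1], dtype="Int64")
codes, uniques = arr.factorize()
# codes   -> array([ 0, -1,  1,  0])  (the missing value is coded with the -1 sentinel)
# uniques -> <IntegerArray> [1, 2]    (no entry for the missing value)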

1094 

1095 _extension_array_shared_docs[ 

1096 "repeat" 

1097 ] = """ 

1098 Repeat elements of a %(klass)s. 

1099 

1100 Returns a new %(klass)s where each element of the current %(klass)s 

1101 is repeated consecutively a given number of times. 

1102 

1103 Parameters 

1104 ---------- 

1105 repeats : int or array of ints 

1106 The number of repetitions for each element. This should be a 

1107 non-negative integer. Repeating 0 times will return an empty 

1108 %(klass)s. 

1109 axis : None 

1110 Must be ``None``. Has no effect but is accepted for compatibility 

1111 with numpy. 

1112 

1113 Returns 

1114 ------- 

1115 repeated_array : %(klass)s 

1116 Newly created %(klass)s with repeated elements. 

1117 

1118 See Also 

1119 -------- 

1120 Series.repeat : Equivalent function for Series. 

1121 Index.repeat : Equivalent function for Index. 

1122 numpy.repeat : Similar method for :class:`numpy.ndarray`. 

1123 ExtensionArray.take : Take arbitrary positions. 

1124 

1125 Examples 

1126 -------- 

1127 >>> cat = pd.Categorical(['a', 'b', 'c']) 

1128 >>> cat 

1129 ['a', 'b', 'c'] 

1130 Categories (3, object): ['a', 'b', 'c'] 

1131 >>> cat.repeat(2) 

1132 ['a', 'a', 'b', 'b', 'c', 'c'] 

1133 Categories (3, object): ['a', 'b', 'c'] 

1134 >>> cat.repeat([1, 2, 3]) 

1135 ['a', 'b', 'b', 'c', 'c', 'c'] 

1136 Categories (3, object): ['a', 'b', 'c'] 

1137 """ 

1138 

1139 @Substitution(klass="ExtensionArray") 

1140 @Appender(_extension_array_shared_docs["repeat"]) 

1141 def repeat( 

1142 self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None 

1143 ) -> ExtensionArrayT: 

1144 nv.validate_repeat((), {"axis": axis}) 

1145 ind = np.arange(len(self)).repeat(repeats) 

1146 return self.take(ind) 

1147 

1148 # ------------------------------------------------------------------------ 

1149 # Indexing methods 

1150 # ------------------------------------------------------------------------ 

1151 

1152 def take( 

1153 self: ExtensionArrayT, 

1154 indices: TakeIndexer, 

1155 *, 

1156 allow_fill: bool = False, 

1157 fill_value: Any = None, 

1158 ) -> ExtensionArrayT: 

1159 """ 

1160 Take elements from an array. 

1161 

1162 Parameters 

1163 ---------- 

1164 indices : sequence of int or one-dimensional np.ndarray of int 

1165 Indices to be taken. 

1166 allow_fill : bool, default False 

1167 How to handle negative values in `indices`. 

1168 

1169 * False: negative values in `indices` indicate positional indices 

1170 from the right (the default). This is similar to 

1171 :func:`numpy.take`. 

1172 

1173 * True: negative values in `indices` indicate 

1174 missing values. These values are set to `fill_value`. Any other 

1175 negative values raise a ``ValueError``. 

1176 

1177 fill_value : any, optional 

1178 Fill value to use for NA-indices when `allow_fill` is True. 

1179 This may be ``None``, in which case the default NA value for 

1180 the type, ``self.dtype.na_value``, is used. 

1181 

1182 For many ExtensionArrays, there will be two representations of 

1183 `fill_value`: a user-facing "boxed" scalar, and a low-level 

1184 physical NA value. `fill_value` should be the user-facing version, 

1185 and the implementation should handle translating that to the 

1186 physical version for processing the take if necessary. 

1187 

1188 Returns 

1189 ------- 

1190 ExtensionArray 

1191 

1192 Raises 

1193 ------ 

1194 IndexError 

1195 When the indices are out of bounds for the array. 

1196 ValueError 

1197 When `indices` contains negative values other than ``-1`` 

1198 and `allow_fill` is True. 

1199 

1200 See Also 

1201 -------- 

1202 numpy.take : Take elements from an array along an axis. 

1203 api.extensions.take : Take elements from an array. 

1204 

1205 Notes 

1206 ----- 

1207 ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, 

1208 ``iloc``, when `indices` is a sequence of values. Additionally, 

1209 it's called by :meth:`Series.reindex`, or any other method 

1210 that causes realignment, with a `fill_value`. 

1211 

1212 Examples 

1213 -------- 

1214 Here's an example implementation, which relies on casting the 

1215 extension array to object dtype. This uses the helper method 

1216 :func:`pandas.api.extensions.take`. 

1217 

1218 .. code-block:: python 

1219 

1220 def take(self, indices, allow_fill=False, fill_value=None): 

1221 from pandas.core.algorithms import take 

1222 

1223 # If the ExtensionArray is backed by an ndarray, then 

1224 # just pass that here instead of coercing to object. 

1225 data = self.astype(object) 

1226 

1227 if allow_fill and fill_value is None: 

1228 fill_value = self.dtype.na_value 

1229 

1230 # fill value should always be translated from the scalar 

1231 # type for the array, to the physical storage type for 

1232 # the data, before passing to take. 

1233 

1234 result = take(data, indices, fill_value=fill_value, 

1235 allow_fill=allow_fill) 

1236 return self._from_sequence(result, dtype=self.dtype) 

1237 """ 

1238 # Implementer note: The `fill_value` parameter should be a user-facing 

1239 # value, an instance of self.dtype.type. When passed `fill_value=None`, 

1240 # the default of `self.dtype.na_value` should be used. 

1241 # This may differ from the physical storage type your ExtensionArray 

1242 # uses. In this case, your implementation is responsible for casting 

1243 # the user-facing type to the storage type, before using 

1244 # pandas.api.extensions.take 

1245 raise AbstractMethodError(self) 

1246 

1247 def copy(self: ExtensionArrayT) -> ExtensionArrayT: 

1248 """ 

1249 Return a copy of the array. 

1250 

1251 Returns 

1252 ------- 

1253 ExtensionArray 

1254 """ 

1255 raise AbstractMethodError(self) 

1256 

1257 def view(self, dtype: Dtype | None = None) -> ArrayLike: 

1258 """ 

1259 Return a view on the array. 

1260 

1261 Parameters 

1262 ---------- 

1263 dtype : str, np.dtype, or ExtensionDtype, optional 

1264 Default None. 

1265 

1266 Returns 

1267 ------- 

1268 ExtensionArray or np.ndarray 

1269 A view on the :class:`ExtensionArray`'s data. 

1270 """ 

1271 # NB: 

1272 # - This must return a *new* object referencing the same data, not self. 

1273 # - The only case that *must* be implemented is with dtype=None, 

1274 # giving a view with the same dtype as self. 

1275 if dtype is not None: 

1276 raise NotImplementedError(dtype) 

1277 return self[:] 

1278 

1279 # ------------------------------------------------------------------------ 

1280 # Printing 

1281 # ------------------------------------------------------------------------ 

1282 

1283 def __repr__(self) -> str: 

1284 if self.ndim > 1: 

1285 return self._repr_2d() 

1286 

1287 from pandas.io.formats.printing import format_object_summary 

1288 

1289 # the short repr has no trailing newline, while the truncated 

1290 # repr does. So we include a newline in our template, and strip 

1291 # any trailing newlines from format_object_summary 

1292 data = format_object_summary( 

1293 self, self._formatter(), indent_for_name=False 

1294 ).rstrip(", \n") 

1295 class_name = f"<{type(self).__name__}>\n" 

1296 return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" 

1297 

1298 def _repr_2d(self) -> str: 

1299 from pandas.io.formats.printing import format_object_summary 

1300 

1301 # the short repr has no trailing newline, while the truncated 

1302 # repr does. So we include a newline in our template, and strip 

1303 # any trailing newlines from format_object_summary 

1304 lines = [ 

1305 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( 

1306 ", \n" 

1307 ) 

1308 for x in self 

1309 ] 

1310 data = ",\n".join(lines) 

1311 class_name = f"<{type(self).__name__}>" 

1312 return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" 

1313 

1314 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: 

1315 """ 

1316 Formatting function for scalar values. 

1317 

1318 This is used in the default '__repr__'. The returned formatting 

1319 function receives instances of your scalar type. 

1320 

1321 Parameters 

1322 ---------- 

1323 boxed : bool, default False 

1324 An indicator of whether your array is being printed 

1325 within a Series, DataFrame, or Index (True), or just by 

1326 itself (False). This may be useful if you want scalar values 

1327 to appear differently within a Series versus on its own (e.g. 

1328 quoted or not). 

1329 

1330 Returns 

1331 ------- 

1332 Callable[[Any], str] 

1333 A callable that gets instances of the scalar type and 

1334 returns a string. By default, :func:`repr` is used 

1335 when ``boxed=False`` and :func:`str` is used when 

1336 ``boxed=True``. 

1337 """ 

1338 if boxed: 

1339 return str 

1340 return repr 

1341 

1342 # ------------------------------------------------------------------------ 

1343 # Reshaping 

1344 # ------------------------------------------------------------------------ 

1345 

1346 def transpose(self, *axes: int) -> ExtensionArray: 

1347 """ 

1348 Return a transposed view on this array. 

1349 

1350 Because ExtensionArrays are always 1D, this is a no-op. It is included 

1351 for compatibility with np.ndarray. 

1352 """ 

1353 return self[:] 

1354 

1355 @property 

1356 def T(self) -> ExtensionArray: 

1357 return self.transpose() 

1358 

1359 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray: 

1360 """ 

1361 Return a flattened view on this array. 

1362 

1363 Parameters 

1364 ---------- 

1365 order : {None, 'C', 'F', 'A', 'K'}, default 'C' 

1366 

1367 Returns 

1368 ------- 

1369 ExtensionArray 

1370 

1371 Notes 

1372 ----- 

1373 - Because ExtensionArrays are 1D-only, this is a no-op. 

1374 - The "order" argument is ignored and exists only for compatibility with NumPy. 

1375 """ 

1376 return self 

1377 

1378 @classmethod 

1379 def _concat_same_type( 

1380 cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] 

1381 ) -> ExtensionArrayT: 

1382 """ 

1383 Concatenate multiple arrays of this dtype. 

1384 

1385 Parameters 

1386 ---------- 

1387 to_concat : sequence of this type 

1388 

1389 Returns 

1390 ------- 

1391 ExtensionArray 

1392 """ 

1393 # Implementer note: this method will only be called with a sequence of 

1394 # ExtensionArrays of this class and with the same dtype as self. This 

1395 # should allow "easy" concatenation (no upcasting needed), and result 

1396 # in a new ExtensionArray of the same dtype. 

1397 # Note: this strict behaviour is only guaranteed starting with pandas 1.1 

1398 raise AbstractMethodError(cls) 

1399 

1400 # The _can_hold_na attribute is set to True so that pandas internals 

1401 # will use the ExtensionDtype.na_value as the NA value in operations 

1402 # such as take(), reindex(), shift(), etc. In addition, those results 

1403 # will then be of the ExtensionArray subclass rather than an array 

1404 # of objects 

1405 @cache_readonly 

1406 def _can_hold_na(self) -> bool: 

1407 return self.dtype._can_hold_na 

1408 

1409 def _reduce(self, name: str, *, skipna: bool = True, **kwargs): 

1410 """ 

1411 Return a scalar result of performing the reduction operation. 

1412 

1413 Parameters 

1414 ---------- 

1415 name : str 

1416 Name of the function, supported values are: 

1417 { any, all, min, max, sum, mean, median, prod, 

1418 std, var, sem, kurt, skew }. 

1419 skipna : bool, default True 

1420 If True, skip NaN values. 

1421 **kwargs 

1422 Additional keyword arguments passed to the reduction function. 

1423 Currently, `ddof` is the only supported kwarg. 

1424 

1425 Returns 

1426 ------- 

1427 scalar 

1428 

1429 Raises 

1430 ------ 

1431 TypeError : subclass does not define reductions 

1432 """ 

1433 meth = getattr(self, name, None) 

1434 if meth is None: 

1435 raise TypeError( 

1436 f"'{type(self).__name__}' with dtype {self.dtype} " 

1437 f"does not support reduction '{name}'" 

1438 ) 

1439 return meth(skipna=skipna, **kwargs) 

1440 

1441 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

1442 # Incompatible types in assignment (expression has type "None", base class 

1443 # "object" defined the type as "Callable[[object], int]") 

1444 __hash__: ClassVar[None] # type: ignore[assignment] 

1445 

1446 # ------------------------------------------------------------------------ 

1447 # Non-Optimized Default Methods; in the case of the private methods here, 

1448 # these are not guaranteed to be stable across pandas versions. 

1449 

1450 def tolist(self) -> list: 

1451 """ 

1452 Return a list of the values. 

1453 

1454 These are each a scalar type, which is a Python scalar 

1455 (for str, int, float) or a pandas scalar 

1456 (for Timestamp/Timedelta/Interval/Period) 

1457 

1458 Returns 

1459 ------- 

1460 list 

1461 """ 

1462 if self.ndim > 1: 

1463 return [x.tolist() for x in self] 

1464 return list(self) 

1465 

1466 def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: 

1467 indexer = np.delete(np.arange(len(self)), loc) 

1468 return self.take(indexer) 

1469 

1470 def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT: 

1471 """ 

1472 Insert an item at the given position. 

1473 

1474 Parameters 

1475 ---------- 

1476 loc : int 

1477 item : scalar-like 

1478 

1479 Returns 

1480 ------- 

1481 same type as self 

1482 

1483 Notes 

1484 ----- 

1485 This method should be both type and dtype-preserving. If the item 

1486 cannot be held in an array of this type/dtype, either ValueError or 

1487 TypeError should be raised. 

1488 

1489 The default implementation relies on _from_sequence to raise on invalid 

1490 items. 

1491 """ 

1492 loc = validate_insert_loc(loc, len(self)) 

1493 

1494 item_arr = type(self)._from_sequence([item], dtype=self.dtype) 

1495 

1496 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]]) 

1497 

1498 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

1499 """ 

1500 Analogue to np.putmask(self, mask, value) 

1501 

1502 Parameters 

1503 ---------- 

1504 mask : np.ndarray[bool] 

1505 value : scalar or listlike 

1506 If listlike, must be arraylike with same length as self. 

1507 

1508 Returns 

1509 ------- 

1510 None 

1511 

1512 Notes 

1513 ----- 

1514 Unlike np.putmask, we do not repeat listlike values with mismatched length. 

1515 'value' should either be a scalar or an arraylike with the same length 

1516 as self. 

1517 """ 

1518 if is_list_like(value): 

1519 val = value[mask] 

1520 else: 

1521 val = value 

1522 

1523 self[mask] = val 

1524 

1525 def _where( 

1526 self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value 

1527 ) -> ExtensionArrayT: 

1528 """ 

1529 Analogue to np.where(mask, self, value) 

1530 

1531 Parameters 

1532 ---------- 

1533 mask : np.ndarray[bool] 

1534 value : scalar or listlike 

1535 

1536 Returns 

1537 ------- 

1538 same type as self 

1539 """ 

1540 result = self.copy() 

1541 

1542 if is_list_like(value): 

1543 val = value[~mask] 

1544 else: 

1545 val = value 

1546 

1547 result[~mask] = val 

1548 return result 

1549 

1550 def _fill_mask_inplace( 

1551 self, method: str, limit, mask: npt.NDArray[np.bool_] 

1552 ) -> None: 

1553 """ 

1554 Replace values in locations specified by 'mask' using pad or backfill. 

1555 

1556 See also 

1557 -------- 

1558 ExtensionArray.fillna 

1559 """ 

1560 func = missing.get_fill_func(method) 

1561 npvalues = self.astype(object) 

1562 # NB: if we don't copy mask here, it may be altered inplace, which 

1563 # would mess up the `self[mask] = ...` below. 

1564 func(npvalues, limit=limit, mask=mask.copy()) 

1565 new_values = self._from_sequence(npvalues, dtype=self.dtype) 

1566 self[mask] = new_values[mask] 

1567 return 

1568 

1569 def _rank( 

1570 self, 

1571 *, 

1572 axis: int = 0, 

1573 method: str = "average", 

1574 na_option: str = "keep", 

1575 ascending: bool = True, 

1576 pct: bool = False, 

1577 ): 

1578 """ 

1579 See Series.rank.__doc__. 

1580 """ 

1581 if axis != 0: 

1582 raise NotImplementedError 

1583 

1584 # TODO: we only have tests that get here with dt64 and td64 

1585 # TODO: all tests that get here use the defaults for all the kwds 

1586 return rank( 

1587 self, 

1588 axis=axis, 

1589 method=method, 

1590 na_option=na_option, 

1591 ascending=ascending, 

1592 pct=pct, 

1593 ) 

1594 

1595 @classmethod 

1596 def _empty(cls, shape: Shape, dtype: ExtensionDtype): 

1597 """ 

1598 Create an ExtensionArray with the given shape and dtype. 

1599 

1600 See also 

1601 -------- 

1602 ExtensionDtype.empty 

1603 ExtensionDtype.empty is the 'official' public version of this API. 

1604 """ 

1605 # Implementer note: while ExtensionDtype.empty is the public way to 

1606 # call this method, it is still required to implement this `_empty` 

1607 # method as well (it is called internally in pandas) 

1608 obj = cls._from_sequence([], dtype=dtype) 

1609 

1610 taker = np.broadcast_to(np.intp(-1), shape) 

1611 result = obj.take(taker, allow_fill=True) 

1612 if not isinstance(result, cls) or dtype != result.dtype: 

1613 raise NotImplementedError( 

1614 f"Default 'empty' implementation is invalid for dtype='{dtype}'" 

1615 ) 

1616 return result 

1617 

1618 def _quantile( 

1619 self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str 

1620 ) -> ExtensionArrayT: 

1621 """ 

1622 Compute the quantiles of self for each quantile in `qs`. 

1623 

1624 Parameters 

1625 ---------- 

1626 qs : np.ndarray[float64] 

1627 interpolation: str 

1628 

1629 Returns 

1630 ------- 

1631 same type as self 

1632 """ 

1633 mask = np.asarray(self.isna()) 

1634 arr = np.asarray(self) 

1635 fill_value = np.nan 

1636 

1637 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) 

1638 return type(self)._from_sequence(res_values) 

1639 

1640 def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT: 

1641 """ 

1642 Returns the mode(s) of the ExtensionArray. 

1643 

1644 Always returns `ExtensionArray` even if only one value. 

1645 

1646 Parameters 

1647 ---------- 

1648 dropna : bool, default True 

1649 Don't consider counts of NA values. 

1650 

1651 Returns 

1652 ------- 

1653 same type as self 

1654 Sorted, if possible. 

1655 """ 

1656 # error: Incompatible return value type (got "Union[ExtensionArray, 

1657 # ndarray[Any, Any]]", expected "ExtensionArrayT") 

1658 return mode(self, dropna=dropna) # type: ignore[return-value] 

1659 

1660 def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

1661 if any( 

1662 isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs 

1663 ): 

1664 return NotImplemented 

1665 

1666 result = arraylike.maybe_dispatch_ufunc_to_dunder_op( 

1667 self, ufunc, method, *inputs, **kwargs 

1668 ) 

1669 if result is not NotImplemented: 

1670 return result 

1671 

1672 if "out" in kwargs: 

1673 return arraylike.dispatch_ufunc_with_out( 

1674 self, ufunc, method, *inputs, **kwargs 

1675 ) 

1676 

1677 if method == "reduce": 

1678 result = arraylike.dispatch_reduction_ufunc( 

1679 self, ufunc, method, *inputs, **kwargs 

1680 ) 

1681 if result is not NotImplemented: 

1682 return result 

1683 

1684 return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) 

1685 

1686 

1687class ExtensionOpsMixin: 

1688 """ 

1689 A base class for linking the operators to their dunder names. 

1690 

1691 .. note:: 

1692 

1693 You may want to set ``__array_priority__`` if you want your 

1694 implementation to be called when involved in binary operations 

1695 with NumPy arrays. 

1696 """ 

1697 

1698 @classmethod 

1699 def _create_arithmetic_method(cls, op): 

1700 raise AbstractMethodError(cls) 

1701 

1702 @classmethod 

1703 def _add_arithmetic_ops(cls): 

1704 setattr(cls, "__add__", cls._create_arithmetic_method(operator.add)) 

1705 setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd)) 

1706 setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub)) 

1707 setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub)) 

1708 setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul)) 

1709 setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul)) 

1710 setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow)) 

1711 setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow)) 

1712 setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod)) 

1713 setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod)) 

1714 setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv)) 

1715 setattr( 

1716 cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv) 

1717 ) 

1718 setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv)) 

1719 setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv)) 

1720 setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod)) 

1721 setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod)) 

1722 

1723 @classmethod 

1724 def _create_comparison_method(cls, op): 

1725 raise AbstractMethodError(cls) 

1726 

1727 @classmethod 

1728 def _add_comparison_ops(cls): 

1729 setattr(cls, "__eq__", cls._create_comparison_method(operator.eq)) 

1730 setattr(cls, "__ne__", cls._create_comparison_method(operator.ne)) 

1731 setattr(cls, "__lt__", cls._create_comparison_method(operator.lt)) 

1732 setattr(cls, "__gt__", cls._create_comparison_method(operator.gt)) 

1733 setattr(cls, "__le__", cls._create_comparison_method(operator.le)) 

1734 setattr(cls, "__ge__", cls._create_comparison_method(operator.ge)) 

1735 

1736 @classmethod 

1737 def _create_logical_method(cls, op): 

1738 raise AbstractMethodError(cls) 

1739 

1740 @classmethod 

1741 def _add_logical_ops(cls): 

1742 setattr(cls, "__and__", cls._create_logical_method(operator.and_)) 

1743 setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_)) 

1744 setattr(cls, "__or__", cls._create_logical_method(operator.or_)) 

1745 setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_)) 

1746 setattr(cls, "__xor__", cls._create_logical_method(operator.xor)) 

1747 setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor)) 

1748 

1749 

1750class ExtensionScalarOpsMixin(ExtensionOpsMixin): 

1751 """ 

1752 A mixin for defining ops on an ExtensionArray. 

1753 

1754 It is assumed that the underlying scalar objects have the operators 

1755 already defined. 

1756 

1757 Notes 

1758 ----- 

1759 If you have defined a subclass MyExtensionArray(ExtensionArray), then 

1760 use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to 

1761 get the arithmetic operators. After the definition of MyExtensionArray, 

1762 insert the lines 

1763 

1764 MyExtensionArray._add_arithmetic_ops() 

1765 MyExtensionArray._add_comparison_ops() 

1766 

1767 to link the operators to your class. 

1768 

1769 .. note:: 

1770 

1771 You may want to set ``__array_priority__`` if you want your 

1772 implementation to be called when involved in binary operations 

1773 with NumPy arrays. 

1774 """ 

1775 

1776 @classmethod 

1777 def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None): 

1778 """ 

1779 A class method that returns a method that will correspond to an 

1780 operator for an ExtensionArray subclass, by dispatching to the 

1781 relevant operator defined on the individual elements of the 

1782 ExtensionArray. 

1783 

1784 Parameters 

1785 ---------- 

1786 op : function 

1787 An operator that takes arguments op(a, b) 

1788 coerce_to_dtype : bool, default True 

1789 boolean indicating whether to attempt to convert 

1790 the result to the underlying ExtensionArray dtype. 

1791 If it's not possible to create a new ExtensionArray with the 

1792 values, an ndarray is returned instead. 

1793 

1794 Returns 

1795 ------- 

1796 Callable[[Any, Any], Union[ndarray, ExtensionArray]] 

1797 A method that can be bound to a class. When used, the method 

1798 receives the two arguments, one of which is the instance of 

1799 this class, and should return an ExtensionArray or an ndarray. 

1800 

1801 Returning an ndarray may be necessary when the result of the 

1802 `op` cannot be stored in the ExtensionArray. The dtype of the 

1803 ndarray uses NumPy's normal inference rules. 

1804 

1805 Examples 

1806 -------- 

1807 Given an ExtensionArray subclass called MyExtensionArray, use 

1808 

1809 __add__ = cls._create_method(operator.add) 

1810 

1811 in the class definition of MyExtensionArray to create the operator 

1812 for addition, that will be based on the operator implementation 

1813 of the underlying elements of the ExtensionArray 

1814 """ 

1815 

1816 def _binop(self, other): 

1817 def convert_values(param): 

1818 if isinstance(param, ExtensionArray) or is_list_like(param): 

1819 ovalues = param 

1820 else: # Assume it's an object 

1821 ovalues = [param] * len(self) 

1822 return ovalues 

1823 

1824 if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)): 

1825 # rely on pandas to unbox and dispatch to us 

1826 return NotImplemented 

1827 

1828 lvalues = self 

1829 rvalues = convert_values(other) 

1830 

1831 # If the operator is not defined for the underlying objects, 

1832 # a TypeError should be raised 

1833 res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] 

1834 

1835 def _maybe_convert(arr): 

1836 if coerce_to_dtype: 

1837 # https://github.com/pandas-dev/pandas/issues/22850 

1838 # We catch all regular exceptions here, and fall back 

1839 # to an ndarray. 

1840 res = maybe_cast_to_extension_array(type(self), arr) 

1841 if not isinstance(res, type(self)): 

1842 # exception raised in _from_sequence; ensure we have ndarray 

1843 res = np.asarray(arr) 

1844 else: 

1845 res = np.asarray(arr, dtype=result_dtype) 

1846 return res 

1847 

1848 if op.__name__ in {"divmod", "rdivmod"}: 

1849 a, b = zip(*res) 

1850 return _maybe_convert(a), _maybe_convert(b) 

1851 

1852 return _maybe_convert(res) 

1853 

1854 op_name = f"__{op.__name__}__" 

1855 return set_function_name(_binop, op_name, cls) 

1856 

1857 @classmethod 

1858 def _create_arithmetic_method(cls, op): 

1859 return cls._create_method(op) 

1860 

1861 @classmethod 

1862 def _create_comparison_method(cls, op): 

1863 return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)
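A short sketch, not part of base.py, of wiring ``ExtensionScalarOpsMixin`` into a
hypothetical subclass, following the Notes in the mixin docstring above. The name
``MyExtensionArray`` is illustrative only.

from pandas.api.extensions import ExtensionArray, ExtensionScalarOpsMixin


class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin):
    # ... the usual abstract-method implementations go here ...
    pass


# Bind the operator dunders after the class definition:
MyExtensionArray._add_arithmetic_ops()
MyExtensionArray._add_comparison_ops()

# Each generated dunder is built by _create_method: it applies the Python operator
# element-wise and tries to re-wrap the result via _from_sequence, falling back to
# an ndarray (using NumPy's normal inference rules) when that fails.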