Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/masked.py: 16%

1from __future__ import annotations

3from typing import (

4 TYPE_CHECKING,

5 Any,

6 Literal,

7 Sequence,

8 TypeVar,

9 overload,

10)

11import warnings

13import numpy as np

15from pandas._libs import (

16 lib,

17 missing as libmissing,

18)

19from pandas._typing import (

20 ArrayLike,

21 AstypeArg,

22 DtypeObj,

23 NpDtype,

24 PositionalIndexer,

25 Scalar,

26 ScalarIndexer,

27 SequenceIndexer,

28 Shape,

29 npt,

30)

31from pandas.errors import AbstractMethodError

32from pandas.util._decorators import doc

33from pandas.util._validators import validate_fillna_kwargs

35from pandas.core.dtypes.astype import astype_nansafe

36from pandas.core.dtypes.base import ExtensionDtype

37from pandas.core.dtypes.common import (

38 is_bool,

39 is_bool_dtype,

40 is_datetime64_dtype,

41 is_dtype_equal,

42 is_float_dtype,

43 is_integer_dtype,

44 is_list_like,

45 is_object_dtype,

46 is_scalar,

47 is_string_dtype,

48 pandas_dtype,

49)

50from pandas.core.dtypes.dtypes import BaseMaskedDtype

51from pandas.core.dtypes.inference import is_array_like

52from pandas.core.dtypes.missing import (

53 array_equivalent,

54 is_valid_na_for_dtype,

55 isna,

56 notna,

57)

59from pandas.core import (

60 algorithms as algos,

61 arraylike,

62 missing,

63 nanops,

64 ops,

65)

66from pandas.core.algorithms import (

67 factorize_array,

68 isin,

69 take,

70)

71from pandas.core.array_algos import masked_reductions

72from pandas.core.array_algos.quantile import quantile_with_mask

73from pandas.core.arraylike import OpsMixin

74from pandas.core.arrays import ExtensionArray

75from pandas.core.construction import ensure_wrapped_if_datetimelike

76from pandas.core.indexers import check_array_indexer

77from pandas.core.ops import invalid_comparison

79if TYPE_CHECKING: 79 ↛ 80line 79 didn't jump to line 80, because the condition on line 79 was never true

80 from pandas import Series

81 from pandas.core.arrays import BooleanArray

82 from pandas._typing import (

83 NumpySorter,

84 NumpyValueArrayLike,

85 )

87from pandas.compat.numpy import function as nv

89BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")

92class BaseMaskedArray(OpsMixin, ExtensionArray):

93 """

94 Base class for masked arrays (which use _data and _mask to store the data).

96 numpy based

97 """

99 # The value used to fill '_data' to avoid upcasting

100 _internal_fill_value: Scalar

101 # our underlying data and mask are each ndarrays

102 _data: np.ndarray

103 _mask: npt.NDArray[np.bool_]

104

105 # Fill values used for any/all

106 _truthy_value = Scalar # bool(_truthy_value) = True

107 _falsey_value = Scalar # bool(_falsey_value) = False

108

def __init__(
    self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
) -> None:
    """
    Store ``values`` and ``mask`` as the backing arrays of this object.

    Parameters
    ----------
    values : np.ndarray
        Underlying data; expected to be validated by the subclass already.
    mask : np.ndarray of bool
        Boolean array with the same shape as ``values``.
    copy : bool, default False
        If True, both arrays are copied before being stored.

    Raises
    ------
    TypeError
        If ``mask`` is not a boolean numpy array.
    ValueError
        If ``values`` and ``mask`` differ in shape.
    """
    mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
    if not mask_is_bool_ndarray:
        raise TypeError(
            "mask should be boolean numpy array. Use "
            "the 'pd.array' function instead"
        )
    if values.shape != mask.shape:
        raise ValueError("values.shape must match mask.shape")

    self._data = values.copy() if copy else values
    self._mask = mask.copy() if copy else mask

127

@classmethod
def _from_sequence(
    cls: type[BaseMaskedArrayT], scalars, *, dtype=None, copy: bool = False
) -> BaseMaskedArrayT:
    """
    Build an instance from ``scalars``.

    The conversion is delegated to ``cls._coerce_to_array``; the resulting
    (values, mask) pair is handed to the class constructor.
    """
    coerced = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
    data, na_mask = coerced
    return cls(data, na_mask)

134

@property
def dtype(self) -> BaseMaskedDtype:
    # Abstract on this base class: accessing it here always raises
    # AbstractMethodError, so a usable dtype has to come from an override.
    raise AbstractMethodError(self)

138

@overload
def __getitem__(self, item: ScalarIndexer) -> Any:
    ...

@overload
def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT:
    ...

def __getitem__(
    self: BaseMaskedArrayT, item: PositionalIndexer
) -> BaseMaskedArrayT | Any:
    """
    Select by position.

    When indexing the mask yields a single boolean, the result is either the
    dtype's NA value (mask bit set) or the raw element from ``_data``.
    Otherwise a new array of the same type is built from the selected data
    and mask.
    """
    item = check_array_indexer(self, item)
    selected_mask = self._mask[item]

    if not is_bool(selected_mask):
        # Array-like selection: keep data and mask paired.
        return type(self)(self._data[item], selected_mask)

    # Scalar selection.
    return self.dtype.na_value if selected_mask else self._data[item]

160

@doc(ExtensionArray.fillna)
def fillna(
    self: BaseMaskedArrayT, value=None, method=None, limit=None
) -> BaseMaskedArrayT:
    # NOTE: the docstring is supplied by the ``doc`` decorator.
    value, method = validate_fillna_kwargs(value, method)

    na_mask = self._mask

    if is_array_like(value):
        if len(value) != len(self):
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f" expected {len(self)}"
            )
        # Only the entries lined up with missing positions are used.
        value = value[na_mask]

    if not na_mask.any():
        # Nothing is missing; still return a new object.
        return self.copy()

    if method is None:
        # Fill with value, going through __setitem__ on a copy.
        filled = self.copy()
        filled[na_mask] = value
        return filled

    # Method-based fill: the fill function mutates the transposed copies.
    fill_func = missing.get_fill_func(method, ndim=self.ndim)
    data_t = self._data.copy().T
    mask_t = na_mask.copy().T
    fill_func(data_t, limit=limit, mask=mask_t)
    return type(self)(data_t.T, mask_t.T)

191

@classmethod
def _coerce_to_array(
    cls, values, *, dtype: DtypeObj, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    # Abstract on this base class: always raises AbstractMethodError.
    # Callers in this class (_from_sequence, __setitem__) unpack the result
    # of an override as a (values, mask) pair.
    raise AbstractMethodError(cls)

197

def _validate_setitem_value(self, value):
    """
    Return ``value`` unchanged if it is an acceptable scalar for our dtype.

    Acceptance depends on ``self.dtype.kind``: booleans for kind ``"b"``;
    integers or floats for kind ``"f"``; for any other kind, integers or
    floats with an integral value.

    Raises
    ------
    TypeError
        If ``value`` is not acceptable for the dtype.
    """
    kind = self.dtype.kind
    # TODO: get this all from np_can_hold_element?
    if kind == "b":
        acceptable = lib.is_bool(value)
    elif kind == "f":
        acceptable = lib.is_integer(value) or lib.is_float(value)
    else:
        # TODO: unsigned checks
        acceptable = lib.is_integer(value) or (
            lib.is_float(value) and value.is_integer()
        )

    if acceptable:
        return value

    # Note: without the "str" here, the f-string rendering raises in
    # py38 builds.
    raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")

224

def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key``, updating data and mask in place.

    A scalar that is a valid NA for our dtype only sets the mask; any other
    scalar is validated, written to the data and unmasked. Non-scalars are
    coerced to a (values, mask) pair and written to both arrays.
    """
    key = check_array_indexer(self, key)

    if not is_scalar(value):
        new_data, new_mask = self._coerce_to_array(value, dtype=self.dtype)
        self._data[key] = new_data
        self._mask[key] = new_mask
        return

    if is_valid_na_for_dtype(value, self.dtype):
        self._mask[key] = True
        return

    validated = self._validate_setitem_value(value)
    self._data[key] = validated
    self._mask[key] = False

241

def __iter__(self):
    """
    Iterate over the elements.

    For 1-D arrays, masked positions yield the dtype's NA value and the
    others yield the raw element from ``_data``. For other dimensionalities,
    each step yields ``self[i]``.
    """
    if self.ndim != 1:
        for pos in range(len(self)):
            yield self[pos]
        return

    for is_missing, element in zip(self._mask, self._data):
        if is_missing:
            yield self.dtype.na_value
        else:
            yield element

252

def __len__(self) -> int:
    """Return ``len`` of the underlying data array."""
    backing = self._data
    return len(backing)

255

@property
def shape(self) -> Shape:
    """Shape of the underlying data array."""
    backing = self._data
    return backing.shape

259

@property
def ndim(self) -> int:
    """Number of dimensions of the underlying data array."""
    backing = self._data
    return backing.ndim

263

def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT:
    """Return a new array with ``axis1``/``axis2`` swapped in data and mask."""
    swapped_data, swapped_mask = (
        arr.swapaxes(axis1, axis2) for arr in (self._data, self._mask)
    )
    return type(self)(swapped_data, swapped_mask)

268

def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT:
    """Return a new array with the entries at ``loc`` removed along ``axis``."""
    kept_data, kept_mask = (
        np.delete(arr, loc, axis=axis) for arr in (self._data, self._mask)
    )
    return type(self)(kept_data, kept_mask)

273

def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """Return a new array with data and mask reshaped by the same arguments."""
    reshaped_data, reshaped_mask = (
        arr.reshape(*args, **kwargs) for arr in (self._data, self._mask)
    )
    return type(self)(reshaped_data, reshaped_mask)

278

def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """Return a new array with data and mask flattened by the same arguments."""
    # TODO: need to make sure we have the same order for data/mask
    flat_data, flat_mask = (
        arr.ravel(*args, **kwargs) for arr in (self._data, self._mask)
    )
    return type(self)(flat_data, flat_mask)

284

@property
def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """New array of the same type built from the transposed data and mask."""
    transposed_data = self._data.T
    transposed_mask = self._mask.T
    return type(self)(transposed_data, transposed_mask)

288

def round(self, decimals: int = 0, *args, **kwargs):
    """
    Round each value in the array to the given number of decimals.

    Parameters
    ----------
    decimals : int, default 0
        Number of decimal places to round to. If decimals is negative,
        it specifies the number of positions to the left of the decimal point.
    *args, **kwargs
        Additional arguments and keywords have no effect but might be
        accepted for compatibility with NumPy.

    Returns
    -------
    NumericArray
        Rounded values of the NumericArray.

    See Also
    --------
    numpy.around : Round values of an np.array.
    DataFrame.round : Round values of a DataFrame.
    Series.round : Round values of a Series.
    """
    nv.validate_round(args, kwargs)
    rounded = np.round(self._data, decimals=decimals, **kwargs)

    # Usually we'll get same type as self, but ndarray[bool] casts to float
    mask_copy = self._mask.copy()
    return self._maybe_mask_result(rounded, mask_copy)

318

319 # ------------------------------------------------------------------

320 # Unary Methods

321

def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Apply ``~`` to the data; the mask is carried over as a copy."""
    inverted = ~self._data
    return type(self)(inverted, self._mask.copy())

324

def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Negate the data; the mask is carried over as a copy."""
    negated = -self._data
    return type(self)(negated, self._mask.copy())

327

def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Unary plus: returns ``self.copy()``."""
    duplicate = self.copy()
    return duplicate

330

def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Apply ``abs`` to the data; the mask is carried over as a copy."""
    magnitudes = abs(self._data)
    return type(self)(magnitudes, self._mask.copy())

333

334 # ------------------------------------------------------------------

335

def to_numpy(
    self,
    dtype: npt.DTypeLike | None = None,
    copy: bool = False,
    na_value: object = lib.no_default,
) -> np.ndarray:
    """
    Convert to a NumPy Array.

    By default converts to an object-dtype NumPy array. Specify the `dtype` and
    `na_value` keywords to customize the conversion.

    Parameters
    ----------
    dtype : dtype, default object
        The numpy dtype to convert to.
    copy : bool, default False
        Whether to ensure that the returned value is not a view on
        the array. Note that ``copy=False`` does not *ensure* that
        ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
        a copy is made, even if not strictly necessary. When missing
        values are present the data is always converted without passing
        ``copy`` on, because the result is written to.
    na_value : scalar, optional
        Scalar missing value indicator to use in numpy array. Defaults
        to the native missing value indicator of this array (pd.NA).

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If missing values are present, `dtype` is neither object nor
        string, and `na_value` is left at :attr:`NA`.

    Examples
    --------
    An object-dtype is the default result

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a.to_numpy()
    array([True, False, <NA>], dtype=object)

    When no missing values are present, an equivalent dtype can be used.

    >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
    array([ True, False])
    >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
    array([1, 2])

    However, requesting such dtype will raise a ValueError if
    missing values are present and the default missing value :attr:`NA`
    is used.

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean

    >>> a.to_numpy(dtype="bool")
    Traceback (most recent call last):
    ...
    ValueError: cannot convert to 'bool'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.

    Specify a valid `na_value` instead

    >>> a.to_numpy(dtype="bool", na_value=False)
    array([ True, False, False])
    """
    if na_value is lib.no_default:
        na_value = libmissing.NA
    if dtype is None:
        dtype = object

    if not self._hasna:
        return self._data.astype(dtype, copy=copy)

    na_is_default = na_value is libmissing.NA
    if na_is_default and not (is_object_dtype(dtype) or is_string_dtype(dtype)):
        raise ValueError(
            f"cannot convert to '{dtype}'-dtype NumPy array "
            "with missing values. Specify an appropriate 'na_value' "
            "for this dtype."
        )

    # don't pass copy to astype -> always need a copy since we are mutating
    converted = self._data.astype(dtype)
    converted[self._mask] = na_value
    return converted

423

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
    ...

@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
    ...

@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
    ...

def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast to ``dtype``.

    Handled in order: same dtype (returns self or a copy); another masked
    dtype (data cast directly, mask reused or copied); any other extension
    dtype (via that dtype's ``_from_sequence``); otherwise a numpy dtype,
    going through ``to_numpy``.

    Raises
    ------
    ValueError
        If missing values are present and ``dtype`` is a numpy integer or
        bool dtype.
    """
    dtype = pandas_dtype(dtype)

    if is_dtype_equal(dtype, self.dtype):
        return self.copy() if copy else self

    # if we are astyping to another nullable masked dtype, we can fastpath
    if isinstance(dtype, BaseMaskedDtype):
        # TODO deal with NaNs for FloatingArray case
        new_data = self._data.astype(dtype.numpy_dtype, copy=copy)
        # mask is copied depending on whether the data was copied, and
        # not directly depending on the `copy` keyword
        if new_data is self._data:
            new_mask = self._mask
        else:
            new_mask = self._mask.copy()
        target_cls = dtype.construct_array_type()
        return target_cls(new_data, new_mask, copy=False)

    if isinstance(dtype, ExtensionDtype):
        return dtype.construct_array_type()._from_sequence(
            self, dtype=dtype, copy=copy
        )

    # coerce
    na_value: float | np.datetime64 | lib.NoDefault
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        na_value = np.nan
    elif is_datetime64_dtype(dtype):
        na_value = np.datetime64("NaT")
    else:
        na_value = lib.no_default

    # to_numpy will also raise, but we get somewhat nicer exception messages here
    # (careful: astype_nansafe converts np.nan to True, hence the bool check)
    for dtype_check, message in (
        (is_integer_dtype, "cannot convert NA to integer"),
        (is_bool_dtype, "cannot convert float NaN to bool"),
    ):
        if dtype_check(dtype) and self._hasna:
            raise ValueError(message)

    converted = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
    if self.dtype.kind == "f":
        # TODO: make this consistent between IntegerArray/FloatingArray,
        # see test_astype_str
        return astype_nansafe(converted, dtype, copy=False)
    return converted

482

483 __array_priority__ = 1000 # higher than ndarray so ops dispatch to us

484

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    """
    NumPy array interface.

    Delegates to ``to_numpy(dtype=dtype)``; with ``dtype=None`` that yields
    an object array, which preserves our scalar values.
    """
    converted = self.to_numpy(dtype=dtype)
    return converted

491

492 _HANDLED_TYPES: tuple[type, ...]

493

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    NumPy ufunc protocol.

    For masked-array inputs the ufunc is applied to ``._data`` and the
    combined mask of all masked inputs is applied to the result. Returns
    ``NotImplemented`` when any input or ``out`` entry is of an unhandled
    type.
    """
    out = kwargs.get("out", ())

    handled_types = self._HANDLED_TYPES + (BaseMaskedArray,)
    if not all(isinstance(operand, handled_types) for operand in inputs + out):
        return NotImplemented

    # for binary ops, use our custom dunder methods
    dispatched = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    if "out" in kwargs:
        # e.g. test_ufunc_with_out
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        reduced = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if reduced is not NotImplemented:
            return reduced

    # Union of the masks of all masked inputs; raw data is what the ufunc sees.
    mask = np.zeros(len(self), dtype=bool)
    raw_inputs = []
    for operand in inputs:
        if isinstance(operand, BaseMaskedArray):
            mask |= operand._mask
            raw_inputs.append(operand._data)
        else:
            raw_inputs.append(operand)

    def reconstruct(x):
        # we don't worry about scalar `x` here, since we
        # raise for reduce up above.
        from pandas.core.arrays import (
            BooleanArray,
            FloatingArray,
            IntegerArray,
        )

        if is_bool_dtype(x.dtype):
            return BooleanArray(x, mask.copy())
        if is_integer_dtype(x.dtype):
            return IntegerArray(x, mask.copy())
        if is_float_dtype(x.dtype):
            mask_copy = mask.copy()
            if x.dtype == np.float16:
                # reached in e.g. np.sqrt on BooleanArray
                # we don't support float16
                x = x.astype(np.float32)
            return FloatingArray(x, mask_copy)

        x[mask] = np.nan
        return x

    result = getattr(ufunc, method)(*raw_inputs, **kwargs)
    if ufunc.nout > 1:
        # e.g. np.divmod
        return tuple(reconstruct(part) for part in result)
    if method == "reduce":
        # e.g. np.add.reduce; test_ufunc_reduce_raises
        return self._na_value if self._mask.any() else result
    return reconstruct(result)

570

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    The data, the mask and the requested ``type`` are passed straight to
    ``pyarrow.array``.
    """
    import pyarrow as pa

    arrow_kwargs = {"mask": self._mask, "type": type}
    return pa.array(self._data, **arrow_kwargs)

578

@property
def _hasna(self) -> bool:
    """Whether any mask entry is set."""
    # Note: this is expensive right now! The hope is that we can
    # make this faster by having an optional mask, but not have to change
    # source code using it..
    any_missing = self._mask.any()

    # error: Incompatible return value type (got "bool_", expected "bool")
    return any_missing  # type: ignore[return-value]

587

def _propagate_mask(
    self, mask: npt.NDArray[np.bool_] | None, other
) -> npt.NDArray[np.bool_]:
    """
    Combine our mask with the mask of the other operand.

    With an explicit ``mask`` the result is the element-wise OR with ours.
    Without one, a copy of our mask is used, turned all-True when ``other``
    is ``NA``.
    """
    if mask is not None:
        return self._mask | mask

    own = self._mask.copy()  # TODO: need test for BooleanArray needing a copy
    if other is libmissing.NA:
        # GH#45421 don't alter inplace
        own = own | True
    return own

599

def _arith_method(self, other, op):
    """
    Apply the arithmetic operator ``op`` between ``self`` and ``other``.

    The operation runs on the raw data, the operand masks are combined via
    ``_propagate_mask`` (with special cases for ``pow``/``rpow``), and the
    result is wrapped by ``_maybe_mask_result``.

    Raises
    ------
    NotImplementedError
        For list-like operands with more than one dimension, and for
        division/power operators between a bool-kind array and ``NA``.
    """
    op_name = op.__name__
    other_mask = None

    if isinstance(other, BaseMaskedArray):
        other, other_mask = other._data, other._mask
    elif is_list_like(other):
        if not isinstance(other, ExtensionArray):
            other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")

    # We wrap the non-masked arithmetic logic used for numpy dtypes
    # in Series/Index arithmetic ops.
    other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
    array_op = ops.get_array_op(op)
    other = ensure_wrapped_if_datetimelike(other)

    if isinstance(other, np.bool_) and op_name in {"pow", "rpow"}:
        # Avoid DeprecationWarning: In future, it will be an error
        # for 'np.bool_' scalars to be interpreted as an index
        # e.g. test_array_scalar_like_equivalence
        other = bool(other)

    mask = self._propagate_mask(other_mask, other)
    other_is_na = other is libmissing.NA

    if other_is_na:
        result = np.ones_like(self._data)
        if self.dtype.kind == "b":
            unsupported_for_bool = {
                "floordiv",
                "rfloordiv",
                "pow",
                "rpow",
                "truediv",
                "rtruediv",
            }
            if op_name in unsupported_for_bool:
                # GH#41165 Try to match non-masked Series behavior
                # This is still imperfect GH#46043
                raise NotImplementedError(
                    f"operator '{op_name}' not implemented for bool dtypes"
                )
            result_dtype = "int8" if op_name in {"mod", "rmod"} else "bool"
            result = result.astype(result_dtype)
        elif "truediv" in op_name and self.dtype.kind != "f":
            # The actual data here doesn't matter since the mask
            # will be all-True, but since this is division, we want
            # to end up with floating dtype.
            result = result.astype(np.float64)
    else:
        # Make sure we do this before the "pow" mask checks
        # to get an expected exception message on shape mismatch.
        if self.dtype.kind in ("i", "u") and op_name in ("floordiv", "mod"):
            # TODO(GH#30188) ATM we don't match the behavior of non-masked
            # types with respect to floordiv-by-zero
            array_op = op

        with np.errstate(all="ignore"):
            result = array_op(self._data, other)

    if op_name == "pow":
        # 1 ** x is 1.
        mask = np.where((self._data == 1) & ~self._mask, False, mask)
        # x ** 0 is 1.
        if other_mask is not None:
            mask = np.where((other == 0) & ~other_mask, False, mask)
        elif not other_is_na:
            mask = np.where(other == 0, False, mask)

    elif op_name == "rpow":
        # 1 ** x is 1.
        if other_mask is not None:
            mask = np.where((other == 1) & ~other_mask, False, mask)
        elif not other_is_na:
            mask = np.where(other == 1, False, mask)
        # x ** 0 is 1.
        mask = np.where((self._data == 0) & ~self._mask, False, mask)

    return self._maybe_mask_result(result, mask)

683

684 _logical_method = _arith_method

685

def _cmp_method(self, other, op) -> BooleanArray:
    """
    Apply the comparison operator ``op`` and return a BooleanArray.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` differs in length from ``self``.
    """
    from pandas.core.arrays import BooleanArray

    other_mask = None

    if isinstance(other, BaseMaskedArray):
        other, other_mask = other._data, other._mask
    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy does not handle pd.NA well as "other" scalar (it returns
        # a scalar False instead of an array)
        # This may be fixed by NA.__array_ufunc__. Revisit this check
        # once that's implemented.
        data_shape = self._data.shape
        result = np.zeros(data_shape, dtype="bool")
        other_mask = np.ones(data_shape, dtype="bool")
    else:
        with warnings.catch_warnings():
            # numpy may show a FutureWarning:
            # elementwise comparison failed; returning scalar instead,
            # but in the future will perform elementwise comparison
            # before returning NotImplemented. We fall back to the correct
            # behavior today, so that should be fine to ignore.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                compare = getattr(self._data, f"__{op.__name__}__")
                result = compare(other)

        if result is NotImplemented:
            result = invalid_comparison(self._data, other, op)

    combined_mask = self._propagate_mask(other_mask, other)
    return BooleanArray(result, combined_mask, copy=False)

725

def _maybe_mask_result(self, result, mask):
    """
    Wrap ``result`` in the array type that matches its dtype.

    Parameters
    ----------
    result : array-like or tuple[array-like]
    mask : array-like bool

    Returns
    -------
    A FloatingArray, BooleanArray or IntegerArray for float, bool or integer
    results; a TimedeltaArray with NaT at masked positions for
    ``timedelta64[ns]``; otherwise ``result`` itself with NaN written at
    masked positions. A tuple is wrapped element by element.
    """
    if isinstance(result, tuple):
        # i.e. divmod
        quotient, remainder = result
        return (
            self._maybe_mask_result(quotient, mask),
            self._maybe_mask_result(remainder, mask),
        )

    result_dtype = result.dtype

    if is_float_dtype(result_dtype):
        from pandas.core.arrays import FloatingArray

        return FloatingArray(result, mask, copy=False)

    if is_bool_dtype(result_dtype):
        from pandas.core.arrays import BooleanArray

        return BooleanArray(result, mask, copy=False)

    if result_dtype == "timedelta64[ns]":
        # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
        from pandas.core.arrays import TimedeltaArray

        if not isinstance(result, TimedeltaArray):
            result = TimedeltaArray._simple_new(result)

        result[mask] = result.dtype.type("NaT")
        return result

    if is_integer_dtype(result_dtype):
        from pandas.core.arrays import IntegerArray

        return IntegerArray(result, mask, copy=False)

    result[mask] = np.nan
    return result

769

def isna(self) -> np.ndarray:
    """Return a copy of the mask."""
    mask_copy = self._mask.copy()
    return mask_copy

772

@property
def _na_value(self):
    """The NA value of our dtype."""
    own_dtype = self.dtype
    return own_dtype.na_value

776

@property
def nbytes(self) -> int:
    """Combined byte size of the data and mask arrays."""
    data_bytes = self._data.nbytes
    mask_bytes = self._mask.nbytes
    return data_bytes + mask_bytes

780

@classmethod
def _concat_same_type(
    cls: type[BaseMaskedArrayT],
    to_concat: Sequence[BaseMaskedArrayT],
    axis: int = 0,
) -> BaseMaskedArrayT:
    """Concatenate the data and the masks of ``to_concat`` along ``axis``."""
    data_parts = [arr._data for arr in to_concat]
    mask_parts = [arr._mask for arr in to_concat]
    joined_data = np.concatenate(data_parts, axis=axis)
    joined_mask = np.concatenate(mask_parts, axis=axis)
    return cls(joined_data, joined_mask)

790

def take(
    self: BaseMaskedArrayT,
    indexer,
    *,
    allow_fill: bool = False,
    fill_value: Scalar | None = None,
    axis: int = 0,
) -> BaseMaskedArrayT:
    """
    Take elements by position from data and mask.

    With ``allow_fill`` and a non-NA ``fill_value``, positions where the
    indexer is -1 receive ``fill_value`` and are unmasked; with an NA
    ``fill_value`` those positions stay masked.
    """
    # For an NA fill_value the data is filled with the internal fill value
    # instead, to avoid upcasting.
    if isna(fill_value):
        internal_fill = self._internal_fill_value
    else:
        internal_fill = fill_value

    taken_data = take(
        self._data,
        indexer,
        fill_value=internal_fill,
        allow_fill=allow_fill,
        axis=axis,
    )
    taken_mask = take(
        self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
    )

    # if we are filling
    # we only fill where the indexer is null
    # not existing missing values
    # TODO(jreback) what if we have a non-na float as a fill value?
    if allow_fill and notna(fill_value):
        filled_positions = np.asarray(indexer) == -1
        taken_data[filled_positions] = fill_value
        taken_mask = taken_mask ^ filled_positions

    return type(self)(taken_data, taken_mask, copy=False)

824

# error: Return type "BooleanArray" of "isin" incompatible with return type
# "ndarray" in supertype "ExtensionArray"
def isin(self, values) -> BooleanArray:  # type: ignore[override]
    """
    Element-wise membership test against ``values``.

    The returned BooleanArray has an all-False mask: positions that are
    missing in ``self`` are set to whether ``values`` itself contains our
    NA value.
    """
    from pandas.core.arrays import BooleanArray

    # algorithms.isin will eventually convert values to an ndarray, so no extra
    # cost to doing it here first
    candidates = np.asarray(values)
    membership = isin(self._data, candidates)

    if self._hasna:
        na_in_values = is_object_dtype(candidates.dtype) and any(
            item is self.dtype.na_value for item in candidates
        )

        # For now, NA does not propagate so set result according to presence of NA,
        # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
        membership[self._mask] = na_in_values

    empty_mask = np.zeros(self._data.shape, dtype=bool)
    return BooleanArray(membership, empty_mask, copy=False)

847

def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return a new array of the same type backed by copies of data and mask."""
    data_copy = self._data.copy()
    mask_copy = self._mask.copy()
    return type(self)(data_copy, mask_copy, copy=False)

853

def unique(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """
    Compute the BaseMaskedArray of unique values.

    Returns
    -------
    uniques : BaseMaskedArray
    """
    unique_pair = algos.unique_with_mask(self._data, self._mask)
    unique_data, unique_mask = unique_pair
    return type(self)(unique_data, unique_mask, copy=False)

864

@doc(ExtensionArray.searchsorted)
def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter = None,
) -> npt.NDArray[np.intp] | np.intp:
    # NOTE: the docstring is supplied by the ``doc`` decorator.
    if self._hasna:
        raise ValueError(
            "searchsorted requires array to be sorted, which is impossible "
            "with NAs present."
        )

    needle = value.astype(object) if isinstance(value, ExtensionArray) else value

    # Base class searchsorted would cast to object, which is *much* slower.
    return self._data.searchsorted(needle, side=side, sorter=sorter)

881

@doc(ExtensionArray.factorize)
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    # NOTE: the docstring is supplied by the ``doc`` decorator.
    resolved_na_sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
    data = self._data
    na_mask = self._mask

    # Pass non-None na_sentinel; recode and add NA to uniques if necessary below
    if resolved_na_sentinel is None:
        sentinel_for_factorize = -1
    else:
        sentinel_for_factorize = resolved_na_sentinel
    codes, uniques = factorize_array(
        data, na_sentinel=sentinel_for_factorize, mask=na_mask
    )

    # check that factorize_array correctly preserves dtype.
    assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype)

    has_na = na_mask.any()
    # NA gets its own entry in uniques only when no sentinel is in use.
    add_na_entry = resolved_na_sentinel is None and has_na

    # Make room for an NA value when needed
    size = len(uniques) + 1 if add_na_entry else len(uniques)
    uniques_mask = np.zeros(size, dtype=bool)

    if add_na_entry:
        first_na_pos = na_mask.argmax()
        # Insert na with the proper code
        if first_na_pos == 0:
            na_code = np.intp(0)
        else:
            # mypy error: Slice index must be an integer or None
            # https://github.com/python/mypy/issues/2410
            na_code = codes[:first_na_pos].max() + 1  # type: ignore[misc]
        codes[codes >= na_code] += 1
        codes[codes == -1] = na_code
        # dummy value for uniques; not used since uniques_mask will be True
        uniques = np.insert(uniques, na_code, 0)
        uniques_mask[na_code] = True

    uniques_ea = type(self)(uniques, uniques_mask)
    return codes, uniques_ea

923

@doc(ExtensionArray._values_for_argsort)
def _values_for_argsort(self) -> np.ndarray:
    # NOTE: the docstring is supplied by the ``doc`` decorator.
    # The raw data array is returned as-is; the mask is not applied here.
    sort_values = self._data
    return sort_values

927

def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each unique value.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of missing values.

    Returns
    -------
    counts : Series
        Counts with "Int64" dtype, indexed by the unique values cast to this
        array's dtype. With ``dropna=False`` a final entry, labelled with
        the dtype's NA value, holds the number of missing values.

    See Also
    --------
    Series.value_counts
    """
    from pandas import (
        Index,
        Series,
    )
    from pandas.arrays import IntegerArray

    if dropna:
        keys, counts = algos.value_counts_arraylike(
            self._data, dropna=True, mask=self._mask
        )
        res = Series(counts, index=keys)
        res.index = res.index.astype(self.dtype)
        res = res.astype("Int64")
        return res

    # From here on dropna is False: the dropna=True case returned above, so
    # the former second ``if dropna:`` branch was unreachable and is gone.

    # compute counts on the data with no nans
    data = self._data[~self._mask]
    non_na_counts = Index(data).value_counts()

    # count the mask and append it as the last entry
    counts = np.empty(len(non_na_counts) + 1, dtype="int64")
    counts[:-1] = non_na_counts
    counts[-1] = self._mask.sum()

    index = non_na_counts.index
    index = index.insert(len(index), self.dtype.na_value)
    index = index.astype(self.dtype)

    mask = np.zeros(len(counts), dtype="bool")
    counts_array = IntegerArray(counts, mask)

    return Series(counts_array, index=index)

982

@doc(ExtensionArray.equals)
def equals(self, other) -> bool:
    # NOTE: the docstring is supplied by the ``doc`` decorator.
    if type(self) != type(other) or other.dtype != self.dtype:
        return False

    # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
    # equal.
    masks_match = np.array_equal(self._mask, other._mask)
    if not masks_match:
        return False

    # Only the non-missing entries are compared.
    own_valid = self._data[~self._mask]
    other_valid = other._data[~other._mask]
    return array_equivalent(own_valid, other_valid, dtype_equal=True)

998

def _quantile(
    self, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArray:
    """
    Dispatch to quantile_with_mask, needed because we do not have
    _from_factorized.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    res = quantile_with_mask(
        self._data,
        mask=self._mask,
        # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype)
        # instead of np.nan
        fill_value=np.nan,
        qs=qs,
        interpolation=interpolation,
    )

    # Our result mask is all-False unless we are all-NA, in which
    # case it is all-True.
    all_missing = False
    if self._hasna:
        if self.ndim == 2:
            # I think this should be out_mask=self.isna().all(axis=1)
            # but am holding off until we have tests
            raise NotImplementedError
        all_missing = self.isna().all()

    make_mask = np.ones if all_missing else np.zeros
    out_mask = make_mask(res.shape, dtype=bool)
    return self._maybe_mask_result(res, mask=out_mask)

1034

1035 # ------------------------------------------------------------------

1036 # Reductions

1037

1038 def _reduce(self, name: str, *, skipna: bool = True, **kwargs):

1039 if name in {"any", "all", "min", "max", "sum", "prod"}:

1040 return getattr(self, name)(skipna=skipna, **kwargs)

1041

1042 data = self._data

1043 mask = self._mask

1044

1045 if name in {"mean"}:

1046 op = getattr(masked_reductions, name)

1047 result = op(data, mask, skipna=skipna, **kwargs)

1048 return result

1049

1050 # coerce to a nan-aware float if needed

1051 # (we explicitly use NaN within reductions)

1052 if self._hasna:

1053 data = self.to_numpy("float64", na_value=np.nan)

1054

1055 # median, var, std, skew, kurt, idxmin, idxmax

1056 op = getattr(nanops, "nan" + name)

1057 result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

1058

1059 if np.isnan(result):

1060 return libmissing.NA

1061

1062 return result

1063

1064 def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):

1065 if isinstance(result, np.ndarray):

1066 axis = kwargs["axis"]

1067 if skipna:

1068 # we only retain mask for all-NA rows/columns

1069 mask = self._mask.all(axis=axis)

1070 else:

1071 mask = self._mask.any(axis=axis)

1072

1073 return self._maybe_mask_result(result, mask)

1074 return result

1075

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Compute the sum via ``masked_reductions.sum``.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.sum``.
    min_count : int, default 0
        Forwarded to ``masked_reductions.sum``.
    axis : int or None, default 0
        Axis to reduce over.
    **kwargs
        Checked with ``nv.validate_sum``. An ``out`` entry is accepted only
        when it is None.

    Returns
    -------
    object
        The reduction result passed through ``_wrap_reduction_result``.

    Raises
    ------
    NotImplementedError
        If ``out`` is given and is not None.
    """
    nv.validate_sum((), kwargs)

    # TODO: do this in validate_sum?
    # np.sum; test_floating_array_numpy_sum
    if kwargs.pop("out", None) is not None:
        raise NotImplementedError

    total = masked_reductions.sum(
        self._data, self._mask, skipna=skipna, min_count=min_count, axis=axis
    )
    return self._wrap_reduction_result(
        "sum", total, skipna=skipna, axis=axis, **kwargs
    )

1096

def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Compute the product via ``masked_reductions.prod``.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.prod``.
    min_count : int, default 0
        Forwarded to ``masked_reductions.prod``.
    axis : int or None, default 0
        Axis to reduce over.
    **kwargs
        Checked with ``nv.validate_prod``.

    Returns
    -------
    object
        The reduction result passed through ``_wrap_reduction_result``.
    """
    nv.validate_prod((), kwargs)

    reduce_options = {"skipna": skipna, "min_count": min_count, "axis": axis}
    product = masked_reductions.prod(self._data, self._mask, **reduce_options)
    return self._wrap_reduction_result(
        "prod", product, skipna=skipna, axis=axis, **kwargs
    )

1109

def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Compute the minimum via ``masked_reductions.min``.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.min``.
    axis : int or None, default 0
        Forwarded to ``masked_reductions.min``.
    **kwargs
        Checked with ``nv.validate_min``.

    Returns
    -------
    object
        Whatever ``masked_reductions.min`` returns.
    """
    nv.validate_min((), kwargs)

    values, na_mask = self._data, self._mask
    return masked_reductions.min(values, na_mask, skipna=skipna, axis=axis)

1118

def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Compute the maximum via ``masked_reductions.max``.

    Parameters
    ----------
    skipna : bool, default True
        Forwarded to ``masked_reductions.max``.
    axis : int or None, default 0
        Forwarded to ``masked_reductions.max``.
    **kwargs
        Checked with ``nv.validate_max``.

    Returns
    -------
    object
        Whatever ``masked_reductions.max`` returns.
    """
    nv.validate_max((), kwargs)

    values, na_mask = self._data, self._mask
    return masked_reductions.max(values, na_mask, skipna=skipna, axis=axis)

1127

def any(self, *, skipna: bool = True, **kwargs):
    """
    Return whether any element is truthy.

    The result is False unless at least one element is truthy. NAs are
    skipped by default. With ``skipna=False`` and missing values present,
    the result follows logic similar to the
    :ref:`Kleene logic <boolean.kleene>` used for logical operations.

    .. versionchanged:: 1.4.0

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, the result is False, as for an empty array.
        If `skipna` is False, the result is still True when at least one
        element is truthy; otherwise NA is returned if NA's are present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.any : Numpy version of this method.
    BaseMaskedArray.all : Return whether all elements are truthy.

    Examples
    --------
    The result indicates whether any element is truthy (and by default
    skips NAs):

    >>> pd.array([True, False, True]).any()
    True
    >>> pd.array([True, False, pd.NA]).any()
    True
    >>> pd.array([False, False, pd.NA]).any()
    False
    >>> pd.array([], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="Float64").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA]).any(skipna=False)
    True
    >>> pd.array([1, 0, pd.NA]).any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA]).any(skipna=False)
    <NA>
    >>> pd.array([0, 0, pd.NA]).any(skipna=False)
    <NA>
    """
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    # Work on a copy in which every masked slot holds the falsey value, so
    # that missing entries can never make the plain reduction True.
    filled = self._data.copy()
    filled[self._mask] = self._falsey_value
    found = filled.any()

    # NA is only the answer when NAs are not skipped, nothing truthy was
    # found, and there is at least one missing value that could decide it.
    undecided = (
        not skipna and not found and len(self) != 0 and self._mask.any()
    )
    return self.dtype.na_value if undecided else found

1208

def all(self, *, skipna: bool = True, **kwargs):
    """
    Return whether all elements are truthy.

    The result is True unless at least one element is falsey. NAs are
    skipped by default. With ``skipna=False`` and missing values present,
    the result follows logic similar to the
    :ref:`Kleene logic <boolean.kleene>` used for logical operations.

    .. versionchanged:: 1.4.0

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, the result is True, as for an empty array.
        If `skipna` is False, the result is still False when at least one
        element is falsey; otherwise NA is returned if NA's are present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BaseMaskedArray.any : Return whether any element is truthy.

    Examples
    --------
    The result indicates whether all elements are truthy (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([1, 1, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="Float64").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([1, 1, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False
    >>> pd.array([1, 0, pd.NA]).all(skipna=False)
    False
    """
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    # Work on a copy in which every masked slot holds the truthy value, so
    # that missing entries can never make the plain reduction False.
    filled = self._data.copy()
    filled[self._mask] = self._truthy_value
    holds = filled.all()

    # NA is only the answer when NAs are not skipped, nothing falsey was
    # found, and there is at least one missing value that could decide it.
    undecided = not skipna and holds and len(self) != 0 and self._mask.any()
    return self.dtype.na_value if undecided else holds