Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/masked.py: 16%

537 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from typing import ( 

4 TYPE_CHECKING, 

5 Any, 

6 Literal, 

7 Sequence, 

8 TypeVar, 

9 overload, 

10) 

11import warnings 

12 

13import numpy as np 

14 

15from pandas._libs import ( 

16 lib, 

17 missing as libmissing, 

18) 

19from pandas._typing import ( 

20 ArrayLike, 

21 AstypeArg, 

22 DtypeObj, 

23 NpDtype, 

24 PositionalIndexer, 

25 Scalar, 

26 ScalarIndexer, 

27 SequenceIndexer, 

28 Shape, 

29 npt, 

30) 

31from pandas.errors import AbstractMethodError 

32from pandas.util._decorators import doc 

33from pandas.util._validators import validate_fillna_kwargs 

34 

35from pandas.core.dtypes.astype import astype_nansafe 

36from pandas.core.dtypes.base import ExtensionDtype 

37from pandas.core.dtypes.common import ( 

38 is_bool, 

39 is_bool_dtype, 

40 is_datetime64_dtype, 

41 is_dtype_equal, 

42 is_float_dtype, 

43 is_integer_dtype, 

44 is_list_like, 

45 is_object_dtype, 

46 is_scalar, 

47 is_string_dtype, 

48 pandas_dtype, 

49) 

50from pandas.core.dtypes.dtypes import BaseMaskedDtype 

51from pandas.core.dtypes.inference import is_array_like 

52from pandas.core.dtypes.missing import ( 

53 array_equivalent, 

54 is_valid_na_for_dtype, 

55 isna, 

56 notna, 

57) 

58 

59from pandas.core import ( 

60 algorithms as algos, 

61 arraylike, 

62 missing, 

63 nanops, 

64 ops, 

65) 

66from pandas.core.algorithms import ( 

67 factorize_array, 

68 isin, 

69 take, 

70) 

71from pandas.core.array_algos import masked_reductions 

72from pandas.core.array_algos.quantile import quantile_with_mask 

73from pandas.core.arraylike import OpsMixin 

74from pandas.core.arrays import ExtensionArray 

75from pandas.core.construction import ensure_wrapped_if_datetimelike 

76from pandas.core.indexers import check_array_indexer 

77from pandas.core.ops import invalid_comparison 

78 

79if TYPE_CHECKING: 79 ↛ 80line 79 didn't jump to line 80, because the condition on line 79 was never true

80 from pandas import Series 

81 from pandas.core.arrays import BooleanArray 

82 from pandas._typing import ( 

83 NumpySorter, 

84 NumpyValueArrayLike, 

85 ) 

86 

87from pandas.compat.numpy import function as nv 

88 

89BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") 

90 

91 

92class BaseMaskedArray(OpsMixin, ExtensionArray): 

93 """ 

94 Base class for masked arrays (which use _data and _mask to store the data). 

95 

96 numpy based 

97 """ 

98 

99 # The value used to fill '_data' to avoid upcasting 

100 _internal_fill_value: Scalar 

101 # our underlying data and mask are each ndarrays 

102 _data: np.ndarray 

103 _mask: npt.NDArray[np.bool_] 

104 

105 # Fill values used for any/all 

106 _truthy_value = Scalar # bool(_truthy_value) = True 

107 _falsey_value = Scalar # bool(_falsey_value) = False 

108 

def __init__(
    self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
) -> None:
    """
    Store ``values`` and ``mask`` as the ``_data`` and ``_mask`` of this array.

    Parameters
    ----------
    values : np.ndarray
        Underlying data. Only its shape is checked here.
    mask : np.ndarray of bool
        Boolean NumPy array with the same shape as ``values``.
    copy : bool, default False
        If True, both arrays are copied before being stored.

    Raises
    ------
    TypeError
        If ``mask`` is not a NumPy array of boolean dtype.
    ValueError
        If ``values.shape`` and ``mask.shape`` differ.
    """
    # values is supposed to already be validated in the subclass
    mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
    if not mask_is_bool_ndarray:
        raise TypeError(
            "mask should be boolean numpy array. Use "
            "the 'pd.array' function instead"
        )
    if values.shape != mask.shape:
        raise ValueError("values.shape must match mask.shape")

    self._data = values.copy() if copy else values
    self._mask = mask.copy() if copy else mask

127 

@classmethod
def _from_sequence(
    cls: type[BaseMaskedArrayT], scalars, *, dtype=None, copy: bool = False
) -> BaseMaskedArrayT:
    """
    Build an instance from ``scalars``.

    The conversion is delegated to ``cls._coerce_to_array``; the resulting
    (values, mask) pair is passed to the class constructor.
    """
    coerced = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
    data, na_mask = coerced
    return cls(data, na_mask)

134 

@property
def dtype(self) -> BaseMaskedDtype:
    """
    The dtype of this array.

    Not implemented on the base class: always raises AbstractMethodError.
    """
    raise AbstractMethodError(self)

138 

@overload
def __getitem__(self, item: ScalarIndexer) -> Any:
    ...


@overload
def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT:
    ...


def __getitem__(
    self: BaseMaskedArrayT, item: PositionalIndexer
) -> BaseMaskedArrayT | Any:
    """
    Select element(s) of the array.

    When indexing the mask yields a single bool, the lookup is treated as
    scalar: the dtype's ``na_value`` is returned for a masked position,
    otherwise the corresponding entry of ``_data``. Any other result is
    wrapped in a new array of the same type.
    """
    item = check_array_indexer(self, item)
    selected_mask = self._mask[item]

    if not is_bool(selected_mask):
        # Non-scalar selection: wrap the selected data and mask
        return type(self)(self._data[item], selected_mask)

    # This is a scalar indexing
    if selected_mask:
        return self.dtype.na_value
    return self._data[item]

160 

@doc(ExtensionArray.fillna)
def fillna(
    self: BaseMaskedArrayT, value=None, method=None, limit=None
) -> BaseMaskedArrayT:
    # NOTE: no docstring here on purpose; the text is supplied through @doc.
    value, method = validate_fillna_kwargs(value, method)

    na_positions = self._mask

    if is_array_like(value):
        if len(value) != len(self):
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f" expected {len(self)}"
            )
        # keep only the entries aligned with the missing positions
        value = value[na_positions]

    if not na_positions.any():
        # nothing to fill
        return self.copy()

    if method is None:
        # fill with value
        filled = self.copy()
        filled[na_positions] = value
        return filled

    # fill with a method: the fill function works in place on transposed
    # copies of the data and the mask
    fill_func = missing.get_fill_func(method, ndim=self.ndim)
    data_t = self._data.copy().T
    mask_t = na_positions.copy().T
    fill_func(data_t, limit=limit, mask=mask_t)
    return type(self)(data_t.T, mask_t.T)

191 

@classmethod
def _coerce_to_array(
    cls, values, *, dtype: DtypeObj, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Convert ``values`` into a (data, mask) pair of ndarrays.

    Not implemented on the base class: always raises AbstractMethodError.
    """
    raise AbstractMethodError(cls)

197 

def _validate_setitem_value(self, value):
    """
    Check if we have a scalar that we can cast losslessly.

    Returns
    -------
    The unchanged ``value`` when it is acceptable for ``self.dtype.kind``:
    bools for kind "b", integers or floats for kind "f", and integers or
    integer-valued floats for every other kind.

    Raises
    ------
    TypeError
        If ``value`` is not acceptable for this dtype.
    """
    kind = self.dtype.kind
    # TODO: get this all from np_can_hold_element?
    if kind == "b":
        acceptable = lib.is_bool(value)
    elif kind == "f":
        acceptable = lib.is_integer(value) or lib.is_float(value)
    else:
        # TODO: unsigned checks
        acceptable = lib.is_integer(value) or (
            lib.is_float(value) and value.is_integer()
        )

    if acceptable:
        return value

    # Note: without the "str" here, the f-string rendering raises in
    # py38 builds.
    raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")

224 

def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key``, updating both ``_data`` and ``_mask``.

    A scalar that is a valid NA for this dtype only marks the positions as
    missing. Any other scalar is validated with ``_validate_setitem_value``
    and written with the mask cleared. Non-scalar values are converted with
    ``_coerce_to_array`` and both resulting arrays are written.
    """
    key = check_array_indexer(self, key)

    if not is_scalar(value):
        new_data, new_mask = self._coerce_to_array(value, dtype=self.dtype)
        self._data[key] = new_data
        self._mask[key] = new_mask
        return

    if is_valid_na_for_dtype(value, self.dtype):
        self._mask[key] = True
        return

    checked = self._validate_setitem_value(value)
    self._data[key] = checked
    self._mask[key] = False

241 

def __iter__(self):
    """
    Iterate over the elements of the array.

    For 1-dimensional arrays, yields the dtype's ``na_value`` at masked
    positions and the entry of ``_data`` otherwise. For higher dimensions,
    yields ``self[i]`` for each position along the first axis.
    """
    if self.ndim == 1:
        # Look the NA scalar up once and walk both arrays in lockstep
        # instead of indexing each of them per element.
        na_value = self.dtype.na_value
        for is_missing, value in zip(self._mask, self._data):
            yield na_value if is_missing else value
    else:
        for i in range(len(self)):
            yield self[i]

252 

def __len__(self) -> int:
    """Return the length of the underlying ``_data`` array."""
    data = self._data
    return len(data)

255 

@property
def shape(self) -> Shape:
    """Shape of the underlying ``_data`` array."""
    data = self._data
    return data.shape

259 

@property
def ndim(self) -> int:
    """Number of dimensions of the underlying ``_data`` array."""
    data = self._data
    return data.ndim

263 

def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT:
    """Return a new array of the same type with ``axis1`` and ``axis2`` swapped."""
    constructor = type(self)
    swapped_data = self._data.swapaxes(axis1, axis2)
    swapped_mask = self._mask.swapaxes(axis1, axis2)
    return constructor(swapped_data, swapped_mask)

268 

def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT:
    """
    Return a new array of the same type with the entries at ``loc`` removed.

    ``np.delete`` is applied to both the data and the mask along ``axis``.
    """
    constructor = type(self)
    kept_data = np.delete(self._data, loc, axis=axis)
    kept_mask = np.delete(self._mask, loc, axis=axis)
    return constructor(kept_data, kept_mask)

273 

def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """
    Return a new array of the same type with data and mask reshaped.

    All arguments are forwarded unchanged to ``ndarray.reshape`` for both
    underlying arrays.
    """
    constructor = type(self)
    reshaped_data = self._data.reshape(*args, **kwargs)
    reshaped_mask = self._mask.reshape(*args, **kwargs)
    return constructor(reshaped_data, reshaped_mask)

278 

def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
    """
    Return a flattened array of the same type.

    All arguments are forwarded unchanged to ``ndarray.ravel`` for both
    the data and the mask.
    """
    # TODO: need to make sure we have the same order for data/mask
    constructor = type(self)
    flat_data = self._data.ravel(*args, **kwargs)
    flat_mask = self._mask.ravel(*args, **kwargs)
    return constructor(flat_data, flat_mask)

284 

@property
def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """New array of the same type built from the transposed data and mask."""
    constructor = type(self)
    return constructor(self._data.T, self._mask.T)

288 

def round(self, decimals: int = 0, *args, **kwargs):
    """
    Round each value in the array to the given number of decimals.

    Parameters
    ----------
    decimals : int, default 0
        Number of decimal places to round to. If decimals is negative,
        it specifies the number of positions to the left of the decimal point.
    *args, **kwargs
        Additional arguments and keywords have no effect but might be
        accepted for compatibility with NumPy.

    Returns
    -------
    NumericArray
        Rounded values of the NumericArray.

    See Also
    --------
    numpy.around : Round values of an np.array.
    DataFrame.round : Round values of a DataFrame.
    Series.round : Round values of a Series.
    """
    nv.validate_round(args, kwargs)
    rounded = np.round(self._data, decimals=decimals, **kwargs)
    # The result gets its own copy of the mask
    result_mask = self._mask.copy()

    # Usually we'll get same type as self, but ndarray[bool] casts to float
    return self._maybe_mask_result(rounded, result_mask)

318 

319 # ------------------------------------------------------------------ 

320 # Unary Methods 

321 

def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return ``~`` applied to the data, paired with a copy of the mask."""
    constructor = type(self)
    inverted = ~self._data
    return constructor(inverted, self._mask.copy())

324 

def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return the negated data, paired with a copy of the mask."""
    constructor = type(self)
    negated = -self._data
    return constructor(negated, self._mask.copy())

327 

def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Unary plus: return the result of ``self.copy()``."""
    duplicate = self.copy()
    return duplicate

330 

def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return the absolute value of the data, paired with a copy of the mask."""
    constructor = type(self)
    magnitudes = abs(self._data)
    return constructor(magnitudes, self._mask.copy())

333 

334 # ------------------------------------------------------------------ 

335 

def to_numpy(
    self,
    dtype: npt.DTypeLike | None = None,
    copy: bool = False,
    na_value: object = lib.no_default,
) -> np.ndarray:
    """
    Convert to a NumPy Array.

    By default converts to an object-dtype NumPy array. Specify the `dtype` and
    `na_value` keywords to customize the conversion.

    Parameters
    ----------
    dtype : dtype, default object
        The numpy dtype to convert to.
    copy : bool, default False
        Whether to ensure that the returned value is not a view on
        the array. Note that ``copy=False`` does not *ensure* that
        ``to_numpy()`` is no-copy; a no-copy result is typically
        only possible when no missing values are present and `dtype`
        is the equivalent numpy dtype. Rather, ``copy=True`` ensures that
        a copy is made, even if not strictly necessary.
    na_value : scalar, optional
        Scalar missing value indicator to use in numpy array. Defaults
        to the native missing value indicator of this array (pd.NA).

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If missing values are present, `dtype` is neither an object nor a
        string dtype, and `na_value` is left at :attr:`NA`.

    Examples
    --------
    An object-dtype is the default result

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a.to_numpy()
    array([True, False, <NA>], dtype=object)

    When no missing values are present, an equivalent dtype can be used.

    >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
    array([ True, False])
    >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
    array([1, 2])

    However, requesting such dtype will raise a ValueError if
    missing values are present and the default missing value :attr:`NA`
    is used.

    >>> a = pd.array([True, False, pd.NA], dtype="boolean")
    >>> a
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean

    >>> a.to_numpy(dtype="bool")
    Traceback (most recent call last):
    ...
    ValueError: cannot convert to 'bool'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.

    Specify a valid `na_value` instead

    >>> a.to_numpy(dtype="bool", na_value=False)
    array([ True, False, False])
    """
    if na_value is lib.no_default:
        na_value = libmissing.NA
    if dtype is None:
        dtype = object
    if self._hasna:
        if (
            not is_object_dtype(dtype)
            and not is_string_dtype(dtype)
            and na_value is libmissing.NA
        ):
            raise ValueError(
                f"cannot convert to '{dtype}'-dtype NumPy array "
                "with missing values. Specify an appropriate 'na_value' "
                "for this dtype."
            )
        # don't pass copy to astype -> always need a copy since we are mutating
        data = self._data.astype(dtype)
        data[self._mask] = na_value
    else:
        data = self._data.astype(dtype, copy=copy)
    return data

423 

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
    ...


@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
    ...


@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
    ...


def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast to ``dtype``.

    An equal dtype returns ``self`` (or a copy when ``copy`` is True). Other
    masked dtypes are handled by casting ``_data`` directly and reusing the
    mask. Remaining extension dtypes go through their array type's
    ``_from_sequence``. Everything else is converted through ``to_numpy``.

    Raises
    ------
    ValueError
        If missing values are present and ``dtype`` is an integer or bool
        dtype (other than the masked/extension cases above).
    """
    dtype = pandas_dtype(dtype)

    if is_dtype_equal(dtype, self.dtype):
        return self.copy() if copy else self

    # if we are astyping to another nullable masked dtype, we can fastpath
    if isinstance(dtype, BaseMaskedDtype):
        # TODO deal with NaNs for FloatingArray case
        data = self._data.astype(dtype.numpy_dtype, copy=copy)
        # mask is copied depending on whether the data was copied, and
        # not directly depending on the `copy` keyword
        if data is self._data:
            mask = self._mask
        else:
            mask = self._mask.copy()
        target_cls = dtype.construct_array_type()
        return target_cls(data, mask, copy=False)

    if isinstance(dtype, ExtensionDtype):
        target_cls = dtype.construct_array_type()
        return target_cls._from_sequence(self, dtype=dtype, copy=copy)

    na_value: float | np.datetime64 | lib.NoDefault

    # coerce
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        na_value = np.nan
    elif is_datetime64_dtype(dtype):
        na_value = np.datetime64("NaT")
    else:
        na_value = lib.no_default

    # to_numpy will also raise, but we get somewhat nicer exception messages here
    if is_integer_dtype(dtype) and self._hasna:
        raise ValueError("cannot convert NA to integer")
    if is_bool_dtype(dtype) and self._hasna:
        # careful: astype_nansafe converts np.nan to True
        raise ValueError("cannot convert float NaN to bool")

    converted = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
    if self.dtype.kind != "f":
        return converted
    # TODO: make this consistent between IntegerArray/FloatingArray,
    # see test_astype_str
    return astype_nansafe(converted, dtype, copy=False)

482 

483 __array_priority__ = 1000 # higher than ndarray so ops dispatch to us 

484 

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    """
    The array interface: return the values as a NumPy array.

    Delegates to ``to_numpy(dtype=dtype)``. With no dtype given this is an
    object array, which preserves our scalar values.
    """
    as_ndarray = self.to_numpy(dtype=dtype)
    return as_ndarray

491 

492 _HANDLED_TYPES: tuple[type, ...] 

493 

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to this array.

    For masked inputs the ufunc is applied to ``._data`` and the combined
    mask of all masked inputs is applied to the result. Returns
    NotImplemented when any input or ``out`` operand is of an unhandled type.
    """
    out = kwargs.get("out", ())

    handled_types = self._HANDLED_TYPES + (BaseMaskedArray,)
    for operand in inputs + out:
        if not isinstance(operand, handled_types):
            return NotImplemented

    # for binary ops, use our custom dunder methods
    result = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    if "out" in kwargs:
        # e.g. test_ufunc_with_out
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        result = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

    # Union of the masks of all masked inputs; raw data replaces each of them
    mask = np.zeros(len(self), dtype=bool)
    raw_inputs = []
    for operand in inputs:
        if isinstance(operand, BaseMaskedArray):
            mask |= operand._mask
            raw_inputs.append(operand._data)
        else:
            raw_inputs.append(operand)

    def reconstruct(x):
        # we don't worry about scalar `x` here, since we
        # raise for reduce up above.
        from pandas.core.arrays import (
            BooleanArray,
            FloatingArray,
            IntegerArray,
        )

        if is_bool_dtype(x.dtype):
            return BooleanArray(x, mask.copy())
        if is_integer_dtype(x.dtype):
            return IntegerArray(x, mask.copy())
        if is_float_dtype(x.dtype):
            m = mask.copy()
            if x.dtype == np.float16:
                # reached in e.g. np.sqrt on BooleanArray
                # we don't support float16
                x = x.astype(np.float32)
            return FloatingArray(x, m)
        x[mask] = np.nan
        return x

    result = getattr(ufunc, method)(*raw_inputs, **kwargs)
    if ufunc.nout > 1:
        # e.g. np.divmod
        return tuple(reconstruct(x) for x in result)
    if method == "reduce":
        # e.g. np.add.reduce; test_ufunc_reduce_raises
        if self._mask.any():
            return self._na_value
        return result
    return reconstruct(result)

570 

def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    The data and the mask are handed to ``pyarrow.array`` together with the
    requested ``type``.
    """
    import pyarrow

    return pyarrow.array(self._data, mask=self._mask, type=type)

578 

@property
def _hasna(self) -> bool:
    """Whether any position of ``_mask`` is set."""
    # Note: this is expensive right now! The hope is that we can
    # make this faster by having an optional mask, but not have to change
    # source code using it..
    na_mask = self._mask

    # error: Incompatible return value type (got "bool_", expected "bool")
    return na_mask.any()  # type: ignore[return-value]

587 

def _propagate_mask(
    self, mask: npt.NDArray[np.bool_] | None, other
) -> npt.NDArray[np.bool_]:
    """
    Combine our mask with the mask of the other operand.

    With a ``mask`` given, the result is the elementwise OR of both masks.
    Without one, the result is a copy of our mask, or an all-True mask when
    ``other`` is ``NA``.
    """
    if mask is not None:
        return self._mask | mask

    combined = self._mask.copy()  # TODO: need test for BooleanArray needing a copy
    if other is libmissing.NA:
        # GH#45421 don't alter inplace
        combined = combined | True
    return combined

599 

def _arith_method(self, other, op):
    """
    Apply the arithmetic operator ``op`` to this array and ``other``.

    The operation runs on the raw data; the resulting mask combines our mask
    with that of ``other`` and is then relaxed for the ``pow``/``rpow``
    identities (``1 ** x`` and ``x ** 0``). The result is wrapped by
    ``_maybe_mask_result``.

    Raises
    ------
    NotImplementedError
        For list-like ``other`` with more than one dimension, and for
        division/power operators between a bool-kind array and ``NA``.
    """
    op_name = op.__name__
    other_mask = None

    if isinstance(other, BaseMaskedArray):
        other, other_mask = other._data, other._mask

    elif is_list_like(other):
        if not isinstance(other, ExtensionArray):
            other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")

    # We wrap the non-masked arithmetic logic used for numpy dtypes
    # in Series/Index arithmetic ops.
    other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
    array_op = ops.get_array_op(op)
    other = ensure_wrapped_if_datetimelike(other)

    if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
        # Avoid DeprecationWarning: In future, it will be an error
        # for 'np.bool_' scalars to be interpreted as an index
        # e.g. test_array_scalar_like_equivalence
        other = bool(other)

    mask = self._propagate_mask(other_mask, other)

    if other is libmissing.NA:
        result = np.ones_like(self._data)
        if self.dtype.kind == "b":
            unsupported_for_bool = {
                "floordiv",
                "rfloordiv",
                "pow",
                "rpow",
                "truediv",
                "rtruediv",
            }
            if op_name in unsupported_for_bool:
                # GH#41165 Try to match non-masked Series behavior
                # This is still imperfect GH#46043
                raise NotImplementedError(
                    f"operator '{op_name}' not implemented for bool dtypes"
                )
            placeholder_dtype = "int8" if op_name in {"mod", "rmod"} else "bool"
            result = result.astype(placeholder_dtype)
        elif "truediv" in op_name and self.dtype.kind != "f":
            # The actual data here doesn't matter since the mask
            # will be all-True, but since this is division, we want
            # to end up with floating dtype.
            result = result.astype(np.float64)
    else:
        # Make sure we do this before the "pow" mask checks
        # to get an expected exception message on shape mismatch.
        if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]:
            # TODO(GH#30188) ATM we don't match the behavior of non-masked
            # types with respect to floordiv-by-zero
            array_op = op

        with np.errstate(all="ignore"):
            result = array_op(self._data, other)

    if op_name == "pow":
        # 1 ** x is 1.
        mask = np.where((self._data == 1) & ~self._mask, False, mask)
        # x ** 0 is 1.
        if other_mask is not None:
            mask = np.where((other == 0) & ~other_mask, False, mask)
        elif other is not libmissing.NA:
            mask = np.where(other == 0, False, mask)

    elif op_name == "rpow":
        # 1 ** x is 1.
        if other_mask is not None:
            mask = np.where((other == 1) & ~other_mask, False, mask)
        elif other is not libmissing.NA:
            mask = np.where(other == 1, False, mask)
        # x ** 0 is 1.
        mask = np.where((self._data == 0) & ~self._mask, False, mask)

    return self._maybe_mask_result(result, mask)

683 

684 _logical_method = _arith_method 

685 

def _cmp_method(self, other, op) -> BooleanArray:
    """
    Apply the comparison operator ``op`` to this array and ``other``.

    Returns a BooleanArray whose mask combines our mask with that of
    ``other``; comparing against ``NA`` yields an all-masked result.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` has a different length than this array.
    """
    from pandas.core.arrays import BooleanArray

    other_mask = None

    if isinstance(other, BaseMaskedArray):
        other, other_mask = other._data, other._mask

    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy does not handle pd.NA well as "other" scalar (it returns
        # a scalar False instead of an array)
        # This may be fixed by NA.__array_ufunc__. Revisit this check
        # once that's implemented.
        result = np.zeros(self._data.shape, dtype="bool")
        other_mask = np.ones(self._data.shape, dtype="bool")
    else:
        with warnings.catch_warnings():
            # numpy may show a FutureWarning:
            #     elementwise comparison failed; returning scalar instead,
            #     but in the future will perform elementwise comparison
            # before returning NotImplemented. We fall back to the correct
            # behavior today, so that should be fine to ignore.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                dunder = getattr(self._data, f"__{op.__name__}__")
                result = dunder(other)

        if result is NotImplemented:
            result = invalid_comparison(self._data, other, op)

    result_mask = self._propagate_mask(other_mask, other)
    return BooleanArray(result, result_mask, copy=False)

725 

def _maybe_mask_result(self, result, mask):
    """
    Wrap ``result`` in the array type matching its dtype.

    Float, bool and integer results become FloatingArray, BooleanArray and
    IntegerArray respectively. ``timedelta64[ns]`` results become a
    TimedeltaArray with NaT at masked positions. Any other result has NaN
    written at the masked positions and is returned as-is. A tuple result
    is handled element by element.

    Parameters
    ----------
    result : array-like or tuple[array-like]
    mask : array-like bool
    """
    if isinstance(result, tuple):
        # i.e. divmod
        div, mod = result
        masked_div = self._maybe_mask_result(div, mask)
        masked_mod = self._maybe_mask_result(mod, mask)
        return (masked_div, masked_mod)

    if is_float_dtype(result.dtype):
        from pandas.core.arrays import FloatingArray

        return FloatingArray(result, mask, copy=False)

    if is_bool_dtype(result.dtype):
        from pandas.core.arrays import BooleanArray

        return BooleanArray(result, mask, copy=False)

    if result.dtype == "timedelta64[ns]":
        # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
        from pandas.core.arrays import TimedeltaArray

        if not isinstance(result, TimedeltaArray):
            result = TimedeltaArray._simple_new(result)

        result[mask] = result.dtype.type("NaT")
        return result

    if is_integer_dtype(result.dtype):
        from pandas.core.arrays import IntegerArray

        return IntegerArray(result, mask, copy=False)

    result[mask] = np.nan
    return result

769 

def isna(self) -> np.ndarray:
    """Return a copy of the boolean mask marking the missing positions."""
    na_mask = self._mask
    return na_mask.copy()

772 

@property
def _na_value(self):
    """The ``na_value`` of this array's dtype."""
    own_dtype = self.dtype
    return own_dtype.na_value

776 

@property
def nbytes(self) -> int:
    """Total bytes consumed by the data array plus the mask array."""
    data_bytes = self._data.nbytes
    mask_bytes = self._mask.nbytes
    return data_bytes + mask_bytes

780 

@classmethod
def _concat_same_type(
    cls: type[BaseMaskedArrayT],
    to_concat: Sequence[BaseMaskedArrayT],
    axis: int = 0,
) -> BaseMaskedArrayT:
    """
    Concatenate several arrays of this type along ``axis``.

    The data arrays and the mask arrays are concatenated separately and
    the results are passed to the class constructor.
    """
    data_parts = [arr._data for arr in to_concat]
    joined_data = np.concatenate(data_parts, axis=axis)
    mask_parts = [arr._mask for arr in to_concat]
    joined_mask = np.concatenate(mask_parts, axis=axis)
    return cls(joined_data, joined_mask)

790 

def take(
    self: BaseMaskedArrayT,
    indexer,
    *,
    allow_fill: bool = False,
    fill_value: Scalar | None = None,
    axis: int = 0,
) -> BaseMaskedArrayT:
    """
    Take elements from the array at the positions given by ``indexer``.

    Data and mask are taken separately; positions filled by the take are
    marked as missing in the mask. When ``allow_fill`` is True and a non-NA
    ``fill_value`` is given, positions where ``indexer == -1`` receive
    ``fill_value`` and have their mask entry flipped.
    """
    # for an NA fill_value we fill the data with the class's
    # _internal_fill_value to avoid upcasting
    if isna(fill_value):
        data_fill_value = self._internal_fill_value
    else:
        data_fill_value = fill_value

    taken_data = take(
        self._data,
        indexer,
        fill_value=data_fill_value,
        allow_fill=allow_fill,
        axis=axis,
    )
    taken_mask = take(
        self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
    )

    # if we are filling
    # we only fill where the indexer is null
    # not existing missing values
    # TODO(jreback) what if we have a non-na float as a fill value?
    if allow_fill and notna(fill_value):
        filled_positions = np.asarray(indexer) == -1
        taken_data[filled_positions] = fill_value
        taken_mask = taken_mask ^ filled_positions

    return type(self)(taken_data, taken_mask, copy=False)

824 

825 # error: Return type "BooleanArray" of "isin" incompatible with return type 

826 # "ndarray" in supertype "ExtensionArray" 

def isin(self, values) -> BooleanArray:  # type: ignore[override]
    """
    Check element-wise whether each entry is contained in ``values``.

    Returns a BooleanArray with no missing entries: masked positions are
    set to whether ``values`` (as an object array) contains this dtype's
    ``na_value``.
    """
    from pandas.core.arrays import BooleanArray

    # algorithms.isin will eventually convert values to an ndarray, so no extra
    # cost to doing it here first
    candidates = np.asarray(values)
    membership = isin(self._data, candidates)

    if self._hasna:
        na_in_candidates = is_object_dtype(candidates.dtype) and any(
            candidate is self.dtype.na_value for candidate in candidates
        )

        # For now, NA does not propagate so set result according to presence of NA,
        # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
        membership[self._mask] = na_in_candidates

    all_valid = np.zeros(self._data.shape, dtype=bool)
    return BooleanArray(membership, all_valid, copy=False)

847 

def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """Return a new array of the same type backed by copies of data and mask."""
    data_copy = self._data.copy()
    mask_copy = self._mask.copy()
    # the arrays were just copied, so the constructor must not copy again
    return type(self)(data_copy, mask_copy, copy=False)

853 

def unique(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
    """
    Compute the BaseMaskedArray of unique values.

    The work is delegated to ``algos.unique_with_mask`` on the data and
    the mask.

    Returns
    -------
    uniques : BaseMaskedArray
    """
    unique_data, unique_mask = algos.unique_with_mask(self._data, self._mask)
    constructor = type(self)
    return constructor(unique_data, unique_mask, copy=False)

864 

@doc(ExtensionArray.searchsorted)
def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter = None,
) -> npt.NDArray[np.intp] | np.intp:
    # NOTE: no docstring here on purpose; the text is supplied through @doc.
    if self._hasna:
        raise ValueError(
            "searchsorted requires array to be sorted, which is impossible "
            "with NAs present."
        )

    needle = value.astype(object) if isinstance(value, ExtensionArray) else value

    # Base class searchsorted would cast to object, which is *much* slower.
    return self._data.searchsorted(needle, side=side, sorter=sorter)

881 

@doc(ExtensionArray.factorize)
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    # NOTE: no docstring here on purpose; the text is supplied through @doc.
    resolved_na_sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
    data = self._data
    na_mask = self._mask

    # Pass non-None na_sentinel; recode and add NA to uniques if necessary below
    if resolved_na_sentinel is None:
        na_sentinel_arg = -1
    else:
        na_sentinel_arg = resolved_na_sentinel
    codes, uniques = factorize_array(data, na_sentinel=na_sentinel_arg, mask=na_mask)

    # check that factorize_array correctly preserves dtype.
    assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype)

    has_na = na_mask.any()
    # NA becomes a regular entry of uniques only when no sentinel is in use
    # and there is something missing.
    na_in_uniques = resolved_na_sentinel is None and has_na

    # Make room for an NA value when needed
    size = len(uniques) + 1 if na_in_uniques else len(uniques)
    uniques_mask = np.zeros(size, dtype=bool)

    if na_in_uniques:
        first_na = na_mask.argmax()
        # Insert na with the proper code
        if first_na == 0:
            na_code = np.intp(0)
        else:
            # mypy error: Slice index must be an integer or None
            # https://github.com/python/mypy/issues/2410
            na_code = codes[:first_na].max() + 1  # type: ignore[misc]
        codes[codes >= na_code] += 1
        codes[codes == -1] = na_code
        # dummy value for uniques; not used since uniques_mask will be True
        uniques = np.insert(uniques, na_code, 0)
        uniques_mask[na_code] = True

    uniques_ea = type(self)(uniques, uniques_mask)
    return codes, uniques_ea

923 

@doc(ExtensionArray._values_for_argsort)
def _values_for_argsort(self) -> np.ndarray:
    # NOTE: no docstring here on purpose; the text is supplied through @doc.
    # The raw data array is handed back as-is (no copy, mask not applied).
    raw_values = self._data
    return raw_values

927 

def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each unique value.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of missing values.

    Returns
    -------
    counts : Series
        Counts stored with "Int64" dtype, indexed by the unique values cast
        to this array's dtype. With ``dropna=False`` the last entry holds
        the number of missing values, labelled with the dtype's NA value.

    See Also
    --------
    Series.value_counts
    """
    from pandas import (
        Index,
        Series,
    )
    from pandas.arrays import IntegerArray

    if dropna:
        keys, counts = algos.value_counts_arraylike(
            self._data, dropna=True, mask=self._mask
        )
        res = Series(counts, index=keys)
        res.index = res.index.astype(self.dtype)
        res = res.astype("Int64")
        return res

    # From here on dropna is False: the early return above already handled
    # the dropna=True case.

    # compute counts on the data with no nans
    data = self._data[~self._mask]
    value_counts = Index(data).value_counts()

    # since we want nans, append the count of masked entries as the last slot
    counts = np.empty(len(value_counts) + 1, dtype="int64")
    counts[:-1] = value_counts
    counts[-1] = self._mask.sum()

    index = value_counts.index
    index = index.insert(len(index), self.dtype.na_value)
    index = index.astype(self.dtype)

    # the counts themselves are never missing
    mask = np.zeros(len(counts), dtype="bool")
    counts_array = IntegerArray(counts, mask)

    return Series(counts_array, index=index)

982 

@doc(ExtensionArray.equals)
def equals(self, other) -> bool:
    # NOTE: no docstring here on purpose; the text is supplied through @doc.
    # Exact type and dtype must both agree before any values are compared.
    if type(self) != type(other) or other.dtype != self.dtype:
        return False

    # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
    # equal.
    same_missing = np.array_equal(self._mask, other._mask)
    if not same_missing:
        return False

    # Only the non-missing entries take part in the value comparison
    left = self._data[~self._mask]
    right = other._data[~other._mask]
    return array_equivalent(left, right, dtype_equal=True)

998 

def _quantile(
    self, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArray:
    """
    Compute quantiles by dispatching to quantile_with_mask.

    This dedicated path exists because we do not have _from_factorized.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    quantiles = quantile_with_mask(
        self._data,
        mask=self._mask,
        # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype)
        #  instead of np.nan
        fill_value=np.nan,
        qs=qs,
        interpolation=interpolation,
    )

    # The result mask is all-False unless every element is NA, in which
    # case it is all-True.
    if not self._hasna:
        out_mask = np.zeros(quantiles.shape, dtype=bool)
    elif self.ndim == 2:
        # I think this should be out_mask=self.isna().all(axis=1)
        #  but am holding off until we have tests
        raise NotImplementedError
    else:
        make_mask = np.ones if self.isna().all() else np.zeros
        out_mask = make_mask(quantiles.shape, dtype=bool)

    return self._maybe_mask_result(quantiles, mask=out_mask)

1034 

1035 # ------------------------------------------------------------------ 

1036 # Reductions 

1037 

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
    """
    Dispatch the reduction called ``name`` to its implementation.

    ``any``, ``all``, ``min``, ``max``, ``sum`` and ``prod`` are forwarded
    to the method of the same name on this object. ``mean`` goes through
    ``masked_reductions``. Every other name is looked up on ``nanops`` as
    ``"nan" + name`` and evaluated along ``axis=0``; a NaN result from
    that path is returned as ``libmissing.NA``.
    """
    if name in {"any", "all", "min", "max", "sum", "prod"}:
        own_method = getattr(self, name)
        return own_method(skipna=skipna, **kwargs)

    values = self._data
    na_mask = self._mask

    if name == "mean":
        masked_op = getattr(masked_reductions, name)
        return masked_op(values, na_mask, skipna=skipna, **kwargs)

    # coerce to a nan-aware float if needed
    # (we explicitly use NaN within reductions)
    if self._hasna:
        values = self.to_numpy("float64", na_value=np.nan)

    # median, var, std, skew, kurt, idxmin, idxmax
    nan_op = getattr(nanops, "nan" + name)
    outcome = nan_op(values, axis=0, skipna=skipna, mask=na_mask, **kwargs)

    return libmissing.NA if np.isnan(outcome) else outcome

1063 

def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
    """
    Attach a mask to an array-valued reduction result.

    Results that are not ``np.ndarray`` are returned untouched. For array
    results, ``kwargs["axis"]`` is required and the mask is reduced along
    it before both are handed to ``_maybe_mask_result``.
    """
    if not isinstance(result, np.ndarray):
        return result

    axis = kwargs["axis"]
    # With skipna we only retain the mask for all-NA rows/columns;
    # without it a single NA along the axis is enough.
    collapse = self._mask.all if skipna else self._mask.any
    reduced_mask = collapse(axis=axis)

    return self._maybe_mask_result(result, reduced_mask)

1075 

1076 def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): 

1077 nv.validate_sum((), kwargs) 

1078 

1079 # TODO: do this in validate_sum? 

1080 if "out" in kwargs: 

1081 # np.sum; test_floating_array_numpy_sum 

1082 if kwargs["out"] is not None: 

1083 raise NotImplementedError 

1084 kwargs.pop("out") 

1085 

1086 result = masked_reductions.sum( 

1087 self._data, 

1088 self._mask, 

1089 skipna=skipna, 

1090 min_count=min_count, 

1091 axis=axis, 

1092 ) 

1093 return self._wrap_reduction_result( 

1094 "sum", result, skipna=skipna, axis=axis, **kwargs 

1095 ) 

1096 

1097 def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): 

1098 nv.validate_prod((), kwargs) 

1099 result = masked_reductions.prod( 

1100 self._data, 

1101 self._mask, 

1102 skipna=skipna, 

1103 min_count=min_count, 

1104 axis=axis, 

1105 ) 

1106 return self._wrap_reduction_result( 

1107 "prod", result, skipna=skipna, axis=axis, **kwargs 

1108 ) 

1109 

1110 def min(self, *, skipna=True, axis: int | None = 0, **kwargs): 

1111 nv.validate_min((), kwargs) 

1112 return masked_reductions.min( 

1113 self._data, 

1114 self._mask, 

1115 skipna=skipna, 

1116 axis=axis, 

1117 ) 

1118 

1119 def max(self, *, skipna=True, axis: int | None = 0, **kwargs): 

1120 nv.validate_max((), kwargs) 

1121 return masked_reductions.max( 

1122 self._data, 

1123 self._mask, 

1124 skipna=skipna, 

1125 axis=axis, 

1126 ) 

1127 

def any(self, *, skipna: bool = True, **kwargs):
    """
    Check whether at least one element is truthy.

    The result is False unless some element is truthy. Missing values are
    skipped by default. With ``skipna=False`` and missing values present,
    the outcome follows :ref:`Kleene logic <boolean.kleene>`, in the same
    way as the logical operations do.

    .. versionchanged:: 1.4.0

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. An array made up entirely of NA then behaves
        like an empty array and yields False.
        If `skipna` is False, a single truthy element still gives True;
        otherwise NA is returned whenever NA's are present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.any : Numpy version of this method.
    BaseMaskedArray.all : Return whether all elements are truthy.

    Examples
    --------
    The result indicates whether any element is truthy (and by default
    skips NAs):

    >>> pd.array([True, False, True]).any()
    True
    >>> pd.array([True, False, pd.NA]).any()
    True
    >>> pd.array([False, False, pd.NA]).any()
    False
    >>> pd.array([], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="Float64").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA]).any(skipna=False)
    True
    >>> pd.array([1, 0, pd.NA]).any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA]).any(skipna=False)
    <NA>
    >>> pd.array([0, 0, pd.NA]).any(skipna=False)
    <NA>
    """
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    # Work on a copy so the stored data is untouched, and overwrite the
    # masked slots with the falsey value so they cannot produce a True.
    filled = self._data.copy()
    # mypy flags the third argument of putmask as an incompatible "object",
    # hence the ignore below.
    np.putmask(filled, self._mask, self._falsey_value)  # type: ignore[arg-type]
    has_truthy = filled.any()

    # The plain answer stands when NAs are skipped, when a truthy element
    # was found, when the array is empty, or when nothing is masked.
    if skipna or has_truthy or len(self) == 0 or not self._mask.any():
        return has_truthy
    return self.dtype.na_value

1208 

def all(self, *, skipna: bool = True, **kwargs):
    """
    Check whether every element is truthy.

    The result is True unless some element is falsey. Missing values are
    skipped by default. With ``skipna=False`` and missing values present,
    the outcome follows :ref:`Kleene logic <boolean.kleene>`, in the same
    way as the logical operations do.

    .. versionchanged:: 1.4.0

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. An array made up entirely of NA then behaves
        like an empty array and yields True.
        If `skipna` is False, a single falsey element still gives False;
        otherwise NA is returned whenever NA's are present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BaseMaskedArray.any : Return whether any element is truthy.

    Examples
    --------
    The result indicates whether all elements are truthy (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([1, 1, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="Float64").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([1, 1, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False
    >>> pd.array([1, 0, pd.NA]).all(skipna=False)
    False
    """
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    # Work on a copy so the stored data is untouched, and overwrite the
    # masked slots with the truthy value so they cannot produce a False.
    filled = self._data.copy()
    # mypy flags the third argument of putmask as an incompatible "object",
    # hence the ignore below.
    np.putmask(filled, self._mask, self._truthy_value)  # type: ignore[arg-type]
    all_truthy = filled.all()

    # The plain answer stands when NAs are skipped, when a falsey element
    # was found, when the array is empty, or when nothing is masked.
    if skipna or not all_truthy or len(self) == 0 or not self._mask.any():
        return all_truthy
    return self.dtype.na_value