Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/nanops.py: 14%

630 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import functools 

4import itertools 

5import operator 

6from typing import ( 

7 Any, 

8 Callable, 

9 cast, 

10) 

11import warnings 

12 

13import numpy as np 

14 

15from pandas._config import get_option 

16 

17from pandas._libs import ( 

18 NaT, 

19 NaTType, 

20 iNaT, 

21 lib, 

22) 

23from pandas._typing import ( 

24 ArrayLike, 

25 Dtype, 

26 DtypeObj, 

27 F, 

28 Scalar, 

29 Shape, 

30 npt, 

31) 

32from pandas.compat._optional import import_optional_dependency 

33 

34from pandas.core.dtypes.common import ( 

35 is_any_int_dtype, 

36 is_bool_dtype, 

37 is_complex, 

38 is_datetime64_any_dtype, 

39 is_float, 

40 is_float_dtype, 

41 is_integer, 

42 is_integer_dtype, 

43 is_numeric_dtype, 

44 is_object_dtype, 

45 is_scalar, 

46 is_timedelta64_dtype, 

47 needs_i8_conversion, 

48 pandas_dtype, 

49) 

50from pandas.core.dtypes.dtypes import PeriodDtype 

51from pandas.core.dtypes.missing import ( 

52 isna, 

53 na_value_for_dtype, 

54 notna, 

55) 

56 

57from pandas.core.construction import extract_array 

58 

# Soft-import bottleneck: warn (rather than raise) when it is not installed.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
# Whether nanops actually dispatch to bottleneck; toggled via set_use_bottleneck().
_USE_BOTTLENECK = False

62 

63 

def set_use_bottleneck(v: bool = True) -> None:
    """
    Enable or disable dispatching nanops to the bottleneck library.

    Has no effect when bottleneck is not installed.
    """
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v

69 

70 

# Honor the user's configured preference at import time.
set_use_bottleneck(get_option("compute.use_bottleneck"))

72 

73 

class disallow:
    """
    Decorator class: raise TypeError when the wrapped reduction receives any
    argument whose dtype is one of the disallowed ``dtypes``.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        # normalize to numpy scalar types so the issubclass check below works
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        # True if ``obj`` carries a dtype belonging to the disallowed set
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            # inspect every positional and keyword argument for a bad dtype
            obj_iter = itertools.chain(args, kwargs.values())
            if any(self.check(obj) for obj in obj_iter):
                # strip the "nan" prefix for a friendlier message,
                # e.g. "nansum" -> "sum"
                f_name = f.__name__.replace("nan", "")
                raise TypeError(
                    f"reduction operation '{f_name}' not allowed for this dtype"
                )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)

104 

105 

class bottleneck_switch:
    """
    Decorator class: dispatch to the same-named bottleneck function when
    bottleneck is enabled and safe for the values' dtype, otherwise fall
    back to the decorated (``alt``) implementation.

    Parameters
    ----------
    name : str, optional
        Name of the bottleneck function to look up; defaults to the
        decorated function's name.
    **kwargs
        Default keyword arguments injected into every call.
    """

    def __init__(self, name=None, **kwargs) -> None:
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bn is None when bottleneck is not installed
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: int | None = None,
            skipna: bool = True,
            **kwds,
        ):
            # fill in decorator-level defaults without clobbering caller args
            if len(self.kwargs) > 0:
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)

160 

161 

def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
    """
    Decide whether the named bottleneck function may be used for this dtype.

    Bottleneck chokes on datetime64, PeriodDtype (or any EA), and some of
    its reductions differ from pandas' required semantics.
    """
    if is_object_dtype(dtype) or needs_i8_conversion(dtype):
        return False

    # GH 42878
    # Bottleneck uses naive summation leading to O(n) loss of precision
    # unlike numpy which implements pairwise summation, which has O(log(n)) loss
    # crossref: https://github.com/pydata/bottleneck/issues/379

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ["nansum", "nanprod", "nanmean"]

180 

181 

182def _has_infs(result) -> bool: 

183 if isinstance(result, np.ndarray): 

184 if result.dtype == "f8" or result.dtype == "f4": 

185 # Note: outside of an nanops-specific test, we always have 

186 # result.ndim == 1, so there is no risk of this ravel making a copy. 

187 return lib.has_infs(result.ravel("K")) 

188 try: 

189 return np.isinf(result).any() 

190 except (TypeError, NotImplementedError): 

191 # if it doesn't support infs, then it can't have infs 

192 return False 

193 

194 

def _get_fill_value(
    dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
):
    """Return the correct fill value for the dtype of the values."""
    if fill_value is not None:
        # caller supplied an explicit fill value; use it verbatim
        return fill_value

    if _na_ok_dtype(dtype):
        # NaN-capable dtype: NaN by default, +/-inf on request
        if fill_value_typ is None:
            return np.nan
        return np.inf if fill_value_typ == "+inf" else -np.inf

    # i8-based dtype (datetime64/timedelta64)
    if fill_value_typ == "+inf":
        # need the max int here
        return lib.i8max
    return iNaT

215 

216 

def _maybe_get_mask(
    values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
) -> npt.NDArray[np.bool_] | None:
    """
    Compute a mask if and only if necessary.

    This function will compute a mask iff it is necessary. Otherwise,
    return the provided mask (potentially None) when a mask does not need to be
    computed.

    A mask is never necessary if the values array is of boolean or integer
    dtypes, as these are incapable of storing NaNs. If passing a NaN-capable
    dtype that is interpretable as either boolean or integer data (eg,
    timedelta64), a mask must be provided.

    If the skipna parameter is False, a new mask is only computed for
    datetimelike dtypes (which need one to detect iNaT).

    The mask is computed using isna().

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    mask : Optional[ndarray]
        nan-mask if known

    Returns
    -------
    Optional[np.ndarray[bool]]
    """
    if mask is None:
        if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
            # Boolean data cannot contain nulls, so signal via mask being None
            return None

        if skipna or needs_i8_conversion(values.dtype):
            # datetimelike dtypes always need a mask so NaT can still be
            # detected after the values are viewed as i8 downstream
            mask = isna(values)

    return mask

259 

260 

def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # fill in place on a copy; the fill value fits the dtype
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value

348 

349 

def _na_ok_dtype(dtype: DtypeObj) -> bool:
    """
    Return True if ``dtype`` can natively hold NaN, i.e. it is neither
    datetimelike (which uses iNaT) nor integer.
    """
    # `and` short-circuits: datetimelike dtypes never reach the issubclass check
    return (not needs_i8_conversion(dtype)) and (
        not issubclass(dtype.type, np.integer)
    )

354 

355 

def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    Wrap our results if needed: cast integer-domain reduction results back to
    the original datetimelike ``dtype``; pass everything else through.
    """
    if result is NaT:
        # already the canonical missing value for datetimelike results
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                # the fill value leaked through the reduction -> missing
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns")
            else:
                result = np.int64(result).view("datetime64[ns]")
            # retain original unit
            result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result

395 

396 

def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        orig_values = values

        # "m" -> timedelta64, "M" -> datetime64
        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the integer-domain result back to the original dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)
            if not skipna:
                assert mask is not None  # checked above
                # with skipna=False, any NA in a reduced slice poisons it
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)

429 

430 

def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray:
    """
    Return the missing value for `values`.

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction, required if values.ndim > 1.

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, returns a scalar of the correct missing type.
        For 2-D values, returns a 1-D array where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype("float64")
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1 or axis is None:
        return fill_value

    # drop the reduced axis from the shape; fill what remains with NA
    result_shape = values.shape[:axis] + values.shape[axis + 1 :]
    return np.full(result_shape, fill_value, dtype=values.dtype)

460 

461 

def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs):
        # only go row-by-row for wide, 2-D, C-contiguous, non-object/non-bool
        # arrays; for the threshold see
        # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
        should_go_rowwise = (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        )
        if should_go_rowwise:
            rows = list(values)
            if kwargs.get("mask") is not None:
                mask = kwargs.pop("mask")
                row_results = [
                    func(row, mask=mask[i], **kwargs) for i, row in enumerate(rows)
                ]
            else:
                row_results = [func(row, **kwargs) for row in rows]
            return np.array(row_results)

        return func(values, axis=axis, **kwargs)

    return cast(F, newfunc)

494 

495 

def nanany(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    # missing entries are filled with False so they can never make any() True
    values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)

    # numpy's any() on object arrays need not return booleans
    # (numpy/numpy#4352), so cast object data to bool first
    checked = values.astype(bool) if is_object_dtype(values) else values

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return checked.any(axis)  # type: ignore[return-value]

540 

541 

def nanall(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    # missing entries are filled with True so they can never make all() False
    values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)

    # numpy's all() on object arrays need not return booleans
    # (numpy/numpy#4352), so cast object data to bool first
    checked = values.astype(bool) if is_object_dtype(values) else values

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return checked.all(axis)  # type: ignore[return-value]

586 

587 

@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        Minimum number of non-NA values required for a non-NA result.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    # NaNs are replaced with 0 so they do not contribute to the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        # keep the input's float precision
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        # timedeltas were viewed as i8 by _get_values; accumulate as float
        dtype_sum = np.dtype(np.float64)

    the_sum = values.sum(axis, dtype=dtype_sum)
    the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)

    return the_sum

635 

636 

def _mask_datetimelike_result(
    result: np.ndarray | np.datetime64 | np.timedelta64,
    axis: int | None,
    mask: npt.NDArray[np.bool_],
    orig_values: np.ndarray,
) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
    """
    Re-inject NaT into a datetimelike reduction result wherever the reduced
    slice contained any masked (missing) entry.
    """
    if not isinstance(result, np.ndarray):
        # scalar reduction: any masked input poisons the whole result
        if mask.any():
            return np.int64(iNaT).view(orig_values.dtype)
        return result

    # array reduction: we need to apply the mask per reduced slice
    result = result.astype("i8").view(orig_values.dtype)
    axis_mask = mask.any(axis=axis)
    # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
    # datetime64, timedelta64]")
    result[axis_mask] = iNaT  # type: ignore[index]
    return result

654 

655 

@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    # NaNs are replaced with 0 so they do not contribute to the sum
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )
    dtype_sum = dtype_max
    dtype_count = np.dtype(np.float64)

    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"]:
        # datetimelike values were viewed as i8; accumulate as float
        dtype_sum = np.dtype(np.float64)
    elif is_integer_dtype(dtype):
        # accumulate as float to avoid integer overflow
        dtype_sum = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # keep the input's float precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        # all-NA slices have count 0 -> their mean is NaN
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
    else:
        the_mean = the_sum / count if count > 0 else np.nan

    return the_mean

720 

721 

@bottleneck_switch()
def nanmedian(values, *, axis=None, skipna=True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x):
        # median of one 1-D slice, honoring skipna
        mask = notna(x)
        if not skipna and not mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings("ignore", "All-NaN slice encountered")
            res = np.nanmedian(x[mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:

        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings("ignore", "All-NaN slice encountered")
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array.
            # np.float64 instead of the np.float_ alias, which was deprecated
            # and is removed in NumPy 2.0 (they are the same type on NumPy 1.x).
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values) if notempty else np.nan
    return _wrap_results(res, dtype)

795 

796 

797def get_empty_reduction_result( 

798 shape: tuple[int, ...], 

799 axis: int, 

800 dtype: np.dtype | type[np.floating], 

801 fill_value: Any, 

802) -> np.ndarray: 

803 """ 

804 The result from a reduction on an empty ndarray. 

805 

806 Parameters 

807 ---------- 

808 shape : Tuple[int] 

809 axis : int 

810 dtype : np.dtype 

811 fill_value : Any 

812 

813 Returns 

814 ------- 

815 np.ndarray 

816 """ 

817 shp = np.array(shape) 

818 dims = np.arange(len(shape)) 

819 ret = np.empty(shp[dims != axis], dtype=dtype) 

820 ret.fill(fill_value) 

821 return ret 

822 

823 

def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: int | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
        count minus ddof (the variance divisor)
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # with ddof-or-fewer observations the variance is undefined:
    # always return NaN, never inf
    if is_scalar(count):
        if count <= ddof:
            count, d = np.nan, np.nan
    else:
        # count is not narrowed by the is_scalar check
        count = cast(np.ndarray, count)
        insufficient = count <= ddof
        if insufficient.any():
            np.putmask(d, insufficient, np.nan)
            np.putmask(count, insufficient, np.nan)
    return count, d

869 

870 

@bottleneck_switch(ddof=1)
def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    if values.dtype == "M8[ns]":
        # deviations of datetimes are timedeltas, so reduce as timedelta64
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    # std = sqrt(var); _wrap_results restores the original (possibly
    # timedelta) dtype
    result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask))
    return _wrap_results(result, orig_dtype)

908 

909 

@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # integers cannot hold NaN; work in float64 so masking works below
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out NAs so they do not contribute to the sums below
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        # make the mean broadcast against values along the reduced axis
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked positions must not contribute to the squared deviations
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result

977 

978 

@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof)

    # SEM = sqrt(var) / sqrt(n)
    return np.sqrt(var) / np.sqrt(count)

1027 

1028 

def _nanminmax(meth, fill_value_typ):
    """
    Factory for nanmin/nanmax: build a reduction that fills NAs with the
    identity element (+inf for min, -inf for max) before reducing.
    """

    @bottleneck_switch(name="nan" + meth)
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:

        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        if (axis is not None and values.shape[axis] == 0) or values.size == 0:
            # empty reduction: result is all-NaN (or a scalar NaN)
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        result = _maybe_null_out(result, axis, mask, values.shape)
        return result

    return reduction

1057 

1058 

# Concrete reductions built from the factory above; the fill value replaces
# NaN positions so they can never win the comparison ("+inf" for min,
# "-inf" for max).
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")

1061 

1062 

@disallow("O")
def nanargmax(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Return the index/indices of the maximum value along ``axis``.

    NaN entries are filled with ``-inf`` before the argmax so they can
    never be selected; all-NA slices produce ``-1``.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    # Replace the raw argmax with -1 wherever the data were (all) NA.
    return _maybe_arg_null_out(values.argmax(axis), axis, mask, skipna)

1107 

1108 

@disallow("O")
def nanargmin(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Return the index/indices of the minimum value along ``axis``.

    NaN entries are filled with ``+inf`` before the argmin so they can
    never be selected; all-NA slices produce ``-1``.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of min value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmin(arr)
    0

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 0] = np.nan
    >>> nanops.nanargmin(arr, axis=1)
    array([0, 0, 1, 1])
    """
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask)
    # Replace the raw argmin with -1 wherever the data were (all) NA.
    return _maybe_arg_null_out(values.argmin(axis), axis, mask, skipna)

1153 

1154 

@disallow("M8", "m8")
@maybe_operate_rowwise
def nanskew(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions (on a copy) so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the mean broadcasts against ``values`` below.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # Re-zero NA positions: they picked up ``-mean`` in the subtraction.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    # Second and third central moments as raw sums (not divided by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        # G1 = bias-correction factor * m3 / m2**1.5
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero variance means zero skew; fewer than 3 points is undefined.
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan

    return result

1241 

1242 

@disallow("M8", "m8")
@maybe_operate_rowwise
def nankurt(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NA positions (on a copy) so they do not contribute to sums.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the mean broadcasts against ``values`` below.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # Re-zero NA positions: they picked up ``-mean`` in the subtraction.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted4 = adjusted2**2
    # Second and fourth central moments as raw sums (not divided by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        # G2 = numerator / denominator - adj (adjusted Fisher-Pearson form)
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numerator = count * (count + 1) * (count - 1) * m4
        denominator = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numerator = _zero_out_fperr(numerator)
    denominator = _zero_out_fperr(denominator)

    if not isinstance(denominator, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denominator == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numerator / denominator - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero denominator means zero kurtosis; fewer than 4 points undefined.
        result = np.where(denominator == 0, 0, result)
        result[count < 4] = np.nan

    return result

1338 

1339 

@disallow("M8", "m8")
@maybe_operate_rowwise
def nanprod(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Product over the given axis with NaNs treated as the identity (1).

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count : int, default 0
        Minimum number of non-NA values required to keep a result.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    Dtype
        The product of all elements on a given axis. ( NaNs are treated as 1)

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, 3, np.nan])
    >>> nanops.nanprod(s)
    6.0
    """
    mask = _maybe_get_mask(values, skipna, mask)

    if skipna and mask is not None:
        # Work on a copy: NA slots become the multiplicative identity.
        values = values.copy()
        np.putmask(values, mask, 1)

    product = values.prod(axis)
    # error: Incompatible return value type (got "Union[ndarray, float]", expected
    # "float")
    return _maybe_null_out(  # type: ignore[return-value]
        product, axis, mask, values.shape, min_count=min_count
    )

1383 

1384 

def _maybe_arg_null_out(
    result: np.ndarray,
    axis: int | None,
    mask: npt.NDArray[np.bool_] | None,
    skipna: bool,
) -> np.ndarray | int:
    """
    Replace argmin/argmax output with -1 where the data were all-NA
    (skipna=True) or contained any NA (skipna=False).
    """
    if mask is None:
        # No missing values recorded; nothing to null out.
        return result

    # With skipna we only bail out when *every* value is NA; without it a
    # single NA poisons the result.
    reducer = mask.all if skipna else mask.any

    if axis is None or not getattr(result, "ndim", False):
        # Scalar reduction.
        return -1 if reducer() else result

    na_mask = reducer(axis)
    if na_mask.any():
        result[na_mask] = -1
    return result

1410 

1411 

def _get_counts(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: int | None,
    dtype: np.dtype = np.dtype(np.float64),
) -> float | np.ndarray:
    """
    Count the non-null values along an axis.

    Parameters
    ----------
    values_shape : tuple of int
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : scalar or array
    """
    if axis is None:
        # Flat count over the whole array.
        total = np.prod(values_shape) if mask is None else mask.size - mask.sum()
        return dtype.type(total)

    if mask is None:
        # No missing values: every slice along ``axis`` is full.
        count = values_shape[axis]
    else:
        count = mask.shape[axis] - mask.sum(axis)

    if is_scalar(count):
        return dtype.type(count)
    return count.astype(dtype, copy=False)

1451 

1452 

def _maybe_null_out(
    result: np.ndarray | float | NaTType,
    axis: int | None,
    mask: npt.NDArray[np.bool_] | None,
    shape: tuple[int, ...],
    min_count: int = 1,
) -> np.ndarray | float | NaTType:
    """
    Null out entries of ``result`` where fewer than ``min_count`` non-NA
    input values contributed to the reduction.

    Parameters
    ----------
    result : ndarray, scalar, or NaT
        The raw reduction result.
    axis : int or None
        Axis the reduction was taken over, or None for a flat reduction.
    mask : ndarray[bool] or None
        True at missing positions of the original values; None means there
        were no missing values.
    shape : tuple of int
        Shape of the original values, used when ``mask`` is None.
    min_count : int, default 1
        Minimum number of non-NA values required to keep a result.

    Returns
    -------
    ndarray, scalar, or NaT
        ``result`` with below-threshold entries set to NaN (or None for
        non-numeric dtypes).
    """
    if axis is not None and isinstance(result, np.ndarray):
        if mask is not None:
            # Valid-count per slice compared against the threshold.
            null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
        else:
            # we have no nulls, kept mask=None in _maybe_get_mask
            below_count = shape[axis] - min_count < 0
            new_shape = shape[:axis] + shape[axis + 1 :]
            null_mask = np.broadcast_to(below_count, new_shape)

        if np.any(null_mask):
            if is_numeric_dtype(result):
                if np.iscomplexobj(result):
                    result = result.astype("c16")
                elif not is_float_dtype(result):
                    # Integer results cannot hold NaN; upcast to float64 first.
                    result = result.astype("f8", copy=False)
                result[null_mask] = np.nan
            else:
                # GH12941, use None to auto cast null
                result[null_mask] = None
    elif result is not NaT:
        # Scalar reduction: a single below-threshold check suffices.
        if check_below_min_count(shape, mask, min_count):
            result = np.nan

    return result

1490 

1491 

def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # A non-positive threshold can never fail.
        return False
    # Without a mask there are no missing values, so the valid count is
    # just the total element count.
    non_nulls = np.prod(shape) if mask is None else mask.size - mask.sum()
    return bool(non_nulls < min_count)

1521 

1522 

def _zero_out_fperr(arg):
    """
    Clamp magnitudes below 1e-14 to exactly zero.

    #18044 reference this behavior to fix rolling skew/kurt issue:
    accumulated floating-point error in the central moments is treated
    as a true zero so downstream ratios behave.
    """
    if isinstance(arg, np.ndarray):
        with np.errstate(invalid="ignore"):
            return np.where(np.abs(arg) < 1e-14, 0, arg)
    # numpy scalar: preserve its dtype when zeroing.
    return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg

1530 

1531 

@disallow("M8", "m8")
def nancorr(
    a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None
) -> float:
    """
    Correlation of two 1-D arrays, dropping positions where either is NA.

    a, b: ndarrays
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancorr must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only positions observed in both operands.
    valid = notna(a) & notna(b)
    if not valid.all():
        a, b = a[valid], b[valid]

    if len(a) < min_periods:
        return np.nan

    return get_corr_func(method)(a, b)

1555 

1556 

def get_corr_func(method) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Resolve ``method`` to a correlation function of two arrays.

    ``method`` may be "kendall", "spearman", "pearson", or any callable of
    two arrays; scipy is imported lazily for the rank-based methods.
    """
    if callable(method):
        return method

    if method == "kendall":
        from scipy.stats import kendalltau

        return lambda a, b: kendalltau(a, b)[0]

    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]

    if method == "pearson":
        return lambda a, b: np.corrcoef(a, b)[0, 1]

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )

1585 

1586 

@disallow("M8", "m8")
def nancov(
    a: np.ndarray,
    b: np.ndarray,
    *,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Covariance of two 1-D arrays, dropping positions where either is NA.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancov must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only positions observed in both operands.
    valid = notna(a) & notna(b)
    if not valid.all():
        a, b = a[valid], b[valid]

    if len(a) < min_periods:
        return np.nan

    return np.cov(a, b, ddof=ddof)[0, 1]

1610 

1611 

def _ensure_numeric(x):
    """
    Coerce ``x`` (ndarray or scalar) to a numeric value, raising TypeError
    when the contents cannot be interpreted as numbers.

    Integer/bool arrays become float64. Object arrays are parsed as complex
    (falling back to float); a complex result with no imaginary part is
    collapsed to its real component. Non-numeric scalars are parsed via
    ``float`` first, then ``complex``.
    """
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            return x.astype(np.float64)
        if is_object_dtype(x):
            try:
                converted = x.astype(np.complex128)
            except (TypeError, ValueError):
                try:
                    return x.astype(np.float64)
                except ValueError as err:
                    # GH#29941 we get here with object arrays containing strs
                    raise TypeError(f"Could not convert {x} to numeric") from err
            else:
                # Drop the imaginary part when nothing survived in it.
                if not np.any(np.imag(converted)):
                    return converted.real
                return converted
        # Already-numeric (e.g. float/complex) arrays pass through untouched.
        return x

    if is_float(x) or is_integer(x) or is_complex(x):
        return x
    try:
        return float(x)
    except (TypeError, ValueError):
        # e.g. "1+1j" or "foo"
        try:
            return complex(x)
        except ValueError as err:
            # e.g. "foo"
            raise TypeError(f"Could not convert {x} to numeric") from err

1639 

1640 

1641# NA-friendly array comparisons 

1642 

1643 

def make_nancomp(op):
    """
    Wrap comparison operator ``op`` so that NA positions in either operand
    propagate as NaN in the output (upcasting bool results to object).
    """

    def comp(x, y):
        # Positions missing on either side.
        invalid = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if invalid.any():
            if is_bool_dtype(result):
                # bool cannot hold NaN; upcast to object first.
                result = result.astype("O")
            np.putmask(result, invalid, np.nan)

        return result

    return comp

1661 

1662 

# NA-aware elementwise comparisons: NA on either side yields NaN in the
# result instead of a comparison outcome.
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)

1669 

1670 

def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # mask_a: identity fill written over NA slots before accumulating;
    # mask_b: value written back into the result at those slots.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        # Work on the underlying int64 representation of datetimes/timedeltas.
        y = values.view("i8")
        # Note: the accum_func comparison fails as an "is" comparison
        changed = accum_func == np.minimum.accumulate

        try:
            if changed:
                # NaT is stored as the minimal int64 value, which would always
                # win a running minimum; temporarily fill with i8max instead.
                y[mask] = lib.i8max

            result = accum_func(y, axis=0)
        finally:
            if changed:
                # restore NaT elements
                y[mask] = iNaT

        if skipna:
            # NA inputs stay NA in the output.
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if isinstance(values.dtype, np.dtype):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray/TimedeltaArray
            # TODO: have this case go through a DTA method?
            # For DatetimeTZDtype, view result as M8[ns]
            npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]"
            # Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]"
            # has no attribute "_simple_new"
            result = type(values)._simple_new(  # type: ignore[union-attr]
                result.view(npdtype), dtype=orig_dtype
            )

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Float-like data: fill NAs with the identity, accumulate, then
        # restore NaN at the originally-missing positions.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        # Integer/bool data cannot hold NAs; accumulate directly.
        result = accum_func(values, axis=0)

    return result