Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/nanops.py: 14%
630 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import functools
4import itertools
5import operator
6from typing import (
7 Any,
8 Callable,
9 cast,
10)
11import warnings
13import numpy as np
15from pandas._config import get_option
17from pandas._libs import (
18 NaT,
19 NaTType,
20 iNaT,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 Dtype,
26 DtypeObj,
27 F,
28 Scalar,
29 Shape,
30 npt,
31)
32from pandas.compat._optional import import_optional_dependency
34from pandas.core.dtypes.common import (
35 is_any_int_dtype,
36 is_bool_dtype,
37 is_complex,
38 is_datetime64_any_dtype,
39 is_float,
40 is_float_dtype,
41 is_integer,
42 is_integer_dtype,
43 is_numeric_dtype,
44 is_object_dtype,
45 is_scalar,
46 is_timedelta64_dtype,
47 needs_i8_conversion,
48 pandas_dtype,
49)
50from pandas.core.dtypes.dtypes import PeriodDtype
51from pandas.core.dtypes.missing import (
52 isna,
53 na_value_for_dtype,
54 notna,
55)
57from pandas.core.construction import extract_array
# Optional bottleneck acceleration: import it if available, emitting a
# warning (not raising) when it is missing.  _USE_BOTTLENECK starts False
# and is toggled by set_use_bottleneck() below, driven by the
# "compute.use_bottleneck" option.
bn = import_optional_dependency("bottleneck", errors="warn")
_BOTTLENECK_INSTALLED = bn is not None
_USE_BOTTLENECK = False
def set_use_bottleneck(v: bool = True) -> None:
    """
    Enable or disable dispatching reductions to bottleneck.

    Has no effect when bottleneck is not installed.

    Parameters
    ----------
    v : bool, default True
        Whether nanops should use bottleneck where possible.
    """
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v
71set_use_bottleneck(get_option("compute.use_bottleneck"))
class disallow:
    """
    Decorator class that rejects reductions on the given dtypes.

    Any positional or keyword argument whose ``.dtype.type`` is a subclass
    of one of the disallowed dtypes triggers a TypeError before the wrapped
    function runs.  ValueErrors raised on object-dtype input are re-raised
    as TypeError, the conventional error for disallowed reductions.
    """

    def __init__(self, *dtypes: Dtype) -> None:
        super().__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        # True when obj carries a dtype matching one of the disallowed dtypes.
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            for obj in itertools.chain(args, kwargs.values()):
                if self.check(obj):
                    f_name = f.__name__.replace("nan", "")
                    raise TypeError(
                        f"reduction operation '{f_name}' not allowed for this dtype"
                    )
            try:
                with np.errstate(invalid="ignore"):
                    return f(*args, **kwargs)
            except ValueError as e:
                # Object arrays containing e.g. strings make numpy raise
                # ValueError; surface the more typical TypeError instead.
                if is_object_dtype(args[0]):
                    raise TypeError(e) from e
                raise

        return cast(F, _f)
class bottleneck_switch:
    """
    Decorator factory: dispatch a nanop to the same-named bottleneck
    function when bottleneck is enabled and the dtype is supported;
    otherwise fall back to the decorated (``alt``) implementation.

    Parameters
    ----------
    name : str, optional
        bottleneck function name; defaults to the decorated function's name.
    **kwargs
        Default keyword arguments injected into every call (e.g. ``ddof=1``).
    """

    def __init__(self, name=None, **kwargs) -> None:
        self.name = name
        self.kwargs = kwargs

    def __call__(self, alt: F) -> F:
        bn_name = self.name or alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            # bn is None when bottleneck is not installed
            bn_func = None

        @functools.wraps(alt)
        def f(
            values: np.ndarray,
            *,
            axis: int | None = None,
            skipna: bool = True,
            **kwds,
        ):
            # Inject this switch's default kwargs without overriding
            # caller-supplied values.
            if len(self.kwargs) > 0:
                for k, v in self.kwargs.items():
                    if k not in kwds:
                        kwds[k] = v

            if values.size == 0 and kwds.get("min_count") is None:
                # We are empty, returning NA for our type
                # Only applies for the default `min_count` of None
                # since that affects how empty arrays are handled.
                # TODO(GH-18976) update all the nanops methods to
                # correctly handle empty inputs and remove this check.
                # It *may* just be `var`
                return _na_for_min_count(values, axis)

            if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
                if kwds.get("mask", None) is None:
                    # `mask` is not recognised by bottleneck, would raise
                    # TypeError if called
                    kwds.pop("mask", None)
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    # a mask was supplied: bottleneck cannot honor it
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            else:
                result = alt(values, axis=axis, skipna=skipna, **kwds)

            return result

        return cast(F, f)
162def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:
163 # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
164 if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):
165 # GH 42878
166 # Bottleneck uses naive summation leading to O(n) loss of precision
167 # unlike numpy which implements pairwise summation, which has O(log(n)) loss
168 # crossref: https://github.com/pydata/bottleneck/issues/379
170 # GH 15507
171 # bottleneck does not properly upcast during the sum
172 # so can overflow
174 # GH 9422
175 # further we also want to preserve NaN when all elements
176 # are NaN, unlike bottleneck/numpy which consider this
177 # to be 0
178 return name not in ["nansum", "nanprod", "nanmean"]
179 return False
182def _has_infs(result) -> bool:
183 if isinstance(result, np.ndarray):
184 if result.dtype == "f8" or result.dtype == "f4":
185 # Note: outside of an nanops-specific test, we always have
186 # result.ndim == 1, so there is no risk of this ravel making a copy.
187 return lib.has_infs(result.ravel("K"))
188 try:
189 return np.isinf(result).any()
190 except (TypeError, NotImplementedError):
191 # if it doesn't support infs, then it can't have infs
192 return False
195def _get_fill_value(
196 dtype: DtypeObj, fill_value: Scalar | None = None, fill_value_typ=None
197):
198 """return the correct fill value for the dtype of the values"""
199 if fill_value is not None:
200 return fill_value
201 if _na_ok_dtype(dtype):
202 if fill_value_typ is None:
203 return np.nan
204 else:
205 if fill_value_typ == "+inf":
206 return np.inf
207 else:
208 return -np.inf
209 else:
210 if fill_value_typ == "+inf":
211 # need the max int here
212 return lib.i8max
213 else:
214 return iNaT
217def _maybe_get_mask(
218 values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
219) -> npt.NDArray[np.bool_] | None:
220 """
221 Compute a mask if and only if necessary.
223 This function will compute a mask iff it is necessary. Otherwise,
224 return the provided mask (potentially None) when a mask does not need to be
225 computed.
227 A mask is never necessary if the values array is of boolean or integer
228 dtypes, as these are incapable of storing NaNs. If passing a NaN-capable
229 dtype that is interpretable as either boolean or integer data (eg,
230 timedelta64), a mask must be provided.
232 If the skipna parameter is False, a new mask will not be computed.
234 The mask is computed using isna() by default. Setting invert=True selects
235 notna() as the masking function.
237 Parameters
238 ----------
239 values : ndarray
240 input array to potentially compute mask for
241 skipna : bool
242 boolean for whether NaNs should be skipped
243 mask : Optional[ndarray]
244 nan-mask if known
246 Returns
247 -------
248 Optional[np.ndarray[bool]]
249 """
250 if mask is None:
251 if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
252 # Boolean data cannot contain nulls, so signal via mask being None
253 return None
255 if skipna or needs_i8_conversion(values.dtype):
256 mask = isna(values)
258 return mask
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: str | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray[bool]]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    # with scalar fill_value. This guarantee is important for the
    # np.where call below
    assert is_scalar(fill_value)
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]

    # compute the mask from the original values, before any i8 view below
    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        # finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                # dtype can hold the fill value: fill a copy in place
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
350def _na_ok_dtype(dtype: DtypeObj) -> bool:
351 if needs_i8_conversion(dtype):
352 return False
353 return not issubclass(dtype.type, np.integer)
def _wrap_results(result, dtype: np.dtype, fill_value=None):
    """
    wrap our results if needed

    Cast an (int64-based) reduction result back to the datetimelike
    ``dtype`` it came from; non-datetimelike dtypes pass through unchanged.

    Parameters
    ----------
    result : scalar or ndarray
        raw reduction result, possibly in i8 space
    dtype : np.dtype
        dtype of the original values
    fill_value : optional
        sentinel that represents "missing" in ``result``

    Raises
    ------
    ValueError
        on timedelta results too large for timedelta64[ns]
    """
    if result is NaT:
        # already a proper missing value; nothing to do
        pass

    elif is_datetime64_any_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan

            if isna(result):
                result = np.datetime64("NaT", "ns")
            else:
                result = np.int64(result).view("datetime64[ns]")
            # retain original unit
            result = result.astype(dtype, copy=False)
        else:
            # If we have float dtype, taking a view will give the wrong result
            result = result.astype(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value or np.isnan(result):
                result = np.timedelta64("NaT").astype(dtype)

            elif np.fabs(result) > lib.i8max:
                # raise if we have a timedelta64[ns] which is too large
                raise ValueError("overflow in timedelta operation")
            else:
                # return a timedelta64 with the original unit
                result = np.int64(result).astype(dtype, copy=False)

        else:
            result = result.astype("m8[ns]").view(dtype)

    return result
def _datetimelike_compat(func: F) -> F:
    """
    If we have datetime64 or timedelta64 values, ensure we have a correct
    mask before calling the wrapped function, then cast back afterwards.
    """

    @functools.wraps(func)
    def new_func(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
        **kwargs,
    ):
        orig_values = values

        datetimelike = values.dtype.kind in ["m", "M"]
        if datetimelike and mask is None:
            # mask must come from the original datetimelike values
            mask = isna(values)

        result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if datetimelike:
            # cast the i8-based result back to the original dtype
            result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

            if not skipna:
                assert mask is not None  # checked above
                # with skipna=False any masked value poisons the result
                result = _mask_datetimelike_result(result, axis, mask, orig_values)

        return result

    return cast(F, new_func)
431def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray:
432 """
433 Return the missing value for `values`.
435 Parameters
436 ----------
437 values : ndarray
438 axis : int or None
439 axis for the reduction, required if values.ndim > 1.
441 Returns
442 -------
443 result : scalar or ndarray
444 For 1-D values, returns a scalar of the correct missing type.
445 For 2-D values, returns a 1-D array where each element is missing.
446 """
447 # we either return np.nan or pd.NaT
448 if is_numeric_dtype(values):
449 values = values.astype("float64")
450 fill_value = na_value_for_dtype(values.dtype)
452 if values.ndim == 1:
453 return fill_value
454 elif axis is None:
455 return fill_value
456 else:
457 result_shape = values.shape[:axis] + values.shape[axis + 1 :]
459 return np.full(result_shape, fill_value, dtype=values.dtype)
def maybe_operate_rowwise(func: F) -> F:
    """
    NumPy operations on C-contiguous ndarrays with axis=1 can be
    very slow if axis 1 >> axis 0.
    Operate row-by-row and concatenate the results.
    """

    @functools.wraps(func)
    def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs):
        # Only reroute wide, C-contiguous, non-object/bool 2-D arrays reduced
        # along axis=1; for the wideness threshold see
        # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
        wide_row_reduction = (
            axis == 1
            and values.ndim == 2
            and values.flags["C_CONTIGUOUS"]
            and (values.shape[1] / 1000) > values.shape[0]
            and values.dtype != object
            and values.dtype != bool
        )
        if not wide_row_reduction:
            return func(values, axis=axis, **kwargs)

        rows = list(values)
        if kwargs.get("mask") is not None:
            mask = kwargs.pop("mask")
            results = [
                func(row, mask=row_mask, **kwargs)
                for row, row_mask in zip(rows, mask)
            ]
        else:
            results = [func(row, **kwargs) for row in rows]
        return np.array(results)

    return cast(F, newfunc)
def nanany(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2])
    >>> nanops.nanany(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([np.nan])
    >>> nanops.nanany(s)
    False
    """
    # NaNs are replaced by False so they can never count as truthy.
    values = _get_values(values, skipna, fill_value=False, mask=mask)[0]

    if is_object_dtype(values):
        # For object type, any won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.any(axis)  # type: ignore[return-value]
def nanall(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
    """
    Check if all elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : bool

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanall(s)
    True

    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 0])
    >>> nanops.nanall(s)
    False
    """
    # NaNs are replaced by True so they can never break an all-True run.
    values = _get_values(values, skipna, fill_value=True, mask=mask)[0]

    if is_object_dtype(values):
        # For object type, all won't necessarily return
        # boolean values (numpy/numpy#4352)
        values = values.astype(bool)

    # error: Incompatible return value type (got "Union[bool_, ndarray]", expected
    # "bool")
    return values.all(axis)  # type: ignore[return-value]
@disallow("M8")
@_datetimelike_compat
@maybe_operate_rowwise
def nansum(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Sum the elements along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count: int, default 0
        minimum number of non-NA values required for a non-NA result
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : dtype

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nansum(s)
    3.0
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )

    # Accumulate in the platform-independent dtype, except: keep float
    # precision as-is, and sum timedeltas (i8 view) in float64.
    if is_float_dtype(dtype):
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
    else:
        dtype_sum = dtype_max

    the_sum = values.sum(axis, dtype=dtype_sum)
    return _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
637def _mask_datetimelike_result(
638 result: np.ndarray | np.datetime64 | np.timedelta64,
639 axis: int | None,
640 mask: npt.NDArray[np.bool_],
641 orig_values: np.ndarray,
642) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
643 if isinstance(result, np.ndarray):
644 # we need to apply the mask
645 result = result.astype("i8").view(orig_values.dtype)
646 axis_mask = mask.any(axis=axis)
647 # error: Unsupported target for indexed assignment ("Union[ndarray[Any, Any],
648 # datetime64, timedelta64]")
649 result[axis_mask] = iNaT # type: ignore[index]
650 else:
651 if mask.any():
652 return np.int64(iNaT).view(orig_values.dtype)
653 return result
@disallow(PeriodDtype)
@bottleneck_switch()
@_datetimelike_compat
def nanmean(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the mean of the element along an axis ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, fill_value=0, mask=mask
    )

    # Choose accumulation dtypes for the sum and the count.
    # not using needs_i8_conversion because that includes period
    if dtype.kind in ["m", "M"] or is_integer_dtype(dtype):
        dtype_sum = np.dtype(np.float64)
        dtype_count = np.dtype(np.float64)
    elif is_float_dtype(dtype):
        # keep the input's float precision for both sum and count
        dtype_sum = dtype
        dtype_count = dtype
    else:
        dtype_sum = dtype_max
        dtype_count = np.dtype(np.float64)

    count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))

    if axis is not None and getattr(the_sum, "ndim", False):
        # array result: divide elementwise, forcing NaN where count == 0
        count = cast(np.ndarray, count)
        with np.errstate(all="ignore"):
            # suppress division by zero warnings
            the_mean = the_sum / count
        ct_mask = count == 0
        if ct_mask.any():
            the_mean[ct_mask] = np.nan
        return the_mean

    # scalar result
    return the_sum / count if count > 0 else np.nan
@bottleneck_switch()
def nanmedian(values, *, axis=None, skipna=True, mask=None):
    """
    Compute the median along the given axis while ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 2])
    >>> nanops.nanmedian(s)
    2.0
    """

    def get_median(x):
        # median over the non-null entries of a 1-D slice; NaN when
        # skipna=False and any entry is null
        mask = notna(x)
        if not skipna and not mask.all():
            return np.nan
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings("ignore", "All-NaN slice encountered")
            res = np.nanmedian(x[mask])
        return res

    values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask)
    if not is_float_dtype(values.dtype):
        try:
            values = values.astype("f8")
        except ValueError as err:
            # e.g. "could not convert string to float: 'a'"
            raise TypeError(str(err)) from err
        if mask is not None:
            values[mask] = np.nan

    notempty = values.size

    # an array from a frame
    if values.ndim > 1 and axis is not None:

        # there's a non-empty array to apply over otherwise numpy raises
        if notempty:
            if not skipna:
                res = np.apply_along_axis(get_median, axis, values)

            else:
                # fastpath for the skipna case
                with warnings.catch_warnings():
                    # Suppress RuntimeWarning about All-NaN slice
                    warnings.filterwarnings("ignore", "All-NaN slice encountered")
                    res = np.nanmedian(values, axis)

        else:
            # must return the correct shape, but median is not defined for the
            # empty set so return nans of shape "everything but the passed axis"
            # since "axis" is where the reduction would occur if we had a nonempty
            # array
            # NOTE: np.float64 (not the np.float_ alias, removed in NumPy 2.0)
            res = get_empty_reduction_result(values.shape, axis, np.float64, np.nan)

    else:
        # otherwise return a scalar value
        res = get_median(values) if notempty else np.nan

    return _wrap_results(res, dtype)
def get_empty_reduction_result(
    shape: tuple[int, ...],
    axis: int,
    dtype: np.dtype | type[np.floating],
    fill_value: Any,
) -> np.ndarray:
    """
    The result from a reduction on an empty ndarray.

    Parameters
    ----------
    shape : Tuple[int]
        shape of the (empty) array being reduced
    axis : int
        the axis that would have been reduced away
    fill_value : Any
        value the result is filled with

    Returns
    -------
    np.ndarray
        ``shape`` with ``axis`` removed, filled with ``fill_value``.
    """
    # drop the reduced axis from the shape, then fill with the NA value
    result_shape = tuple(n for i, n in enumerate(shape) if i != axis)
    return np.full(result_shape, fill_value, dtype=dtype)
def _get_counts_nanvar(
    values_shape: Shape,
    mask: npt.NDArray[np.bool_] | None,
    axis: int | None,
    ddof: int,
    dtype: np.dtype = np.dtype(np.float64),
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """
    Get the count of non-null values along an axis, accounting
    for degrees of freedom.

    Parameters
    ----------
    values_shape : Tuple[int, ...]
        shape tuple from values ndarray, used if mask is None
    mask : Optional[ndarray[bool]]
        locations in values that should be considered missing
    axis : Optional[int]
        axis to count along
    ddof : int
        degrees of freedom
    dtype : type, optional
        type to use for count

    Returns
    -------
    count : int, np.nan or np.ndarray
    d : int, np.nan or np.ndarray
    """
    count = _get_counts(values_shape, mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # Degenerate counts (<= ddof) would produce inf/-inf downstream;
    # always return NaN, never inf.
    if is_scalar(count):
        if count <= ddof:
            count = np.nan
            d = np.nan
    else:
        # count is not narrowed by is_scalar check
        count = cast(np.ndarray, count)
        degenerate = count <= ddof
        if degenerate.any():
            np.putmask(d, degenerate, np.nan)
            np.putmask(count, degenerate, np.nan)
    return count, d
@bottleneck_switch(ddof=1)
def nanstd(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the standard deviation along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanstd(s)
    1.0
    """
    # std of datetimes is a timedelta: work in m8 space
    if values.dtype == "M8[ns]":
        values = values.view("m8[ns]")

    orig_dtype = values.dtype
    values, mask, _, _, _ = _get_values(values, skipna, mask=mask)

    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)
    return _wrap_results(np.sqrt(var), orig_dtype)
@disallow("M8", "m8")
@bottleneck_switch(ddof=1)
def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = extract_array(values, extract_numpy=True)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(dtype):
        # ints cannot hold NaN: promote so the mask can be applied
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values.dtype):
        # count in the input's float precision
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        # zero out missing entries on a copy so they don't affect the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values) ** 2)
    if mask is not None:
        # masked entries contributed avg**2 above; zero them back out
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)
    return result
@disallow("M8", "m8")
def nansem(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    ddof: int = 1,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the standard error in the mean along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nansem(s)
    0.5773502691896258
    """
    # This checks if non-numeric-like data is passed with numeric_only=False
    # and raises a TypeError otherwise
    nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)

    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")

    count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype)
    var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof)

    # sem = sqrt(var) / sqrt(n)
    return np.sqrt(var) / np.sqrt(count)
def _nanminmax(meth, fill_value_typ):
    """
    Factory producing nanmin/nanmax: ``meth`` is "min" or "max" and
    ``fill_value_typ`` names the infinity used to neutralize NaNs.
    """

    @bottleneck_switch(name="nan" + meth)
    @_datetimelike_compat
    def reduction(
        values: np.ndarray,
        *,
        axis: int | None = None,
        skipna: bool = True,
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> Dtype:

        values, mask, dtype, dtype_max, fill_value = _get_values(
            values, skipna, fill_value_typ=fill_value_typ, mask=mask
        )

        if (axis is not None and values.shape[axis] == 0) or values.size == 0:
            # empty reduction: produce NaN of the right shape when possible,
            # falling back to a scalar NaN when the dtype/axis combination
            # cannot support it
            try:
                result = getattr(values, meth)(axis, dtype=dtype_max)
                result.fill(np.nan)
            except (AttributeError, TypeError, ValueError):
                result = np.nan
        else:
            result = getattr(values, meth)(axis)

        result = _maybe_null_out(result, axis, mask, values.shape)
        return result

    return reduction


# NaNs are filled with +inf for min and -inf for max so they can never win.
nanmin = _nanminmax("min", fill_value_typ="+inf")
nanmax = _nanminmax("max", fill_value_typ="-inf")
@disallow("O")
def nanargmax(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along an axis) of the maximum, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of max value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmax(arr)
    4

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 2] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7., nan],
           [ 9., 10., nan]])
    >>> nanops.nanargmax(arr, axis=1)
    array([2, 2, 1, 1])
    """
    # NaNs are sunk to -inf so they can never be the argmax.
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="-inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmax(axis)  # type: ignore[var-annotated]
    return _maybe_arg_null_out(result, axis, mask, skipna)
@disallow("O")
def nanargmin(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
    """
    Index (or indices along an axis) of the minimum, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : int or ndarray[int]
        The index/indices of min value in specified axis or -1 in the NA case

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> arr = np.array([1, 2, 3, np.nan, 4])
    >>> nanops.nanargmin(arr)
    0

    >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3)
    >>> arr[2:, 0] = np.nan
    >>> arr
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [nan,  7.,  8.],
           [nan, 10., 11.]])
    >>> nanops.nanargmin(arr, axis=1)
    array([0, 0, 1, 1])
    """
    # NaNs are lifted to +inf so they can never be the argmin.
    values, mask, _, _, _ = _get_values(values, True, fill_value_typ="+inf", mask=mask)
    # error: Need type annotation for 'result'
    result = values.argmin(axis)  # type: ignore[var-annotated]
    return _maybe_arg_null_out(result, axis, mask, skipna)
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanskew(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        # Non-float input: compute in float64; counts default to float64 too.
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        # Float input: keep the counts in the input's precision.
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NAs on a copy so they do not contribute to the sums below.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the subtraction below broadcasts along ``axis``.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # The masked slots became (0 - mean); reset them to zero again.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    # Second and third central moments (not yet normalized by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero variance means zero skew; fewer than 3 observations -> NaN.
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan

    return result
@disallow("M8", "m8")
@maybe_operate_rowwise
def nankurt(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    # error: Incompatible types in assignment (expression has type "Union[Any,
    # Union[ExtensionArray, ndarray]]", variable has type "ndarray")
    values = extract_array(values, extract_numpy=True)  # type: ignore[assignment]
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        # Non-float input: compute in float64; counts default to float64 too.
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        # Float input: keep the counts in the input's precision.
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        # Zero out NAs on a copy so they do not contribute to the sums below.
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        # Keep dims so the subtraction below broadcasts along ``axis``.
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        # The masked slots became (0 - mean); reset them to zero again.
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted4 = adjusted2**2
    # Second and fourth central moments (not yet normalized by count).
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numerator = count * (count + 1) * (count - 1) * m4
        denominator = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numerator = _zero_out_fperr(numerator)
    denominator = _zero_out_fperr(denominator)

    if not isinstance(denominator, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denominator == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numerator / denominator - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype, copy=False)

    if isinstance(result, np.ndarray):
        # Zero denominator means zero kurtosis; <4 observations -> NaN.
        result = np.where(denominator == 0, 0, result)
        result[count < 4] = np.nan

    return result
@disallow("M8", "m8")
@maybe_operate_rowwise
def nanprod(
    values: np.ndarray,
    *,
    axis: int | None = None,
    skipna: bool = True,
    min_count: int = 0,
    mask: npt.NDArray[np.bool_] | None = None,
) -> float:
    """
    Product of array values, with NAs contributing the multiplicative identity.

    Parameters
    ----------
    values : ndarray[dtype]
    axis : int, optional
    skipna : bool, default True
    min_count : int, default 0
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    Dtype
        The product of all elements on a given axis. ( NaNs are treated as 1)

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, 3, np.nan])
    >>> nanops.nanprod(s)
    6.0
    """
    mask = _maybe_get_mask(values, skipna, mask)

    if skipna and mask is not None:
        # Work on a copy so the caller's array is untouched; NAs become 1,
        # the identity for multiplication.
        values = values.copy()
        values[mask] = 1

    product = values.prod(axis)
    # error: Incompatible return value type (got "Union[ndarray, float]", expected
    # "float")
    return _maybe_null_out(  # type: ignore[return-value]
        product, axis, mask, values.shape, min_count=min_count
    )
1385def _maybe_arg_null_out(
1386 result: np.ndarray,
1387 axis: int | None,
1388 mask: npt.NDArray[np.bool_] | None,
1389 skipna: bool,
1390) -> np.ndarray | int:
1391 # helper function for nanargmin/nanargmax
1392 if mask is None:
1393 return result
1395 if axis is None or not getattr(result, "ndim", False):
1396 if skipna:
1397 if mask.all():
1398 return -1
1399 else:
1400 if mask.any():
1401 return -1
1402 else:
1403 if skipna:
1404 na_mask = mask.all(axis)
1405 else:
1406 na_mask = mask.any(axis)
1407 if na_mask.any():
1408 result[na_mask] = -1
1409 return result
1412def _get_counts(
1413 values_shape: Shape,
1414 mask: npt.NDArray[np.bool_] | None,
1415 axis: int | None,
1416 dtype: np.dtype = np.dtype(np.float64),
1417) -> float | np.ndarray:
1418 """
1419 Get the count of non-null values along an axis
1421 Parameters
1422 ----------
1423 values_shape : tuple of int
1424 shape tuple from values ndarray, used if mask is None
1425 mask : Optional[ndarray[bool]]
1426 locations in values that should be considered missing
1427 axis : Optional[int]
1428 axis to count along
1429 dtype : type, optional
1430 type to use for count
1432 Returns
1433 -------
1434 count : scalar or array
1435 """
1436 if axis is None:
1437 if mask is not None:
1438 n = mask.size - mask.sum()
1439 else:
1440 n = np.prod(values_shape)
1441 return dtype.type(n)
1443 if mask is not None:
1444 count = mask.shape[axis] - mask.sum(axis)
1445 else:
1446 count = values_shape[axis]
1448 if is_scalar(count):
1449 return dtype.type(count)
1450 return count.astype(dtype, copy=False)
1453def _maybe_null_out(
1454 result: np.ndarray | float | NaTType,
1455 axis: int | None,
1456 mask: npt.NDArray[np.bool_] | None,
1457 shape: tuple[int, ...],
1458 min_count: int = 1,
1459) -> np.ndarray | float | NaTType:
1460 """
1461 Returns
1462 -------
1463 Dtype
1464 The product of all elements on a given axis. ( NaNs are treated as 1)
1465 """
1466 if axis is not None and isinstance(result, np.ndarray):
1467 if mask is not None:
1468 null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
1469 else:
1470 # we have no nulls, kept mask=None in _maybe_get_mask
1471 below_count = shape[axis] - min_count < 0
1472 new_shape = shape[:axis] + shape[axis + 1 :]
1473 null_mask = np.broadcast_to(below_count, new_shape)
1475 if np.any(null_mask):
1476 if is_numeric_dtype(result):
1477 if np.iscomplexobj(result):
1478 result = result.astype("c16")
1479 elif not is_float_dtype(result):
1480 result = result.astype("f8", copy=False)
1481 result[null_mask] = np.nan
1482 else:
1483 # GH12941, use None to auto cast null
1484 result[null_mask] = None
1485 elif result is not NaT:
1486 if check_below_min_count(shape, mask, min_count):
1487 result = np.nan
1489 return result
def check_below_min_count(
    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
    """
    Check for the `min_count` keyword. Returns True if below `min_count` (when
    missing value should be returned from the reduction).

    Parameters
    ----------
    shape : tuple
        The shape of the values (`values.shape`).
    mask : ndarray[bool] or None
        Boolean numpy array (typically of same shape as `shape`) or None.
    min_count : int
        Keyword passed through from sum/prod call.

    Returns
    -------
    bool
    """
    if min_count <= 0:
        # min_count of 0 (the default for most reductions) never trips.
        return False

    if mask is None:
        # no missing values, only check size
        non_nulls = np.prod(shape)
    else:
        non_nulls = mask.size - mask.sum()

    return bool(non_nulls < min_count)
1523def _zero_out_fperr(arg):
1524 # #18044 reference this behavior to fix rolling skew/kurt issue
1525 if isinstance(arg, np.ndarray):
1526 with np.errstate(invalid="ignore"):
1527 return np.where(np.abs(arg) < 1e-14, 0, arg)
1528 else:
1529 return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
@disallow("M8", "m8")
def nancorr(
    a: np.ndarray, b: np.ndarray, *, method="pearson", min_periods: int | None = None
) -> float:
    """
    a, b: ndarrays
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancorr must have same size")

    min_periods = 1 if min_periods is None else min_periods

    # Keep only the positions observed in both inputs.
    valid = notna(a) & notna(b)
    if not valid.all():
        a = a[valid]
        b = b[valid]

    # Not enough paired observations to report a correlation.
    if len(a) < min_periods:
        return np.nan

    return get_corr_func(method)(a, b)
def get_corr_func(method) -> Callable[[np.ndarray, np.ndarray], float]:
    """
    Map a correlation ``method`` name (or a callable) to a function of
    two ndarrays returning the correlation coefficient.
    """
    if method == "kendall":
        # Imported lazily: scipy is an optional dependency.
        from scipy.stats import kendalltau

        return lambda a, b: kendalltau(a, b)[0]

    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]

    if method == "pearson":
        return lambda a, b: np.corrcoef(a, b)[0, 1]

    if callable(method):
        # User-supplied correlation function is used as-is.
        return method

    raise ValueError(
        f"Unknown method '{method}', expected one of "
        "'kendall', 'spearman', 'pearson', or callable"
    )
@disallow("M8", "m8")
def nancov(
    a: np.ndarray,
    b: np.ndarray,
    *,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Pairwise covariance of ``a`` and ``b``, dropping positions where
    either input is NA.
    """
    if len(a) != len(b):
        raise AssertionError("Operands to nancov must have same size")

    if min_periods is None:
        min_periods = 1

    # Keep only the positions observed in both inputs.
    both = notna(a) & notna(b)
    if not both.all():
        a = a[both]
        b = b[both]

    # Not enough paired observations to report a covariance.
    if len(a) < min_periods:
        return np.nan

    return np.cov(a, b, ddof=ddof)[0, 1]
1612def _ensure_numeric(x):
1613 if isinstance(x, np.ndarray):
1614 if is_integer_dtype(x) or is_bool_dtype(x):
1615 x = x.astype(np.float64)
1616 elif is_object_dtype(x):
1617 try:
1618 x = x.astype(np.complex128)
1619 except (TypeError, ValueError):
1620 try:
1621 x = x.astype(np.float64)
1622 except ValueError as err:
1623 # GH#29941 we get here with object arrays containing strs
1624 raise TypeError(f"Could not convert {x} to numeric") from err
1625 else:
1626 if not np.any(np.imag(x)):
1627 x = x.real
1628 elif not (is_float(x) or is_integer(x) or is_complex(x)):
1629 try:
1630 x = float(x)
1631 except (TypeError, ValueError):
1632 # e.g. "1+1j" or "foo"
1633 try:
1634 x = complex(x)
1635 except ValueError as err:
1636 # e.g. "foo"
1637 raise TypeError(f"Could not convert {x} to numeric") from err
1638 return x
1641# NA-friendly array comparisons
def make_nancomp(op):
    """
    Wrap comparison ``op`` so that positions which are NA in either
    operand come out as NaN instead of a bool.
    """

    def f(x, y):
        # A position missing in either input must not yield a real result.
        mask = isna(x) | isna(y)

        with np.errstate(all="ignore"):
            result = op(x, y)

        if mask.any():
            # A boolean array cannot hold NaN; widen to object first.
            if is_bool_dtype(result):
                result = result.astype("O")
            np.putmask(result, mask, np.nan)

        return result

    return f
# NA-aware comparison functions built from the standard ``operator``
# module: each returns ``op(x, y)`` with positions that are NA in either
# operand replaced by NaN (see ``make_nancomp``).
nangt = make_nancomp(operator.gt)
nange = make_nancomp(operator.ge)
nanlt = make_nancomp(operator.lt)
nanle = make_nancomp(operator.le)
naneq = make_nancomp(operator.eq)
nanne = make_nancomp(operator.ne)
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # mask_a: identity value substituted for NAs during accumulation;
    # mask_b: value used to re-mask those positions afterwards.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        # Operate on the integer representation of the datetimelike values.
        y = values.view("i8")
        # Note: the accum_func comparison fails as an "is" comparison
        changed = accum_func == np.minimum.accumulate

        try:
            if changed:
                # For cummin, set NaT slots to the largest i8 so they can
                # never become the running minimum.
                y[mask] = lib.i8max

            result = accum_func(y, axis=0)
        finally:
            if changed:
                # restore NaT elements
                y[mask] = iNaT

        if skipna:
            # Re-mask: original NA positions stay NaT in the output.
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if isinstance(values.dtype, np.dtype):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray/TimedeltaArray
            # TODO: have this case go through a DTA method?
            # For DatetimeTZDtype, view result as M8[ns]
            npdtype = orig_dtype if isinstance(orig_dtype, np.dtype) else "M8[ns]"
            # Item "type" of "Union[Type[ExtensionArray], Type[ndarray[Any, Any]]]"
            # has no attribute "_simple_new"
            result = type(values)._simple_new(  # type: ignore[union-attr]
                result.view(npdtype), dtype=orig_dtype
            )
    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # NA-capable dtype: fill NAs with the identity on a copy,
        # accumulate, then restore NaN at the original NA positions.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        # Integer/bool arrays cannot hold NAs; accumulate directly.
        result = accum_func(values, axis=0)

    return result