Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/window/rolling.py: 16%
769 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Provide a generic structure to support window functions,
3similar to how we have a Groupby object.
4"""
5from __future__ import annotations
7import copy
8from datetime import timedelta
9from functools import partial
10import inspect
11from textwrap import dedent
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 Hashable,
17 Sized,
18)
19import warnings
21import numpy as np
23from pandas._libs.tslibs import (
24 BaseOffset,
25 to_offset,
26)
27import pandas._libs.window.aggregations as window_aggregations
28from pandas._typing import (
29 ArrayLike,
30 Axis,
31 NDFrameT,
32 QuantileInterpolation,
33 WindowingRankType,
34)
35from pandas.compat._optional import import_optional_dependency
36from pandas.compat.numpy import function as nv
37from pandas.errors import DataError
38from pandas.util._decorators import doc
39from pandas.util._exceptions import find_stack_level
41from pandas.core.dtypes.common import (
42 ensure_float64,
43 is_bool,
44 is_integer,
45 is_list_like,
46 is_numeric_dtype,
47 is_scalar,
48 needs_i8_conversion,
49)
50from pandas.core.dtypes.generic import (
51 ABCDataFrame,
52 ABCSeries,
53)
54from pandas.core.dtypes.missing import notna
56from pandas.core._numba import executor
57from pandas.core.algorithms import factorize
58from pandas.core.apply import ResamplerWindowApply
59from pandas.core.arrays import ExtensionArray
60from pandas.core.base import SelectionMixin
61import pandas.core.common as com
62from pandas.core.indexers.objects import (
63 BaseIndexer,
64 FixedWindowIndexer,
65 GroupbyIndexer,
66 VariableWindowIndexer,
67)
68from pandas.core.indexes.api import (
69 DatetimeIndex,
70 Index,
71 MultiIndex,
72 PeriodIndex,
73 TimedeltaIndex,
74)
75from pandas.core.reshape.concat import concat
76from pandas.core.util.numba_ import (
77 get_jit_arguments,
78 maybe_use_numba,
79)
80from pandas.core.window.common import (
81 flex_binary_moment,
82 maybe_warn_args_and_kwargs,
83 zsqrt,
84)
85from pandas.core.window.doc import (
86 _shared_docs,
87 args_compat,
88 create_section_header,
89 kwargs_compat,
90 kwargs_numeric_only,
91 kwargs_scipy,
92 numba_notes,
93 template_header,
94 template_returns,
95 template_see_also,
96 window_agg_numba_parameters,
97 window_apply_parameters,
98)
99from pandas.core.window.numba_ import (
100 generate_manual_numpy_nan_agg_with_axis,
101 generate_numba_apply_func,
102 generate_numba_table_func,
103)
105if TYPE_CHECKING: 105 ↛ 106line 105 didn't jump to line 106, because the condition on line 105 was never true
106 from pandas import (
107 DataFrame,
108 Series,
109 )
110 from pandas.core.generic import NDFrame
111 from pandas.core.groupby.ops import BaseGrouper
class BaseWindow(SelectionMixin):
    """Provides utilities for performing windowing operations."""

    _attributes: list[str] = []
    exclusions: frozenset[Hashable] = frozenset()
    _on: Index

    def __init__(
        self,
        obj: NDFrame,
        window=None,
        min_periods: int | None = None,
        center: bool | None = False,
        win_type: str | None = None,
        axis: Axis = 0,
        on: str | Index | None = None,
        closed: str | None = None,
        step: int | None = None,
        method: str = "single",
        *,
        selection=None,
    ) -> None:
        self.obj = obj
        self.on = on
        self.closed = closed
        self.step = step
        self.window = window
        self.min_periods = min_periods
        self.center = center
        # TODO(2.0): Change this back to self.win_type once deprecation is enforced
        self._win_type = win_type
        self.axis = obj._get_axis_number(axis) if axis is not None else None
        self.method = method
        self._win_freq_i8: int | None = None
        # Resolve the index/column the window bounds are computed against.
        if self.on is None:
            if self.axis == 0:
                self._on = self.obj.index
            else:
                # i.e. self.axis == 1
                self._on = self.obj.columns
        elif isinstance(self.on, Index):
            self._on = self.on
        elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
            self._on = Index(self.obj[self.on])
        else:
            raise ValueError(
                f"invalid on specified as {self.on}, "
                "must be a column (of DataFrame), an Index or None"
            )

        self._selection = selection
        self._validate()

    @property
    def win_type(self):
        """Deprecated accessor: returns 'freq' for frequency-based windows."""
        if self._win_freq_i8 is not None:
            warnings.warn(
                "win_type will no longer return 'freq' in a future version. "
                "Check the type of self.window instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return "freq"
        return self._win_type

    @property
    def is_datetimelike(self) -> bool:
        """Deprecated: True when the window size is a time offset (freq-based)."""
        warnings.warn(
            "is_datetimelike is deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._win_freq_i8 is not None

    def validate(self) -> None:
        """Deprecated public alias for the private _validate."""
        warnings.warn(
            "validate is deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._validate()

    def _validate(self) -> None:
        """
        Validate constructor arguments, raising ValueError/TypeError on
        invalid center/min_periods/closed/obj/window/method/step values.
        """
        if self.center is not None and not is_bool(self.center):
            raise ValueError("center must be a boolean")
        if self.min_periods is not None:
            if not is_integer(self.min_periods):
                raise ValueError("min_periods must be an integer")
            elif self.min_periods < 0:
                raise ValueError("min_periods must be >= 0")
            elif is_integer(self.window) and self.min_periods > self.window:
                raise ValueError(
                    f"min_periods {self.min_periods} must be <= window {self.window}"
                )
        if self.closed is not None and self.closed not in [
            "right",
            "both",
            "left",
            "neither",
        ]:
            raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
        if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
            # NOTE(review): this validates self.obj but reports type(self);
            # left as-is since callers/tests may match on this message.
            raise TypeError(f"invalid type: {type(self)}")
        if isinstance(self.window, BaseIndexer):
            # Validate that the passed BaseIndexer subclass has
            # a get_window_bounds with the correct signature.
            get_window_bounds_signature = inspect.signature(
                self.window.get_window_bounds
            ).parameters.keys()
            expected_signature = inspect.signature(
                BaseIndexer().get_window_bounds
            ).parameters.keys()
            if get_window_bounds_signature != expected_signature:
                raise ValueError(
                    f"{type(self.window).__name__} does not implement "
                    f"the correct signature for get_window_bounds"
                )
        if self.method not in ["table", "single"]:
            # BUG FIX: the message previously read "... or 'single" with the
            # closing quote missing.
            raise ValueError("method must be 'table' or 'single'")
        if self.step is not None:
            if not is_integer(self.step):
                raise ValueError("step must be an integer")
            elif self.step < 0:
                raise ValueError("step must be >= 0")

    def _check_window_bounds(
        self, start: np.ndarray, end: np.ndarray, num_vals: int
    ) -> None:
        """Sanity-check indexer output: equal-length bounds, one window per step."""
        if len(start) != len(end):
            raise ValueError(
                f"start ({len(start)}) and end ({len(end)}) bounds must be the "
                f"same length"
            )
        # ceil(num_vals / step) windows are expected when a step is given.
        elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
            raise ValueError(
                f"start and end bounds ({len(start)}) must be the same length "
                f"as the object ({num_vals}) divided by the step ({self.step}) "
                f"if given and rounded up"
            )

    def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
        """
        Slices the index for a given result and the preset step.
        """
        return (
            index
            if result is None or len(result) == len(index)
            else index[:: self.step]
        )

    def _validate_numeric_only(self, name: str, numeric_only: bool) -> None:
        """
        Validate numeric_only argument, raising if invalid for the input.

        Parameters
        ----------
        name : str
            Name of the operator (kernel).
        numeric_only : bool
            Value passed by user.
        """
        if (
            self._selected_obj.ndim == 1
            and numeric_only
            and not is_numeric_dtype(self._selected_obj.dtype)
        ):
            raise NotImplementedError(
                f"{type(self).__name__}.{name} does not implement numeric_only"
            )

    def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT:
        """Subset DataFrame to numeric columns.

        Parameters
        ----------
        obj : DataFrame

        Returns
        -------
        obj subset to numeric-only columns.
        """
        result = obj.select_dtypes(include=["number"], exclude=["timedelta"])
        return result

    def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
        """
        Split data into blocks & return conformed data.
        """
        # filter out the on from the object
        if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
            obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
        if obj.ndim > 1 and (numeric_only or self.axis == 1):
            # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
            # to float to calculate the complete row at once. We exclude all non-numeric
            # dtypes.
            obj = self._make_numeric_only(obj)
            if self.axis == 1:
                obj = obj.astype("float64", copy=False)
                obj._mgr = obj._mgr.consolidate()
        return obj

    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        with warnings.catch_warnings():
            # TODO(2.0): Remove once win_type deprecation is enforced
            warnings.filterwarnings("ignore", "win_type", FutureWarning)
            kwargs = {attr: getattr(self, attr) for attr in self._attributes}

        selection = None
        if subset.ndim == 2 and (
            (is_scalar(key) and key in subset) or is_list_like(key)
        ):
            selection = key

        new_win = type(self)(subset, selection=selection, **kwargs)
        return new_win

    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self.obj:
            # delegate unknown attributes to column selection, e.g. r.B
            return self[attr]

        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{attr}'"
        )

    def _dir_additions(self):
        return self.obj._dir_additions()

    def __repr__(self) -> str:
        """
        Provide a nice str repr of our rolling object.
        """
        attrs_list = (
            f"{attr_name}={getattr(self, attr_name)}"
            for attr_name in self._attributes
            if getattr(self, attr_name, None) is not None and attr_name[0] != "_"
        )
        attrs = ",".join(attrs_list)
        return f"{type(self).__name__} [{attrs}]"

    def __iter__(self):
        """Yield each window of the selected object as a sub-frame/series."""
        obj = self._selected_obj.set_axis(self._on)
        obj = self._create_data(obj)
        indexer = self._get_window_indexer()

        start, end = indexer.get_window_bounds(
            num_values=len(obj),
            min_periods=self.min_periods,
            center=self.center,
            closed=self.closed,
            step=self.step,
        )
        self._check_window_bounds(start, end, len(obj))

        for s, e in zip(start, end):
            result = obj.iloc[slice(s, e)]
            yield result

    def _prep_values(self, values: ArrayLike) -> np.ndarray:
        """Convert input to numpy arrays for Cython routines"""
        if needs_i8_conversion(values.dtype):
            raise NotImplementedError(
                f"ops for {type(self).__name__} for this "
                f"dtype {values.dtype} are not implemented"
            )
        else:
            # GH #12373 : rolling functions error on float32 data
            # make sure the data is coerced to float64
            try:
                if isinstance(values, ExtensionArray):
                    values = values.to_numpy(np.float64, na_value=np.nan)
                else:
                    values = ensure_float64(values)
            except (ValueError, TypeError) as err:
                raise TypeError(f"cannot handle this type -> {values.dtype}") from err

        # Convert inf to nan for C funcs
        inf = np.isinf(values)
        if inf.any():
            values = np.where(inf, np.nan, values)

        return values

    def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
        # if we have an 'on' column we want to put it back into
        # the results in the same location
        from pandas import Series

        if self.on is not None and not self._on.equals(obj.index):
            name = self._on.name
            extra_col = Series(self._on, index=self.obj.index, name=name)
            if name in result.columns:
                # TODO: sure we want to overwrite results?
                result[name] = extra_col
            elif name in result.index.names:
                pass
            elif name in self._selected_obj.columns:
                # insert in the same location as we had in _selected_obj
                old_cols = self._selected_obj.columns
                new_cols = result.columns
                old_loc = old_cols.get_loc(name)
                overlap = new_cols.intersection(old_cols[:old_loc])
                new_loc = len(overlap)
                result.insert(new_loc, name, extra_col)
            else:
                # insert at the end
                result[name] = extra_col

    @property
    def _index_array(self):
        # TODO: why do we get here with e.g. MultiIndex?
        if needs_i8_conversion(self._on.dtype):
            return self._on.asi8
        return None

    def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame:
        """Validate and finalize result."""
        if out.shape[1] == 0 and obj.shape[1] > 0:
            raise DataError("No numeric types to aggregate")
        elif out.shape[1] == 0:
            # input had no columns either; preserve shape, coerce to float
            return obj.astype("float64")

        self._insert_on_column(out, obj)
        return out

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        if isinstance(self.window, BaseIndexer):
            return self.window
        if self._win_freq_i8 is not None:
            return VariableWindowIndexer(
                index_array=self._index_array,
                window_size=self._win_freq_i8,
                center=self.center,
            )
        return FixedWindowIndexer(window_size=self.window)

    def _apply_series(
        self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
    ) -> Series:
        """
        Series version of _apply_blockwise
        """
        obj = self._create_data(self._selected_obj)

        if name == "count":
            # GH 12541: Special case for count where we support date-like types
            obj = notna(obj).astype(int)
        try:
            values = self._prep_values(obj._values)
        except (TypeError, NotImplementedError) as err:
            raise DataError("No numeric types to aggregate") from err

        result = homogeneous_func(values)
        index = self._slice_axis_for_step(obj.index, result)
        return obj._constructor(result, index=index, name=obj.name)

    def _apply_blockwise(
        self,
        homogeneous_func: Callable[..., ArrayLike],
        name: str,
        numeric_only: bool = False,
    ) -> DataFrame | Series:
        """
        Apply the given function to the DataFrame broken down into homogeneous
        sub-frames.
        """
        self._validate_numeric_only(name, numeric_only)
        if self._selected_obj.ndim == 1:
            return self._apply_series(homogeneous_func, name)

        obj = self._create_data(self._selected_obj, numeric_only)
        if name == "count":
            # GH 12541: Special case for count where we support date-like types
            obj = notna(obj).astype(int)
            obj._mgr = obj._mgr.consolidate()

        def hfunc(values: ArrayLike) -> ArrayLike:
            values = self._prep_values(values)
            return homogeneous_func(values)

        if self.axis == 1:
            obj = obj.T

        taker = []
        res_values = []
        for i, arr in enumerate(obj._iter_column_arrays()):
            # GH#42736 operate column-wise instead of block-wise
            try:
                res = hfunc(arr)
            except (TypeError, NotImplementedError):
                # nuisance column: silently dropped (deprecated, warned below)
                pass
            else:
                res_values.append(res)
                taker.append(i)

        index = self._slice_axis_for_step(
            obj.index, res_values[0] if len(res_values) > 0 else None
        )
        df = type(obj)._from_arrays(
            res_values,
            index=index,
            columns=obj.columns.take(taker),
            verify_integrity=False,
        )

        if self.axis == 1:
            df = df.T

        if 0 != len(res_values) != len(obj.columns):
            # GH#42738 ignore_failures dropped nuisance columns
            dropped = obj.columns.difference(obj.columns.take(taker))
            warnings.warn(
                "Dropping of nuisance columns in rolling operations "
                "is deprecated; in a future version this will raise TypeError. "
                "Select only valid columns before calling the operation. "
                f"Dropped columns were {dropped}",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        return self._resolve_output(df, obj)

    def _apply_tablewise(
        self,
        homogeneous_func: Callable[..., ArrayLike],
        name: str | None = None,
        numeric_only: bool = False,
    ) -> DataFrame | Series:
        """
        Apply the given function to the DataFrame across the entire object
        """
        if self._selected_obj.ndim == 1:
            raise ValueError("method='table' not applicable for Series objects.")
        obj = self._create_data(self._selected_obj, numeric_only)
        values = self._prep_values(obj.to_numpy())
        values = values.T if self.axis == 1 else values
        result = homogeneous_func(values)
        result = result.T if self.axis == 1 else result
        index = self._slice_axis_for_step(obj.index, result)
        columns = (
            obj.columns
            if result.shape[1] == len(obj.columns)
            else obj.columns[:: self.step]
        )
        out = obj._constructor(result, index=index, columns=columns)

        return self._resolve_output(out, obj)

    def _apply_pairwise(
        self,
        target: DataFrame | Series,
        other: DataFrame | Series | None,
        pairwise: bool | None,
        func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
        numeric_only: bool,
    ) -> DataFrame | Series:
        """
        Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
        """
        target = self._create_data(target, numeric_only)
        if other is None:
            other = target
            # only default unset
            pairwise = True if pairwise is None else pairwise
        elif not isinstance(other, (ABCDataFrame, ABCSeries)):
            raise ValueError("other must be a DataFrame or Series")
        elif other.ndim == 2 and numeric_only:
            other = self._make_numeric_only(other)

        return flex_binary_moment(target, other, func, pairwise=bool(pairwise))

    def _apply(
        self,
        func: Callable[..., Any],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ):
        """
        Rolling statistical measure using supplied function.

        Designed to be used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable function to apply
        name : str,
        numba_args : tuple
            args to be passed when func is a numba func
        **kwargs
            additional arguments for rolling function and window function

        Returns
        -------
        y : type of input
        """
        window_indexer = self._get_window_indexer()
        min_periods = (
            self.min_periods
            if self.min_periods is not None
            else window_indexer.window_size
        )

        def homogeneous_func(values: np.ndarray):
            # calculation function

            if values.size == 0:
                return values.copy()

            def calc(x):
                start, end = window_indexer.get_window_bounds(
                    num_values=len(x),
                    min_periods=min_periods,
                    center=self.center,
                    closed=self.closed,
                    step=self.step,
                )
                self._check_window_bounds(start, end, len(x))

                return func(x, start, end, min_periods, *numba_args)

            with np.errstate(all="ignore"):
                result = calc(values)

            return result

        if self.method == "single":
            return self._apply_blockwise(homogeneous_func, name, numeric_only)
        else:
            return self._apply_tablewise(homogeneous_func, name, numeric_only)

    def _numba_apply(
        self,
        func: Callable[..., Any],
        engine_kwargs: dict[str, bool] | None = None,
        *func_args,
    ):
        """Apply a numba-jitted aggregator over the computed window bounds."""
        window_indexer = self._get_window_indexer()
        min_periods = (
            self.min_periods
            if self.min_periods is not None
            else window_indexer.window_size
        )
        obj = self._create_data(self._selected_obj)
        if self.axis == 1:
            obj = obj.T
        values = self._prep_values(obj.to_numpy())
        if values.ndim == 1:
            # numba kernels operate on 2D input
            values = values.reshape(-1, 1)
        start, end = window_indexer.get_window_bounds(
            num_values=len(values),
            min_periods=min_periods,
            center=self.center,
            closed=self.closed,
            step=self.step,
        )
        self._check_window_bounds(start, end, len(values))
        aggregator = executor.generate_shared_aggregator(
            func, **get_jit_arguments(engine_kwargs)
        )
        result = aggregator(values, start, end, min_periods, *func_args)
        result = result.T if self.axis == 1 else result
        index = self._slice_axis_for_step(obj.index, result)
        if obj.ndim == 1:
            result = result.squeeze()
            out = obj._constructor(result, index=index, name=obj.name)
            return out
        else:
            columns = self._slice_axis_for_step(obj.columns, result.T)
            out = obj._constructor(result, index=index, columns=columns)
            return self._resolve_output(out, obj)

    def aggregate(self, func, *args, **kwargs):
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            return self.apply(func, raw=False, args=args, kwargs=kwargs)
        return result

    agg = aggregate
class BaseWindowGroupby(BaseWindow):
    """
    Provide the groupby windowing facilities.
    """

    # Grouper describing the groupby keys; validated and set in __init__.
    _grouper: BaseGrouper
    # Mirrors groupby(as_index=...): True puts group keys in the result index.
    _as_index: bool
    _attributes: list[str] = ["_grouper"]

    def __init__(
        self,
        obj: DataFrame | Series,
        *args,
        _grouper: BaseGrouper,
        _as_index: bool = True,
        **kwargs,
    ) -> None:
        # Imported at call time, presumably to avoid a circular import
        # (module level only imports BaseGrouper under TYPE_CHECKING).
        from pandas.core.groupby.ops import BaseGrouper

        if not isinstance(_grouper, BaseGrouper):
            raise ValueError("Must pass a BaseGrouper object.")
        self._grouper = _grouper
        self._as_index = _as_index
        # GH 32262: It's convention to keep the grouping column in
        # groupby.<agg_func>, but unexpected to users in
        # groupby.rolling.<agg_func>
        obj = obj.drop(columns=self._grouper.names, errors="ignore")
        # GH 15354
        if kwargs.get("step") is not None:
            raise NotImplementedError("step not implemented for groupby")
        super().__init__(obj, *args, **kwargs)

    def _apply(
        self,
        func: Callable[..., Any],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ) -> DataFrame | Series:
        """
        Apply func via BaseWindow._apply, then rebuild the result's index as a
        MultiIndex of groupby key levels followed by the original index levels.
        """
        result = super()._apply(
            func,
            name,
            numeric_only,
            numba_args,
            **kwargs,
        )
        # Reconstruct the resulting MultiIndex
        # 1st set of levels = group by labels
        # 2nd set of levels = original DataFrame/Series index
        grouped_object_index = self.obj.index
        grouped_index_name = [*grouped_object_index.names]
        groupby_keys = copy.copy(self._grouper.names)
        result_index_names = groupby_keys + grouped_index_name

        # Group keys that are not index levels (i.e. real columns) were kept
        # in the result by the base implementation and must be dropped.
        drop_columns = [
            key
            for key in self._grouper.names
            if key not in self.obj.index.names or key is None
        ]

        if len(drop_columns) != len(groupby_keys):
            # Our result will have still kept the column in the result
            result = result.drop(columns=drop_columns, errors="ignore")

        codes = self._grouper.codes
        levels = copy.copy(self._grouper.levels)

        # indexer reorders original row positions into groupby order.
        group_indices = self._grouper.indices.values()
        if group_indices:
            indexer = np.concatenate(list(group_indices))
        else:
            indexer = np.array([], dtype=np.intp)
        codes = [c.take(indexer) for c in codes]

        # if the index of the original dataframe needs to be preserved, append
        # this index (but reordered) to the codes/levels from the groupby
        if grouped_object_index is not None:
            idx = grouped_object_index.take(indexer)
            if not isinstance(idx, MultiIndex):
                idx = MultiIndex.from_arrays([idx])
            codes.extend(list(idx.codes))
            levels.extend(list(idx.levels))

        result_index = MultiIndex(
            levels, codes, names=result_index_names, verify_integrity=False
        )

        result.index = result_index
        if not self._as_index:
            # as_index=False: move the group-key levels back out into columns.
            result = result.reset_index(level=list(range(len(groupby_keys))))
        return result

    def _apply_pairwise(
        self,
        target: DataFrame | Series,
        other: DataFrame | Series | None,
        pairwise: bool | None,
        func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
        numeric_only: bool,
    ) -> DataFrame | Series:
        """
        Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
        """
        # Manually drop the grouping column first
        target = target.drop(columns=self._grouper.names, errors="ignore")
        result = super()._apply_pairwise(target, other, pairwise, func, numeric_only)
        # 1) Determine the levels + codes of the groupby levels
        if other is not None and not all(
            len(group) == len(other) for group in self._grouper.indices.values()
        ):
            # GH 42915
            # len(other) != len(any group), so must reindex (expand) the result
            # from flex_binary_moment to a "transform"-like result
            # per groupby combination
            old_result_len = len(result)
            result = concat(
                [
                    result.take(gb_indices).reindex(result.index)
                    for gb_indices in self._grouper.indices.values()
                ]
            )

            gb_pairs = (
                com.maybe_make_list(pair) for pair in self._grouper.indices.keys()
            )
            groupby_codes = []
            groupby_levels = []
            # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
            for gb_level_pair in map(list, zip(*gb_pairs)):
                # each group key value is repeated once per original result row
                labels = np.repeat(np.array(gb_level_pair), old_result_len)
                codes, levels = factorize(labels)
                groupby_codes.append(codes)
                groupby_levels.append(levels)
        else:
            # pairwise=True or len(other) == len(each group), so repeat
            # the groupby labels by the number of columns in the original object
            groupby_codes = self._grouper.codes
            # error: Incompatible types in assignment (expression has type
            # "List[Index]", variable has type "List[Union[ndarray, Index]]")
            groupby_levels = self._grouper.levels  # type: ignore[assignment]

            group_indices = self._grouper.indices.values()
            if group_indices:
                indexer = np.concatenate(list(group_indices))
            else:
                indexer = np.array([], dtype=np.intp)

            if target.ndim == 1:
                repeat_by = 1
            else:
                repeat_by = len(target.columns)
            groupby_codes = [
                np.repeat(c.take(indexer), repeat_by) for c in groupby_codes
            ]
        # 2) Determine the levels + codes of the result from super()._apply_pairwise
        if isinstance(result.index, MultiIndex):
            result_codes = list(result.index.codes)
            result_levels = list(result.index.levels)
            result_names = list(result.index.names)
        else:
            # flat index: factorize so it can be spliced into a MultiIndex
            idx_codes, idx_levels = factorize(result.index)
            result_codes = [idx_codes]
            result_levels = [idx_levels]
            result_names = [result.index.name]

        # 3) Create the resulting index by combining 1) + 2)
        result_codes = groupby_codes + result_codes
        result_levels = groupby_levels + result_levels
        result_names = self._grouper.names + result_names

        result_index = MultiIndex(
            result_levels, result_codes, names=result_names, verify_integrity=False
        )
        result.index = result_index
        return result

    def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
        """
        Split data into blocks & return conformed data.
        """
        # Ensure the object we're rolling over is monotonically sorted relative
        # to the groups
        # GH 36197
        if not obj.empty:
            groupby_order = np.concatenate(list(self._grouper.indices.values())).astype(
                np.int64
            )
            obj = obj.take(groupby_order)
        return super()._create_data(obj, numeric_only)

    def _gotitem(self, key, ndim, subset=None):
        # we are setting the index on the actual object
        # here so our index is carried through to the selected obj
        # when we do the splitting for the groupby
        if self.on is not None:
            # GH 43355
            subset = self.obj.set_index(self._on)
        return super()._gotitem(key, ndim, subset=subset)
918class Window(BaseWindow):
919 """
920 Provide rolling window calculations.
922 Parameters
923 ----------
924 window : int, offset, or BaseIndexer subclass
925 Size of the moving window.
927 If an integer, the fixed number of observations used for
928 each window.
930 If an offset, the time period of each window. Each
931 window will be a variable sized based on the observations included in
932 the time-period. This is only valid for datetimelike indexes.
933 To learn more about the offsets & frequency strings, please see `this link
934 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
936 If a BaseIndexer subclass, the window boundaries
937 based on the defined ``get_window_bounds`` method. Additional rolling
938 keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
939 ``step`` will be passed to ``get_window_bounds``.
941 min_periods : int, default None
942 Minimum number of observations in window required to have a value;
943 otherwise, result is ``np.nan``.
945 For a window that is specified by an offset, ``min_periods`` will default to 1.
947 For a window that is specified by an integer, ``min_periods`` will default
948 to the size of the window.
950 center : bool, default False
951 If False, set the window labels as the right edge of the window index.
953 If True, set the window labels as the center of the window index.
955 win_type : str, default None
956 If ``None``, all points are evenly weighted.
958 If a string, it must be a valid `scipy.signal window function
959 <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.
961 Certain Scipy window types require additional parameters to be passed
962 in the aggregation function. The additional parameters must match
963 the keywords specified in the Scipy window type method signature.
965 on : str, optional
966 For a DataFrame, a column label or Index level on which
967 to calculate the rolling window, rather than the DataFrame's index.
969 Provided integer column is ignored and excluded from result since
970 an integer index is not used to calculate the rolling window.
972 axis : int or str, default 0
973 If ``0`` or ``'index'``, roll across the rows.
975 If ``1`` or ``'columns'``, roll across the columns.
977 For `Series` this parameter is unused and defaults to 0.
979 closed : str, default None
980 If ``'right'``, the first point in the window is excluded from calculations.
982 If ``'left'``, the last point in the window is excluded from calculations.
984 If ``'both'``, the no points in the window are excluded from calculations.
986 If ``'neither'``, the first and last points in the window are excluded
987 from calculations.
989 Default ``None`` (``'right'``).
991 .. versionchanged:: 1.2.0
993 The closed parameter with fixed windows is now supported.
995 step : int, default None
997 .. versionadded:: 1.5.0
999 Evaluate the window at every ``step`` result, equivalent to slicing as
1000 ``[::step]``. ``window`` must be an integer. Using a step argument other
1001 than None or 1 will produce a result with a different shape than the input.
1003 method : str {'single', 'table'}, default 'single'
1005 .. versionadded:: 1.3.0
1007 Execute the rolling operation per single column or row (``'single'``)
1008 or over the entire object (``'table'``).
1010 This argument is only implemented when specifying ``engine='numba'``
1011 in the method call.
1013 Returns
1014 -------
1015 ``Window`` subclass if a ``win_type`` is passed
1017 ``Rolling`` subclass if ``win_type`` is not passed
1019 See Also
1020 --------
1021 expanding : Provides expanding transformations.
1022 ewm : Provides exponential weighted functions.
1024 Notes
1025 -----
1026 See :ref:`Windowing Operations <window.generic>` for further usage details
1027 and examples.
1029 Examples
1030 --------
1031 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
1032 >>> df
1033 B
1034 0 0.0
1035 1 1.0
1036 2 2.0
1037 3 NaN
1038 4 4.0
1040 **window**
1042 Rolling sum with a window length of 2 observations.
1044 >>> df.rolling(2).sum()
1045 B
1046 0 NaN
1047 1 1.0
1048 2 3.0
1049 3 NaN
1050 4 NaN
1052 Rolling sum with a window span of 2 seconds.
1054 >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
1055 ... index = [pd.Timestamp('20130101 09:00:00'),
1056 ... pd.Timestamp('20130101 09:00:02'),
1057 ... pd.Timestamp('20130101 09:00:03'),
1058 ... pd.Timestamp('20130101 09:00:05'),
1059 ... pd.Timestamp('20130101 09:00:06')])
1061 >>> df_time
1062 B
1063 2013-01-01 09:00:00 0.0
1064 2013-01-01 09:00:02 1.0
1065 2013-01-01 09:00:03 2.0
1066 2013-01-01 09:00:05 NaN
1067 2013-01-01 09:00:06 4.0
1069 >>> df_time.rolling('2s').sum()
1070 B
1071 2013-01-01 09:00:00 0.0
1072 2013-01-01 09:00:02 1.0
1073 2013-01-01 09:00:03 3.0
1074 2013-01-01 09:00:05 NaN
1075 2013-01-01 09:00:06 4.0
1077 Rolling sum with forward looking windows with 2 observations.
1079 >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
1080 >>> df.rolling(window=indexer, min_periods=1).sum()
1081 B
1082 0 1.0
1083 1 3.0
1084 2 2.0
1085 3 4.0
1086 4 4.0
1088 **min_periods**
1090 Rolling sum with a window length of 2 observations, but only needs a minimum of 1
1091 observation to calculate a value.
1093 >>> df.rolling(2, min_periods=1).sum()
1094 B
1095 0 0.0
1096 1 1.0
1097 2 3.0
1098 3 2.0
1099 4 4.0
1101 **center**
1103 Rolling sum with the result assigned to the center of the window index.
1105 >>> df.rolling(3, min_periods=1, center=True).sum()
1106 B
1107 0 1.0
1108 1 3.0
1109 2 3.0
1110 3 6.0
1111 4 4.0
1113 >>> df.rolling(3, min_periods=1, center=False).sum()
1114 B
1115 0 0.0
1116 1 1.0
1117 2 3.0
1118 3 3.0
1119 4 6.0
1121 **step**
1123 Rolling sum with a window length of 2 observations, minimum of 1 observation to
1124 calculate a value, and a step of 2.
1126 >>> df.rolling(2, min_periods=1, step=2).sum()
1127 B
1128 0 0.0
1129 2 3.0
1130 4 4.0
1132 **win_type**
1134 Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
1135 window type. ``std`` is required in the aggregation function.
1137 >>> df.rolling(2, win_type='gaussian').sum(std=3)
1138 B
1139 0 NaN
1140 1 0.986207
1141 2 2.958621
1142 3 NaN
1143 4 NaN
1144 """
    # Constructor arguments recognized by this window subclass; consumed by
    # the shared BaseWindow machinery (attribute propagation, __repr__, etc.).
    _attributes = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]
1158 def _validate(self):
1159 super()._validate()
1161 if not isinstance(self.win_type, str):
1162 raise ValueError(f"Invalid win_type {self.win_type}")
1163 signal = import_optional_dependency(
1164 "scipy.signal", extra="Scipy is required to generate window weight."
1165 )
1166 self._scipy_weight_generator = getattr(signal, self.win_type, None)
1167 if self._scipy_weight_generator is None:
1168 raise ValueError(f"Invalid win_type {self.win_type}")
1170 if isinstance(self.window, BaseIndexer):
1171 raise NotImplementedError(
1172 "BaseIndexer subclasses not implemented with win_types."
1173 )
1174 elif not is_integer(self.window) or self.window < 0:
1175 raise ValueError("window must be an integer 0 or greater")
1177 if self.method != "single":
1178 raise NotImplementedError("'single' is the only supported method type.")
1180 def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
1181 """
1182 Center the result in the window for weighted rolling aggregations.
1183 """
1184 if offset > 0:
1185 lead_indexer = [slice(offset, None)]
1186 result = np.copy(result[tuple(lead_indexer)])
1187 return result
    def _apply(
        self,
        func: Callable[[np.ndarray, int, int], np.ndarray],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ):
        """
        Rolling with weights statistical measure using supplied function.

        Designed to be used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable function to apply
        name : str,
        numeric_only : bool, default False
            Whether to only operate on bool, int, and float columns
        numba_args : tuple
            unused
        **kwargs
            additional arguments for scipy windows if necessary

        Returns
        -------
        y : type of input
        """
        # Build the weight array from the scipy.signal generator selected in
        # _validate; kwargs are the window's extra parameters (e.g. ``std``
        # for a gaussian window).
        # "None" not callable  [misc]
        window = self._scipy_weight_generator(  # type: ignore[misc]
            self.window, **kwargs
        )
        # With center=True the result is later shifted left by this offset.
        offset = (len(window) - 1) // 2 if self.center else 0

        def homogeneous_func(values: np.ndarray):
            # calculation function

            if values.size == 0:
                return values.copy()

            def calc(x):
                # Pad with NaNs so the centered window has data to consume at
                # the trailing edge before _center_window trims the front.
                additional_nans = np.array([np.nan] * offset)
                x = np.concatenate((x, additional_nans))
                return func(x, window, self.min_periods or len(window))

            with np.errstate(all="ignore"):
                # Our weighted aggregations return memoryviews
                result = np.asarray(calc(values))

            if self.center:
                result = self._center_window(result, offset)

            return result

        # Apply per column, then honor the ``step`` slicing (step=None → [::None],
        # i.e. no subsampling).
        return self._apply_blockwise(homogeneous_func, name, numeric_only)[:: self.step]
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.rolling(2, win_type="boxcar").agg("mean")
             A    B    C
        0  NaN  NaN  NaN
        1  1.5  4.5  7.5
        2  2.5  5.5  8.5
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Delegate to the shared resampler/window aggregation machinery, which
        # handles strings, lists and dicts of aggregations; it returns None
        # when ``func`` should instead be applied directly to this object.
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:

            # these must apply directly
            result = func(self)

        return result

    agg = aggregate
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window sum",
        agg_method="sum",
    )
    def sum(self, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("sum", args, kwargs)
        window_func = window_aggregations.roll_weighted_sum
        # kwargs are forwarded through _apply to the scipy weight generator
        # (e.g. ``std`` for a gaussian window).
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="sum",
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window mean",
        agg_method="mean",
    )
    def mean(self, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("mean", args, kwargs)
        window_func = window_aggregations.roll_weighted_mean
        # kwargs are forwarded through _apply to the scipy weight generator.
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="mean",
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        ".. versionadded:: 1.0.0 \n\n",
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window variance",
        agg_method="var",
    )
    def var(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("var", args, kwargs)
        window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof)
        # Window.std delegates here with name="std" in kwargs; drop it so it
        # is not forwarded to the scipy weight generator.
        kwargs.pop("name", None)
        return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        ".. versionadded:: 1.0.0 \n\n",
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window standard deviation",
        agg_method="std",
    )
    def std(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("std", args, kwargs)
        # std = sqrt(var); zsqrt maps tiny negative variances (floating-point
        # noise) to 0 instead of NaN. The name="std" kwarg is popped in var().
        return zsqrt(
            self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs)
        )
class RollingAndExpandingMixin(BaseWindow):
    """
    Shared implementations of the aggregation methods exposed by both
    Rolling and Expanding; the concrete subclasses mainly add validation,
    argument deprecation warnings and docstrings on top of these.
    """

    def count(self, numeric_only: bool = False):
        # Implemented as a windowed sum; presumably _apply converts the
        # values to a 0/1 notna mask for name="count" — defined upstream,
        # not visible here.
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="count", numeric_only=numeric_only)

    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        """
        Apply an arbitrary function over each window.

        Dispatches to a numba-compiled kernel when ``engine='numba'``
        (requires ``raw=True``), otherwise to the Cython roll_apply path.
        Raises ValueError for an unknown engine or invalid argument combos.
        """
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        if not is_bool(raw):
            raise ValueError("raw parameter must be `True` or `False`")

        numba_args: tuple[Any, ...] = ()
        if maybe_use_numba(engine):
            if raw is False:
                raise ValueError("raw must be `True` when using the numba engine")
            numba_args = args
            if self.method == "single":
                apply_func = generate_numba_apply_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
            else:
                # method="table": jit over the whole 2-D block at once.
                apply_func = generate_numba_table_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
        elif engine in ("cython", None):
            if engine_kwargs is not None:
                raise ValueError("cython engine does not accept engine_kwargs")
            apply_func = self._generate_cython_apply_func(args, kwargs, raw, func)
        else:
            raise ValueError("engine must be either 'numba' or 'cython'")

        return self._apply(
            apply_func,
            name="apply",
            numba_args=numba_args,
        )

    def _generate_cython_apply_func(
        self,
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        raw: bool,
        function: Callable[..., Any],
    ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
        # Returns a closure matching the (values, begin, end, min_periods)
        # signature expected by _apply, wrapping the Cython roll_apply kernel.
        from pandas import Series

        window_func = partial(
            window_aggregations.roll_apply,
            args=args,
            kwargs=kwargs,
            raw=raw,
            function=function,
        )

        def apply_func(values, begin, end, min_periods, raw=raw):
            if not raw:
                # GH 45912: with raw=False the user function receives a
                # Series carrying the original index, not a bare ndarray.
                values = Series(values, index=self._on)
            return window_func(values, begin, end, min_periods)

        return apply_func

    def sum(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("sum", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                # No table-wise numba kernel: fall back to apply() with a
                # manual axis-aware NaN-aggregation wrapper.
                func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_sum

                return self._numba_apply(sliding_sum, engine_kwargs)
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="sum", numeric_only=numeric_only, **kwargs)

    def max(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("max", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmax)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # Third argument True selects the "max" branch of the kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, True)
        window_func = window_aggregations.roll_max
        return self._apply(window_func, name="max", numeric_only=numeric_only, **kwargs)

    def min(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("min", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmin)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # Third argument False selects the "min" branch of the kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, False)
        window_func = window_aggregations.roll_min
        return self._apply(window_func, name="min", numeric_only=numeric_only, **kwargs)

    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("mean", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmean)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_mean

                return self._numba_apply(sliding_mean, engine_kwargs)
        window_func = window_aggregations.roll_mean
        return self._apply(
            window_func, name="mean", numeric_only=numeric_only, **kwargs
        )

    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        if maybe_use_numba(engine):
            # No dedicated sliding-median numba kernel: route both methods
            # through apply() with np.nanmedian.
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian)
            else:
                func = np.nanmedian

            return self.apply(
                func,
                raw=True,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )
        window_func = window_aggregations.roll_median_c
        return self._apply(
            window_func, name="median", numeric_only=numeric_only, **kwargs
        )

    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("std", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("std not supported with method='table'")
            else:
                from pandas.core._numba.kernels import sliding_var

                return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof))
        window_func = window_aggregations.roll_var

        def zsqrt_func(values, begin, end, min_periods):
            # sqrt of the rolling variance; zsqrt clamps tiny negative
            # floating-point variances to 0 instead of producing NaN.
            return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof))

        return self._apply(
            zsqrt_func,
            name="std",
            numeric_only=numeric_only,
            **kwargs,
        )

    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("var", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("var not supported with method='table'")
            else:
                from pandas.core._numba.kernels import sliding_var

                return self._numba_apply(sliding_var, engine_kwargs, ddof)
        window_func = partial(window_aggregations.roll_var, ddof=ddof)
        return self._apply(
            window_func,
            name="var",
            numeric_only=numeric_only,
            **kwargs,
        )

    def skew(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_skew
        return self._apply(
            window_func,
            name="skew",
            numeric_only=numeric_only,
            **kwargs,
        )

    def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_rolling_func("sem", args, kwargs)
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # Standard error of the mean: std / sqrt(count - ddof), computed
        # window-wise from the two existing aggregations.
        return self.std(numeric_only=numeric_only, **kwargs) / (
            self.count(numeric_only=numeric_only) - ddof
        ).pow(0.5)

    def kurt(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_kurt
        return self._apply(
            window_func,
            name="kurt",
            numeric_only=numeric_only,
            **kwargs,
        )

    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
        **kwargs,
    ):
        # Fast paths: q=1.0 and q=0.0 are exactly the rolling max/min.
        if quantile == 1.0:
            window_func = window_aggregations.roll_max
        elif quantile == 0.0:
            window_func = window_aggregations.roll_min
        else:
            window_func = partial(
                window_aggregations.roll_quantile,
                quantile=quantile,
                interpolation=interpolation,
            )

        return self._apply(
            window_func, name="quantile", numeric_only=numeric_only, **kwargs
        )

    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
        **kwargs,
    ):
        window_func = partial(
            window_aggregations.roll_rank,
            method=method,
            ascending=ascending,
            percentile=pct,
        )

        return self._apply(
            window_func, name="rank", numeric_only=numeric_only, **kwargs
        )

    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for cov")
        self._validate_numeric_only("cov", numeric_only)

        from pandas import Series

        def cov_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                # cov = (E[XY] - E[X]E[Y]) * n/(n - ddof), where n counts
                # positions with both x and y non-null in the window.
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, cov_func, numeric_only
        )

    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for corr")
        self._validate_numeric_only("corr", numeric_only)

        from pandas import Series

        def corr_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                # corr = cov(X, Y) / sqrt(var(X) * var(Y)), all computed
                # window-wise with the same bias correction as cov().
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                x_var = window_aggregations.roll_var(
                    x_array, start, end, min_periods, ddof
                )
                y_var = window_aggregations.roll_var(
                    y_array, start, end, min_periods, ddof
                )
                numerator = (mean_x_y - mean_x * mean_y) * (
                    count_x_y / (count_x_y - ddof)
                )
                denominator = (x_var * y_var) ** 0.5
                result = numerator / denominator
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, corr_func, numeric_only
        )
1807class Rolling(RollingAndExpandingMixin):
    # Constructor arguments recognized by Rolling; used by the shared
    # BaseWindow machinery (attribute propagation, __repr__, etc.).
    _attributes: list[str] = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]
    def _validate(self):
        """
        Validate Rolling-specific constructor arguments.

        Frequency windows (a str/offset/timedelta window over a datetimelike
        axis) additionally require a monotonic, NaT-free axis and a fixed
        frequency; for them ``min_periods`` defaults to 1 and ``step`` is
        rejected. Otherwise ``window`` must be a BaseIndexer or a
        non-negative integer.
        """
        super()._validate()

        # we allow rolling on a datetimelike index
        if (
            self.obj.empty
            or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
        ) and isinstance(self.window, (str, BaseOffset, timedelta)):

            self._validate_datetimelike_monotonic()

            # this will raise ValueError on non-fixed freqs
            try:
                freq = to_offset(self.window)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"passed window {self.window} is not "
                    "compatible with a datetimelike index"
                ) from err
            if isinstance(self._on, PeriodIndex):
                # Express the window in units of the PeriodIndex's own
                # frequency rather than raw nanoseconds.
                # error: Incompatible types in assignment (expression has type
                # "float", variable has type "Optional[int]")
                self._win_freq_i8 = freq.nanos / (  # type: ignore[assignment]
                    self._on.freq.nanos / self._on.freq.n
                )
            else:
                self._win_freq_i8 = freq.nanos

            # min_periods must be an integer
            if self.min_periods is None:
                self.min_periods = 1

            if self.step is not None:
                raise NotImplementedError(
                    "step is not supported with frequency windows"
                )

        elif isinstance(self.window, BaseIndexer):
            # Passed BaseIndexer subclass should handle all other rolling kwargs
            pass
        elif not is_integer(self.window) or self.window < 0:
            raise ValueError("window must be an integer 0 or greater")
1864 def _validate_datetimelike_monotonic(self):
1865 """
1866 Validate self._on is monotonic (increasing or decreasing) and has
1867 no NaT values for frequency windows.
1868 """
1869 if self._on.hasnans:
1870 self._raise_monotonic_error("values must not have NaT")
1871 if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
1872 self._raise_monotonic_error("values must be monotonic")
1874 def _raise_monotonic_error(self, msg: str):
1875 on = self.on
1876 if on is None:
1877 if self.axis == 0:
1878 on = "index"
1879 else:
1880 on = "column"
1881 raise ValueError(f"{on} {msg}")
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.Series.rolling : Calling object with Series data.
        pandas.DataFrame.rolling : Calling object with DataFrame data.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.rolling(2).sum()
             A     B     C
        0  NaN   NaN   NaN
        1  3.0   9.0  15.0
        2  5.0  11.0  17.0

        >>> df.rolling(2).agg({"A": "sum", "B": "min"})
             A    B
        0  NaN  NaN
        1  3.0  4.0
        2  5.0  5.0
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Pure delegation: the shared aggregation machinery lives on the base
        # class; this override only swaps in Rolling-specific documentation.
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([2, 3, np.nan, 10])
        >>> s.rolling(2).count()
        0    1.0
        1    2.0
        2    1.0
        3    1.0
        dtype: float64
        >>> s.rolling(3).count()
        0    1.0
        1    2.0
        2    2.0
        3    2.0
        dtype: float64
        >>> s.rolling(4).count()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        if self.min_periods is None:
            # Deprecated legacy behavior: count historically ignored
            # min_periods. Temporarily force it to 0 for the computation,
            # then restore None. NOTE(review): this in-place mutation of
            # self is presumably not thread-safe — kept for compatibility.
            warnings.warn(
                (
                    "min_periods=None will default to the size of window "
                    "consistent with other methods in a future version. "
                    "Specify min_periods=0 instead."
                ),
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            self.min_periods = 0
            result = super().count()
            self.min_periods = None
        else:
            result = super().count(numeric_only)
        return result
    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        # Thin wrapper: validation and engine dispatch happen in
        # RollingAndExpandingMixin.apply; this override only attaches docs.
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4, 5])
        >>> s
        0    1
        1    2
        2    3
        3    4
        4    5
        dtype: int64

        >>> s.rolling(3).sum()
        0     NaN
        1     NaN
        2     6.0
        3     9.0
        4    12.0
        dtype: float64

        >>> s.rolling(3, center=True).sum()
        0     NaN
        1     6.0
        2     9.0
        3    12.0
        4     NaN
        dtype: float64

        For DataFrame, each sum is computed column-wise.

        >>> df = pd.DataFrame({{"A": s, "B": s ** 2}})
        >>> df
           A   B
        0  1   1
        1  2   4
        2  3   9
        3  4  16
        4  5  25

        >>> df.rolling(3).sum()
              A     B
        0   NaN   NaN
        1   NaN   NaN
        2   6.0  14.0
        3   9.0  29.0
        4  12.0  50.0
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
        nv.validate_rolling_func("sum", args, kwargs)
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="rolling",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
        nv.validate_rolling_func("max", args, kwargs)
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Performing a rolling minimum with a window size of 3.

        >>> s = pd.Series([4, 3, 5, 2, 6])
        >>> s.rolling(3).min()
        0    NaN
        1    NaN
        2    3.0
        3    2.0
        4    2.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
        nv.validate_rolling_func("min", args, kwargs)
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        The below examples will show rolling mean calculations with window sizes of
        two and three, respectively.

        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).mean()
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64

        >>> s.rolling(3).mean()
        0    NaN
        1    NaN
        2    2.0
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        nv.validate_rolling_func("mean", args, kwargs)
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Compute the rolling median of a series with a window size of 3.

        >>> s = pd.Series([0, 1, 2, 3, 4])
        >>> s.rolling(3).median()
        0    NaN
        1    NaN
        2    1.0
        3    2.0
        4    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # median takes no positional *args, hence None for the args check;
        # then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "median", None, kwargs)
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    1.000000
        4    1.000000
        5    1.154701
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn on deprecated compat args, then reject numpy-style positionals.
        maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
        nv.validate_rolling_func("std", args, kwargs)
        # Delegate to the shared windowed std implementation.
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    1.000000
        4    1.000000
        5    1.333333
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn on deprecated compat args, then reject numpy-style positionals.
        maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
        nv.validate_rolling_func("var", args, kwargs)
        # Delegate to the shared windowed variance implementation.
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n",
        window_method="rolling",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False, **kwargs):
        # skew accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "skew", None, kwargs)
        return super().skew(numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])
        >>> s.rolling(2, min_periods=1).sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.707107
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
        nv.validate_rolling_func("sem", args, kwargs)
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # sem is derived per-window as std / sqrt(count - ddof) rather than
        # delegating to a dedicated aggregation kernel.  Note std() is called
        # with its own default ddof; the sem ddof only adjusts the count term.
        return self.std(numeric_only=numeric_only, **kwargs) / (
            self.count(numeric_only) - ddof
        ).pow(0.5)
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}")
        3.999946
        >>> s = pd.Series(arr)
        >>> s.rolling(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    3.999946
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False, **kwargs):
        # kurt accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "kurt", None, kwargs)
        return super().kurt(numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).quantile(.4, interpolation='lower')
        0    NaN
        1    1.0
        2    2.0
        3    3.0
        dtype: float64

        >>> s.rolling(2).quantile(.4, interpolation='midpoint')
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
        **kwargs,
    ):
        # quantile accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "quantile", None, kwargs)
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.rolling(3).rank()
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.5
        dtype: float64

        >>> s.rolling(3).rank(method="max")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    2.0
        dtype: float64

        >>> s.rolling(3).rank(method="min")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
        **kwargs,
    ):
        # rank accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "rank", None, kwargs)
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        # cov accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "cov", None, kwargs)
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        The below example shows a rolling calculation with a window size of
        four matching the equivalent function call using :meth:`numpy.corrcoef`.

        >>> v1 = [3, 3, 3, 5, 8]
        >>> v2 = [3, 4, 4, 4, 8]
        >>> # numpy returns a 2X2 array, the correlation coefficient
        >>> # is the number at entry [0][1]
        >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
        0.333333
        >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
        0.916949
        >>> s1 = pd.Series(v1)
        >>> s2 = pd.Series(v2)
        >>> s1.rolling(4).corr(s2)
        0         NaN
        1         NaN
        2         NaN
        3    0.333333
        4    0.916949
        dtype: float64

        The below example shows a similar rolling calculation on a
        DataFrame using the pairwise option.

        >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
        [46., 31.], [50., 36.]])
        >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
        [[1.        0.6263001]
         [0.6263001 1.       ]]
        >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
        [[1.        0.5553681]
         [0.5553681 1.       ]]
        >>> df = pd.DataFrame(matrix, columns=['X','Y'])
        >>> df
              X     Y
        0  51.0  35.0
        1  49.0  30.0
        2  47.0  32.0
        3  46.0  31.0
        4  50.0  36.0
        >>> df.rolling(4).corr(pairwise=True)
                    X         Y
        0 X       NaN       NaN
          Y       NaN       NaN
        1 X       NaN       NaN
          Y       NaN       NaN
        2 X       NaN       NaN
          Y       NaN       NaN
        3 X  1.000000  0.626300
          Y  0.626300  1.000000
        4 X  1.000000  0.555368
          Y  0.555368  1.000000
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        # corr accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "corr", None, kwargs)
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )
# Rolling and Window present the same user-facing constructor surface, so
# Rolling reuses Window's class docstring verbatim.
Rolling.__doc__ = Window.__doc__
class RollingGroupby(BaseWindowGroupby, Rolling):
    """
    Provide a rolling groupby implementation.
    """

    _attributes = Rolling._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        indexer_cls: type[BaseIndexer]
        extra_kwargs: dict[str, Any] | None = None
        on_values = self._index_array
        if isinstance(self.window, BaseIndexer):
            # Caller supplied a custom indexer: reuse its class and its
            # configuration, dropping index_array since each group gets its own.
            indexer_cls = type(self.window)
            extra_kwargs = self.window.__dict__.copy()
            assert isinstance(extra_kwargs, dict)  # for mypy
            extra_kwargs.pop("index_array", None)
            win_size = self.window
        elif self._win_freq_i8 is not None:
            # Offset-based window: variable-width windows sized in i8 units.
            indexer_cls = VariableWindowIndexer
            # error: Incompatible types in assignment (expression has type
            # "int", variable has type "BaseIndexer")
            win_size = self._win_freq_i8  # type: ignore[assignment]
        else:
            # Plain integer window.
            indexer_cls = FixedWindowIndexer
            win_size = self.window
        return GroupbyIndexer(
            index_array=on_values,
            window_size=win_size,
            groupby_indices=self._grouper.indices,
            window_indexer=indexer_cls,
            indexer_kwargs=extra_kwargs,
        )

    def _validate_datetimelike_monotonic(self):
        """
        Validate that each group in self._on is monotonic
        """
        # GH 46061
        if self._on.hasnans:
            self._raise_monotonic_error("values must not have NaT")
        for positions in self._grouper.indices.values():
            group_on = self._on.take(positions)
            increasing = group_on.is_monotonic_increasing
            decreasing = group_on.is_monotonic_decreasing
            if not increasing and not decreasing:
                on = "index" if self.on is None else self.on
                raise ValueError(
                    f"Each group within {on} must be monotonic. "
                    f"Sort the values in {on} first."
                )