Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/window/rolling.py: 16%
769 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Provide a generic structure to support window functions,
3similar to how we have a Groupby object.
4"""
5from __future__ import annotations
7import copy
8from datetime import timedelta
9from functools import partial
10import inspect
11from textwrap import dedent
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 Hashable,
17 Sized,
18)
19import warnings
21import numpy as np
23from pandas._libs.tslibs import (
24 BaseOffset,
25 to_offset,
26)
27import pandas._libs.window.aggregations as window_aggregations
28from pandas._typing import (
29 ArrayLike,
30 Axis,
31 NDFrameT,
32 QuantileInterpolation,
33 WindowingRankType,
34)
35from pandas.compat._optional import import_optional_dependency
36from pandas.compat.numpy import function as nv
37from pandas.errors import DataError
38from pandas.util._decorators import doc
39from pandas.util._exceptions import find_stack_level
41from pandas.core.dtypes.common import (
42 ensure_float64,
43 is_bool,
44 is_integer,
45 is_list_like,
46 is_numeric_dtype,
47 is_scalar,
48 needs_i8_conversion,
49)
50from pandas.core.dtypes.generic import (
51 ABCDataFrame,
52 ABCSeries,
53)
54from pandas.core.dtypes.missing import notna
56from pandas.core._numba import executor
57from pandas.core.algorithms import factorize
58from pandas.core.apply import ResamplerWindowApply
59from pandas.core.arrays import ExtensionArray
60from pandas.core.base import SelectionMixin
61import pandas.core.common as com
62from pandas.core.indexers.objects import (
63 BaseIndexer,
64 FixedWindowIndexer,
65 GroupbyIndexer,
66 VariableWindowIndexer,
67)
68from pandas.core.indexes.api import (
69 DatetimeIndex,
70 Index,
71 MultiIndex,
72 PeriodIndex,
73 TimedeltaIndex,
74)
75from pandas.core.reshape.concat import concat
76from pandas.core.util.numba_ import (
77 get_jit_arguments,
78 maybe_use_numba,
79)
80from pandas.core.window.common import (
81 flex_binary_moment,
82 maybe_warn_args_and_kwargs,
83 zsqrt,
84)
85from pandas.core.window.doc import (
86 _shared_docs,
87 args_compat,
88 create_section_header,
89 kwargs_compat,
90 kwargs_numeric_only,
91 kwargs_scipy,
92 numba_notes,
93 template_header,
94 template_returns,
95 template_see_also,
96 window_agg_numba_parameters,
97 window_apply_parameters,
98)
99from pandas.core.window.numba_ import (
100 generate_manual_numpy_nan_agg_with_axis,
101 generate_numba_apply_func,
102 generate_numba_table_func,
103)
105if TYPE_CHECKING: 105 ↛ 106line 105 didn't jump to line 106, because the condition on line 105 was never true
106 from pandas import (
107 DataFrame,
108 Series,
109 )
110 from pandas.core.generic import NDFrame
111 from pandas.core.groupby.ops import BaseGrouper
class BaseWindow(SelectionMixin):
    """Provides utilities for performing windowing operations."""

    _attributes: list[str] = []
    exclusions: frozenset[Hashable] = frozenset()
    _on: Index

    def __init__(
        self,
        obj: NDFrame,
        window=None,
        min_periods: int | None = None,
        center: bool | None = False,
        win_type: str | None = None,
        axis: Axis = 0,
        on: str | Index | None = None,
        closed: str | None = None,
        step: int | None = None,
        method: str = "single",
        *,
        selection=None,
    ) -> None:
        self.obj = obj
        self.on = on
        self.closed = closed
        self.step = step
        self.window = window
        self.min_periods = min_periods
        self.center = center
        # TODO(2.0): Change this back to self.win_type once deprecation is enforced
        self._win_type = win_type
        self.axis = obj._get_axis_number(axis) if axis is not None else None
        self.method = method
        self._win_freq_i8: int | None = None
        # Resolve the index/column the window bounds are computed against.
        if self.on is None:
            if self.axis == 0:
                self._on = self.obj.index
            else:
                # i.e. self.axis == 1
                self._on = self.obj.columns
        elif isinstance(self.on, Index):
            self._on = self.on
        elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
            self._on = Index(self.obj[self.on])
        else:
            raise ValueError(
                f"invalid on specified as {self.on}, "
                "must be a column (of DataFrame), an Index or None"
            )

        self._selection = selection
        self._validate()

    @property
    def win_type(self):
        """Deprecated accessor: returns 'freq' for frequency-based windows."""
        if self._win_freq_i8 is not None:
            warnings.warn(
                "win_type will no longer return 'freq' in a future version. "
                "Check the type of self.window instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return "freq"
        return self._win_type

    @property
    def is_datetimelike(self) -> bool:
        """Deprecated: True when the window size is a time offset (freq-based)."""
        warnings.warn(
            "is_datetimelike is deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._win_freq_i8 is not None

    def validate(self) -> None:
        """Deprecated public alias for the private _validate."""
        warnings.warn(
            "validate is deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self._validate()

    def _validate(self) -> None:
        """
        Validate constructor arguments, raising ValueError/TypeError on
        invalid center/min_periods/closed/obj/window/method/step values.
        """
        if self.center is not None and not is_bool(self.center):
            raise ValueError("center must be a boolean")
        if self.min_periods is not None:
            if not is_integer(self.min_periods):
                raise ValueError("min_periods must be an integer")
            elif self.min_periods < 0:
                raise ValueError("min_periods must be >= 0")
            elif is_integer(self.window) and self.min_periods > self.window:
                raise ValueError(
                    f"min_periods {self.min_periods} must be <= window {self.window}"
                )
        if self.closed is not None and self.closed not in [
            "right",
            "both",
            "left",
            "neither",
        ]:
            raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
        if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
            # NOTE(review): this validates self.obj but reports type(self);
            # left as-is since callers/tests may match on this message.
            raise TypeError(f"invalid type: {type(self)}")
        if isinstance(self.window, BaseIndexer):
            # Validate that the passed BaseIndexer subclass has
            # a get_window_bounds with the correct signature.
            get_window_bounds_signature = inspect.signature(
                self.window.get_window_bounds
            ).parameters.keys()
            expected_signature = inspect.signature(
                BaseIndexer().get_window_bounds
            ).parameters.keys()
            if get_window_bounds_signature != expected_signature:
                raise ValueError(
                    f"{type(self.window).__name__} does not implement "
                    f"the correct signature for get_window_bounds"
                )
        if self.method not in ["table", "single"]:
            # BUG FIX: the message previously read "... or 'single" with the
            # closing quote missing.
            raise ValueError("method must be 'table' or 'single'")
        if self.step is not None:
            if not is_integer(self.step):
                raise ValueError("step must be an integer")
            elif self.step < 0:
                raise ValueError("step must be >= 0")

    def _check_window_bounds(
        self, start: np.ndarray, end: np.ndarray, num_vals: int
    ) -> None:
        """Sanity-check indexer output: equal-length bounds, one window per step."""
        if len(start) != len(end):
            raise ValueError(
                f"start ({len(start)}) and end ({len(end)}) bounds must be the "
                f"same length"
            )
        # ceil(num_vals / step) windows are expected when a step is given.
        elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
            raise ValueError(
                f"start and end bounds ({len(start)}) must be the same length "
                f"as the object ({num_vals}) divided by the step ({self.step}) "
                f"if given and rounded up"
            )

    def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
        """
        Slices the index for a given result and the preset step.
        """
        return (
            index
            if result is None or len(result) == len(index)
            else index[:: self.step]
        )

    def _validate_numeric_only(self, name: str, numeric_only: bool) -> None:
        """
        Validate numeric_only argument, raising if invalid for the input.

        Parameters
        ----------
        name : str
            Name of the operator (kernel).
        numeric_only : bool
            Value passed by user.
        """
        if (
            self._selected_obj.ndim == 1
            and numeric_only
            and not is_numeric_dtype(self._selected_obj.dtype)
        ):
            raise NotImplementedError(
                f"{type(self).__name__}.{name} does not implement numeric_only"
            )

    def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT:
        """Subset DataFrame to numeric columns.

        Parameters
        ----------
        obj : DataFrame

        Returns
        -------
        obj subset to numeric-only columns.
        """
        result = obj.select_dtypes(include=["number"], exclude=["timedelta"])
        return result

    def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
        """
        Split data into blocks & return conformed data.
        """
        # filter out the on from the object
        if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
            obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
        if obj.ndim > 1 and (numeric_only or self.axis == 1):
            # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
            # to float to calculate the complete row at once. We exclude all non-numeric
            # dtypes.
            obj = self._make_numeric_only(obj)
            if self.axis == 1:
                obj = obj.astype("float64", copy=False)
                obj._mgr = obj._mgr.consolidate()
        return obj

    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        with warnings.catch_warnings():
            # TODO(2.0): Remove once win_type deprecation is enforced
            warnings.filterwarnings("ignore", "win_type", FutureWarning)
            kwargs = {attr: getattr(self, attr) for attr in self._attributes}

        selection = None
        if subset.ndim == 2 and (
            (is_scalar(key) and key in subset) or is_list_like(key)
        ):
            selection = key

        new_win = type(self)(subset, selection=selection, **kwargs)
        return new_win

    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self.obj:
            # delegate unknown attributes to column selection, e.g. r.B
            return self[attr]

        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{attr}'"
        )

    def _dir_additions(self):
        return self.obj._dir_additions()

    def __repr__(self) -> str:
        """
        Provide a nice str repr of our rolling object.
        """
        attrs_list = (
            f"{attr_name}={getattr(self, attr_name)}"
            for attr_name in self._attributes
            if getattr(self, attr_name, None) is not None and attr_name[0] != "_"
        )
        attrs = ",".join(attrs_list)
        return f"{type(self).__name__} [{attrs}]"

    def __iter__(self):
        """Yield each window of the selected object as a sub-frame/series."""
        obj = self._selected_obj.set_axis(self._on)
        obj = self._create_data(obj)
        indexer = self._get_window_indexer()

        start, end = indexer.get_window_bounds(
            num_values=len(obj),
            min_periods=self.min_periods,
            center=self.center,
            closed=self.closed,
            step=self.step,
        )
        self._check_window_bounds(start, end, len(obj))

        for s, e in zip(start, end):
            result = obj.iloc[slice(s, e)]
            yield result

    def _prep_values(self, values: ArrayLike) -> np.ndarray:
        """Convert input to numpy arrays for Cython routines"""
        if needs_i8_conversion(values.dtype):
            raise NotImplementedError(
                f"ops for {type(self).__name__} for this "
                f"dtype {values.dtype} are not implemented"
            )
        else:
            # GH #12373 : rolling functions error on float32 data
            # make sure the data is coerced to float64
            try:
                if isinstance(values, ExtensionArray):
                    values = values.to_numpy(np.float64, na_value=np.nan)
                else:
                    values = ensure_float64(values)
            except (ValueError, TypeError) as err:
                raise TypeError(f"cannot handle this type -> {values.dtype}") from err

        # Convert inf to nan for C funcs
        inf = np.isinf(values)
        if inf.any():
            values = np.where(inf, np.nan, values)

        return values

    def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
        # if we have an 'on' column we want to put it back into
        # the results in the same location
        from pandas import Series

        if self.on is not None and not self._on.equals(obj.index):
            name = self._on.name
            extra_col = Series(self._on, index=self.obj.index, name=name)
            if name in result.columns:
                # TODO: sure we want to overwrite results?
                result[name] = extra_col
            elif name in result.index.names:
                pass
            elif name in self._selected_obj.columns:
                # insert in the same location as we had in _selected_obj
                old_cols = self._selected_obj.columns
                new_cols = result.columns
                old_loc = old_cols.get_loc(name)
                overlap = new_cols.intersection(old_cols[:old_loc])
                new_loc = len(overlap)
                result.insert(new_loc, name, extra_col)
            else:
                # insert at the end
                result[name] = extra_col

    @property
    def _index_array(self):
        # TODO: why do we get here with e.g. MultiIndex?
        if needs_i8_conversion(self._on.dtype):
            return self._on.asi8
        return None

    def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame:
        """Validate and finalize result."""
        if out.shape[1] == 0 and obj.shape[1] > 0:
            raise DataError("No numeric types to aggregate")
        elif out.shape[1] == 0:
            # input had no columns either; preserve shape, coerce to float
            return obj.astype("float64")

        self._insert_on_column(out, obj)
        return out

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        if isinstance(self.window, BaseIndexer):
            return self.window
        if self._win_freq_i8 is not None:
            return VariableWindowIndexer(
                index_array=self._index_array,
                window_size=self._win_freq_i8,
                center=self.center,
            )
        return FixedWindowIndexer(window_size=self.window)

    def _apply_series(
        self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
    ) -> Series:
        """
        Series version of _apply_blockwise
        """
        obj = self._create_data(self._selected_obj)

        if name == "count":
            # GH 12541: Special case for count where we support date-like types
            obj = notna(obj).astype(int)
        try:
            values = self._prep_values(obj._values)
        except (TypeError, NotImplementedError) as err:
            raise DataError("No numeric types to aggregate") from err

        result = homogeneous_func(values)
        index = self._slice_axis_for_step(obj.index, result)
        return obj._constructor(result, index=index, name=obj.name)

    def _apply_blockwise(
        self,
        homogeneous_func: Callable[..., ArrayLike],
        name: str,
        numeric_only: bool = False,
    ) -> DataFrame | Series:
        """
        Apply the given function to the DataFrame broken down into homogeneous
        sub-frames.
        """
        self._validate_numeric_only(name, numeric_only)
        if self._selected_obj.ndim == 1:
            return self._apply_series(homogeneous_func, name)

        obj = self._create_data(self._selected_obj, numeric_only)
        if name == "count":
            # GH 12541: Special case for count where we support date-like types
            obj = notna(obj).astype(int)
            obj._mgr = obj._mgr.consolidate()

        def hfunc(values: ArrayLike) -> ArrayLike:
            values = self._prep_values(values)
            return homogeneous_func(values)

        if self.axis == 1:
            obj = obj.T

        taker = []
        res_values = []
        for i, arr in enumerate(obj._iter_column_arrays()):
            # GH#42736 operate column-wise instead of block-wise
            try:
                res = hfunc(arr)
            except (TypeError, NotImplementedError):
                # nuisance column: silently dropped (deprecated, warned below)
                pass
            else:
                res_values.append(res)
                taker.append(i)

        index = self._slice_axis_for_step(
            obj.index, res_values[0] if len(res_values) > 0 else None
        )
        df = type(obj)._from_arrays(
            res_values,
            index=index,
            columns=obj.columns.take(taker),
            verify_integrity=False,
        )

        if self.axis == 1:
            df = df.T

        if 0 != len(res_values) != len(obj.columns):
            # GH#42738 ignore_failures dropped nuisance columns
            dropped = obj.columns.difference(obj.columns.take(taker))
            warnings.warn(
                "Dropping of nuisance columns in rolling operations "
                "is deprecated; in a future version this will raise TypeError. "
                "Select only valid columns before calling the operation. "
                f"Dropped columns were {dropped}",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        return self._resolve_output(df, obj)

    def _apply_tablewise(
        self,
        homogeneous_func: Callable[..., ArrayLike],
        name: str | None = None,
        numeric_only: bool = False,
    ) -> DataFrame | Series:
        """
        Apply the given function to the DataFrame across the entire object
        """
        if self._selected_obj.ndim == 1:
            raise ValueError("method='table' not applicable for Series objects.")
        obj = self._create_data(self._selected_obj, numeric_only)
        values = self._prep_values(obj.to_numpy())
        values = values.T if self.axis == 1 else values
        result = homogeneous_func(values)
        result = result.T if self.axis == 1 else result
        index = self._slice_axis_for_step(obj.index, result)
        columns = (
            obj.columns
            if result.shape[1] == len(obj.columns)
            else obj.columns[:: self.step]
        )
        out = obj._constructor(result, index=index, columns=columns)

        return self._resolve_output(out, obj)

    def _apply_pairwise(
        self,
        target: DataFrame | Series,
        other: DataFrame | Series | None,
        pairwise: bool | None,
        func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
        numeric_only: bool,
    ) -> DataFrame | Series:
        """
        Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
        """
        target = self._create_data(target, numeric_only)
        if other is None:
            other = target
            # only default unset
            pairwise = True if pairwise is None else pairwise
        elif not isinstance(other, (ABCDataFrame, ABCSeries)):
            raise ValueError("other must be a DataFrame or Series")
        elif other.ndim == 2 and numeric_only:
            other = self._make_numeric_only(other)

        return flex_binary_moment(target, other, func, pairwise=bool(pairwise))

    def _apply(
        self,
        func: Callable[..., Any],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ):
        """
        Rolling statistical measure using supplied function.

        Designed to be used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable function to apply
        name : str,
        numba_args : tuple
            args to be passed when func is a numba func
        **kwargs
            additional arguments for rolling function and window function

        Returns
        -------
        y : type of input
        """
        window_indexer = self._get_window_indexer()
        min_periods = (
            self.min_periods
            if self.min_periods is not None
            else window_indexer.window_size
        )

        def homogeneous_func(values: np.ndarray):
            # calculation function

            if values.size == 0:
                return values.copy()

            def calc(x):
                start, end = window_indexer.get_window_bounds(
                    num_values=len(x),
                    min_periods=min_periods,
                    center=self.center,
                    closed=self.closed,
                    step=self.step,
                )
                self._check_window_bounds(start, end, len(x))

                return func(x, start, end, min_periods, *numba_args)

            with np.errstate(all="ignore"):
                result = calc(values)

            return result

        if self.method == "single":
            return self._apply_blockwise(homogeneous_func, name, numeric_only)
        else:
            return self._apply_tablewise(homogeneous_func, name, numeric_only)

    def _numba_apply(
        self,
        func: Callable[..., Any],
        engine_kwargs: dict[str, bool] | None = None,
        *func_args,
    ):
        """Apply a numba-jitted aggregator over the computed window bounds."""
        window_indexer = self._get_window_indexer()
        min_periods = (
            self.min_periods
            if self.min_periods is not None
            else window_indexer.window_size
        )
        obj = self._create_data(self._selected_obj)
        if self.axis == 1:
            obj = obj.T
        values = self._prep_values(obj.to_numpy())
        if values.ndim == 1:
            # numba kernels operate on 2D input
            values = values.reshape(-1, 1)
        start, end = window_indexer.get_window_bounds(
            num_values=len(values),
            min_periods=min_periods,
            center=self.center,
            closed=self.closed,
            step=self.step,
        )
        self._check_window_bounds(start, end, len(values))
        aggregator = executor.generate_shared_aggregator(
            func, **get_jit_arguments(engine_kwargs)
        )
        result = aggregator(values, start, end, min_periods, *func_args)
        result = result.T if self.axis == 1 else result
        index = self._slice_axis_for_step(obj.index, result)
        if obj.ndim == 1:
            result = result.squeeze()
            out = obj._constructor(result, index=index, name=obj.name)
            return out
        else:
            columns = self._slice_axis_for_step(obj.columns, result.T)
            out = obj._constructor(result, index=index, columns=columns)
            return self._resolve_output(out, obj)

    def aggregate(self, func, *args, **kwargs):
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            return self.apply(func, raw=False, args=args, kwargs=kwargs)
        return result

    agg = aggregate
class BaseWindowGroupby(BaseWindow):
    """
    Provide the groupby windowing facilities.
    """

    # Grouper describing the groupby keys; validated and set in __init__.
    _grouper: BaseGrouper
    # Mirrors groupby(as_index=...): True puts group keys in the result index.
    _as_index: bool
    _attributes: list[str] = ["_grouper"]

    def __init__(
        self,
        obj: DataFrame | Series,
        *args,
        _grouper: BaseGrouper,
        _as_index: bool = True,
        **kwargs,
    ) -> None:
        # Imported at call time, presumably to avoid a circular import
        # (module level only imports BaseGrouper under TYPE_CHECKING).
        from pandas.core.groupby.ops import BaseGrouper

        if not isinstance(_grouper, BaseGrouper):
            raise ValueError("Must pass a BaseGrouper object.")
        self._grouper = _grouper
        self._as_index = _as_index
        # GH 32262: It's convention to keep the grouping column in
        # groupby.<agg_func>, but unexpected to users in
        # groupby.rolling.<agg_func>
        obj = obj.drop(columns=self._grouper.names, errors="ignore")
        # GH 15354
        if kwargs.get("step") is not None:
            raise NotImplementedError("step not implemented for groupby")
        super().__init__(obj, *args, **kwargs)

    def _apply(
        self,
        func: Callable[..., Any],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ) -> DataFrame | Series:
        """
        Apply func via BaseWindow._apply, then rebuild the result's index as a
        MultiIndex of groupby key levels followed by the original index levels.
        """
        result = super()._apply(
            func,
            name,
            numeric_only,
            numba_args,
            **kwargs,
        )
        # Reconstruct the resulting MultiIndex
        # 1st set of levels = group by labels
        # 2nd set of levels = original DataFrame/Series index
        grouped_object_index = self.obj.index
        grouped_index_name = [*grouped_object_index.names]
        groupby_keys = copy.copy(self._grouper.names)
        result_index_names = groupby_keys + grouped_index_name

        # Group keys that are not index levels (i.e. real columns) were kept
        # in the result by the base implementation and must be dropped.
        drop_columns = [
            key
            for key in self._grouper.names
            if key not in self.obj.index.names or key is None
        ]

        if len(drop_columns) != len(groupby_keys):
            # Our result will have still kept the column in the result
            result = result.drop(columns=drop_columns, errors="ignore")

        codes = self._grouper.codes
        levels = copy.copy(self._grouper.levels)

        # indexer reorders original row positions into groupby order.
        group_indices = self._grouper.indices.values()
        if group_indices:
            indexer = np.concatenate(list(group_indices))
        else:
            indexer = np.array([], dtype=np.intp)
        codes = [c.take(indexer) for c in codes]

        # if the index of the original dataframe needs to be preserved, append
        # this index (but reordered) to the codes/levels from the groupby
        if grouped_object_index is not None:
            idx = grouped_object_index.take(indexer)
            if not isinstance(idx, MultiIndex):
                idx = MultiIndex.from_arrays([idx])
            codes.extend(list(idx.codes))
            levels.extend(list(idx.levels))

        result_index = MultiIndex(
            levels, codes, names=result_index_names, verify_integrity=False
        )

        result.index = result_index
        if not self._as_index:
            # as_index=False: move the group-key levels back out into columns.
            result = result.reset_index(level=list(range(len(groupby_keys))))
        return result

    def _apply_pairwise(
        self,
        target: DataFrame | Series,
        other: DataFrame | Series | None,
        pairwise: bool | None,
        func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
        numeric_only: bool,
    ) -> DataFrame | Series:
        """
        Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
        """
        # Manually drop the grouping column first
        target = target.drop(columns=self._grouper.names, errors="ignore")
        result = super()._apply_pairwise(target, other, pairwise, func, numeric_only)
        # 1) Determine the levels + codes of the groupby levels
        if other is not None and not all(
            len(group) == len(other) for group in self._grouper.indices.values()
        ):
            # GH 42915
            # len(other) != len(any group), so must reindex (expand) the result
            # from flex_binary_moment to a "transform"-like result
            # per groupby combination
            old_result_len = len(result)
            result = concat(
                [
                    result.take(gb_indices).reindex(result.index)
                    for gb_indices in self._grouper.indices.values()
                ]
            )

            gb_pairs = (
                com.maybe_make_list(pair) for pair in self._grouper.indices.keys()
            )
            groupby_codes = []
            groupby_levels = []
            # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
            for gb_level_pair in map(list, zip(*gb_pairs)):
                # each group key value is repeated once per original result row
                labels = np.repeat(np.array(gb_level_pair), old_result_len)
                codes, levels = factorize(labels)
                groupby_codes.append(codes)
                groupby_levels.append(levels)
        else:
            # pairwise=True or len(other) == len(each group), so repeat
            # the groupby labels by the number of columns in the original object
            groupby_codes = self._grouper.codes
            # error: Incompatible types in assignment (expression has type
            # "List[Index]", variable has type "List[Union[ndarray, Index]]")
            groupby_levels = self._grouper.levels  # type: ignore[assignment]

            group_indices = self._grouper.indices.values()
            if group_indices:
                indexer = np.concatenate(list(group_indices))
            else:
                indexer = np.array([], dtype=np.intp)

            if target.ndim == 1:
                repeat_by = 1
            else:
                repeat_by = len(target.columns)
            groupby_codes = [
                np.repeat(c.take(indexer), repeat_by) for c in groupby_codes
            ]
        # 2) Determine the levels + codes of the result from super()._apply_pairwise
        if isinstance(result.index, MultiIndex):
            result_codes = list(result.index.codes)
            result_levels = list(result.index.levels)
            result_names = list(result.index.names)
        else:
            # flat index: factorize so it can be spliced into a MultiIndex
            idx_codes, idx_levels = factorize(result.index)
            result_codes = [idx_codes]
            result_levels = [idx_levels]
            result_names = [result.index.name]

        # 3) Create the resulting index by combining 1) + 2)
        result_codes = groupby_codes + result_codes
        result_levels = groupby_levels + result_levels
        result_names = self._grouper.names + result_names

        result_index = MultiIndex(
            result_levels, result_codes, names=result_names, verify_integrity=False
        )
        result.index = result_index
        return result

    def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
        """
        Split data into blocks & return conformed data.
        """
        # Ensure the object we're rolling over is monotonically sorted relative
        # to the groups
        # GH 36197
        if not obj.empty:
            groupby_order = np.concatenate(list(self._grouper.indices.values())).astype(
                np.int64
            )
            obj = obj.take(groupby_order)
        return super()._create_data(obj, numeric_only)

    def _gotitem(self, key, ndim, subset=None):
        # we are setting the index on the actual object
        # here so our index is carried through to the selected obj
        # when we do the splitting for the groupby
        if self.on is not None:
            # GH 43355
            subset = self.obj.set_index(self._on)
        return super()._gotitem(key, ndim, subset=subset)
918class Window(BaseWindow):
919 """
920 Provide rolling window calculations.
922 Parameters
923 ----------
924 window : int, offset, or BaseIndexer subclass
925 Size of the moving window.
927 If an integer, the fixed number of observations used for
928 each window.
930 If an offset, the time period of each window. Each
931 window will be a variable sized based on the observations included in
932 the time-period. This is only valid for datetimelike indexes.
933 To learn more about the offsets & frequency strings, please see `this link
934 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
936 If a BaseIndexer subclass, the window boundaries
937 based on the defined ``get_window_bounds`` method. Additional rolling
938 keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
939 ``step`` will be passed to ``get_window_bounds``.
941 min_periods : int, default None
942 Minimum number of observations in window required to have a value;
943 otherwise, result is ``np.nan``.
945 For a window that is specified by an offset, ``min_periods`` will default to 1.
947 For a window that is specified by an integer, ``min_periods`` will default
948 to the size of the window.
950 center : bool, default False
951 If False, set the window labels as the right edge of the window index.
953 If True, set the window labels as the center of the window index.
955 win_type : str, default None
956 If ``None``, all points are evenly weighted.
958 If a string, it must be a valid `scipy.signal window function
959 <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.
961 Certain Scipy window types require additional parameters to be passed
962 in the aggregation function. The additional parameters must match
963 the keywords specified in the Scipy window type method signature.
965 on : str, optional
966 For a DataFrame, a column label or Index level on which
967 to calculate the rolling window, rather than the DataFrame's index.
969 Provided integer column is ignored and excluded from result since
970 an integer index is not used to calculate the rolling window.
972 axis : int or str, default 0
973 If ``0`` or ``'index'``, roll across the rows.
975 If ``1`` or ``'columns'``, roll across the columns.
977 For `Series` this parameter is unused and defaults to 0.
979 closed : str, default None
980 If ``'right'``, the first point in the window is excluded from calculations.
982 If ``'left'``, the last point in the window is excluded from calculations.
984 If ``'both'``, the no points in the window are excluded from calculations.
986 If ``'neither'``, the first and last points in the window are excluded
987 from calculations.
989 Default ``None`` (``'right'``).
991 .. versionchanged:: 1.2.0
993 The closed parameter with fixed windows is now supported.
995 step : int, default None
997 .. versionadded:: 1.5.0
999 Evaluate the window at every ``step`` result, equivalent to slicing as
1000 ``[::step]``. ``window`` must be an integer. Using a step argument other
1001 than None or 1 will produce a result with a different shape than the input.
1003 method : str {'single', 'table'}, default 'single'
1005 .. versionadded:: 1.3.0
1007 Execute the rolling operation per single column or row (``'single'``)
1008 or over the entire object (``'table'``).
1010 This argument is only implemented when specifying ``engine='numba'``
1011 in the method call.
1013 Returns
1014 -------
1015 ``Window`` subclass if a ``win_type`` is passed
1017 ``Rolling`` subclass if ``win_type`` is not passed
1019 See Also
1020 --------
1021 expanding : Provides expanding transformations.
1022 ewm : Provides exponential weighted functions.
1024 Notes
1025 -----
1026 See :ref:`Windowing Operations <window.generic>` for further usage details
1027 and examples.
1029 Examples
1030 --------
1031 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
1032 >>> df
1033 B
1034 0 0.0
1035 1 1.0
1036 2 2.0
1037 3 NaN
1038 4 4.0
1040 **window**
1042 Rolling sum with a window length of 2 observations.
1044 >>> df.rolling(2).sum()
1045 B
1046 0 NaN
1047 1 1.0
1048 2 3.0
1049 3 NaN
1050 4 NaN
1052 Rolling sum with a window span of 2 seconds.
1054 >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
1055 ... index = [pd.Timestamp('20130101 09:00:00'),
1056 ... pd.Timestamp('20130101 09:00:02'),
1057 ... pd.Timestamp('20130101 09:00:03'),
1058 ... pd.Timestamp('20130101 09:00:05'),
1059 ... pd.Timestamp('20130101 09:00:06')])
1061 >>> df_time
1062 B
1063 2013-01-01 09:00:00 0.0
1064 2013-01-01 09:00:02 1.0
1065 2013-01-01 09:00:03 2.0
1066 2013-01-01 09:00:05 NaN
1067 2013-01-01 09:00:06 4.0
1069 >>> df_time.rolling('2s').sum()
1070 B
1071 2013-01-01 09:00:00 0.0
1072 2013-01-01 09:00:02 1.0
1073 2013-01-01 09:00:03 3.0
1074 2013-01-01 09:00:05 NaN
1075 2013-01-01 09:00:06 4.0
1077 Rolling sum with forward looking windows with 2 observations.
1079 >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
1080 >>> df.rolling(window=indexer, min_periods=1).sum()
1081 B
1082 0 1.0
1083 1 3.0
1084 2 2.0
1085 3 4.0
1086 4 4.0
1088 **min_periods**
1090 Rolling sum with a window length of 2 observations, but only needs a minimum of 1
1091 observation to calculate a value.
1093 >>> df.rolling(2, min_periods=1).sum()
1094 B
1095 0 0.0
1096 1 1.0
1097 2 3.0
1098 3 2.0
1099 4 4.0
1101 **center**
1103 Rolling sum with the result assigned to the center of the window index.
1105 >>> df.rolling(3, min_periods=1, center=True).sum()
1106 B
1107 0 1.0
1108 1 3.0
1109 2 3.0
1110 3 6.0
1111 4 4.0
1113 >>> df.rolling(3, min_periods=1, center=False).sum()
1114 B
1115 0 0.0
1116 1 1.0
1117 2 3.0
1118 3 3.0
1119 4 6.0
1121 **step**
1123 Rolling sum with a window length of 2 observations, minimum of 1 observation to
1124 calculate a value, and a step of 2.
1126 >>> df.rolling(2, min_periods=1, step=2).sum()
1127 B
1128 0 0.0
1129 2 3.0
1130 4 4.0
1132 **win_type**
1134 Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
1135 window type. ``std`` is required in the aggregation function.
1137 >>> df.rolling(2, win_type='gaussian').sum(std=3)
1138 B
1139 0 NaN
1140 1 0.986207
1141 2 2.958621
1142 3 NaN
1143 4 NaN
1144 """
    # Constructor arguments recognized by this window subclass; consumed by
    # the shared BaseWindow machinery (attribute propagation, __repr__, etc.).
    _attributes = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]
1158 def _validate(self):
1159 super()._validate()
1161 if not isinstance(self.win_type, str):
1162 raise ValueError(f"Invalid win_type {self.win_type}")
1163 signal = import_optional_dependency(
1164 "scipy.signal", extra="Scipy is required to generate window weight."
1165 )
1166 self._scipy_weight_generator = getattr(signal, self.win_type, None)
1167 if self._scipy_weight_generator is None:
1168 raise ValueError(f"Invalid win_type {self.win_type}")
1170 if isinstance(self.window, BaseIndexer):
1171 raise NotImplementedError(
1172 "BaseIndexer subclasses not implemented with win_types."
1173 )
1174 elif not is_integer(self.window) or self.window < 0:
1175 raise ValueError("window must be an integer 0 or greater")
1177 if self.method != "single":
1178 raise NotImplementedError("'single' is the only supported method type.")
1180 def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
1181 """
1182 Center the result in the window for weighted rolling aggregations.
1183 """
1184 if offset > 0:
1185 lead_indexer = [slice(offset, None)]
1186 result = np.copy(result[tuple(lead_indexer)])
1187 return result
    def _apply(
        self,
        func: Callable[[np.ndarray, int, int], np.ndarray],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ):
        """
        Rolling with weights statistical measure using supplied function.

        Designed to be used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable function to apply
        name : str,
        numeric_only : bool, default False
            Whether to only operate on bool, int, and float columns
        numba_args : tuple
            unused
        **kwargs
            additional arguments for scipy windows if necessary

        Returns
        -------
        y : type of input
        """
        # Build the weight array from the scipy.signal generator selected in
        # _validate; kwargs are the window's extra parameters (e.g. ``std``
        # for a gaussian window).
        # "None" not callable  [misc]
        window = self._scipy_weight_generator(  # type: ignore[misc]
            self.window, **kwargs
        )
        # With center=True the result is later shifted left by this offset.
        offset = (len(window) - 1) // 2 if self.center else 0

        def homogeneous_func(values: np.ndarray):
            # calculation function

            if values.size == 0:
                return values.copy()

            def calc(x):
                # Pad with NaNs so the centered window has data to consume at
                # the trailing edge before _center_window trims the front.
                additional_nans = np.array([np.nan] * offset)
                x = np.concatenate((x, additional_nans))
                return func(x, window, self.min_periods or len(window))

            with np.errstate(all="ignore"):
                # Our weighted aggregations return memoryviews
                result = np.asarray(calc(values))

            if self.center:
                result = self._center_window(result, offset)

            return result

        # Apply per column, then honor the ``step`` slicing (step=None → [::None],
        # i.e. no subsampling).
        return self._apply_blockwise(homogeneous_func, name, numeric_only)[:: self.step]
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.rolling(2, win_type="boxcar").agg("mean")
             A    B    C
        0  NaN  NaN  NaN
        1  1.5  4.5  7.5
        2  2.5  5.5  8.5
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Delegate to the shared resampler/window aggregation machinery, which
        # handles strings, lists and dicts of aggregations; it returns None
        # when ``func`` should instead be applied directly to this object.
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:

            # these must apply directly
            result = func(self)

        return result

    agg = aggregate
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window sum",
        agg_method="sum",
    )
    def sum(self, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("sum", args, kwargs)
        window_func = window_aggregations.roll_weighted_sum
        # kwargs are forwarded through _apply to the scipy weight generator
        # (e.g. ``std`` for a gaussian window).
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="sum",
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window mean",
        agg_method="mean",
    )
    def mean(self, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("mean", args, kwargs)
        window_func = window_aggregations.roll_weighted_mean
        # kwargs are forwarded through _apply to the scipy weight generator.
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="mean",
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        ".. versionadded:: 1.0.0 \n\n",
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window variance",
        agg_method="var",
    )
    def var(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("var", args, kwargs)
        window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof)
        # Window.std delegates here with name="std" in kwargs; drop it so it
        # is not forwarded to the scipy weight generator.
        kwargs.pop("name", None)
        return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        ".. versionadded:: 1.0.0 \n\n",
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window standard deviation",
        agg_method="std",
    )
    def std(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_window_func("std", args, kwargs)
        # std = sqrt(var); zsqrt maps tiny negative variances (floating-point
        # noise) to 0 instead of NaN. The name="std" kwarg is popped in var().
        return zsqrt(
            self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs)
        )
class RollingAndExpandingMixin(BaseWindow):
    """
    Shared implementations of the aggregation methods exposed by both
    Rolling and Expanding; the concrete subclasses mainly add validation,
    argument deprecation warnings and docstrings on top of these.
    """

    def count(self, numeric_only: bool = False):
        # Implemented as a windowed sum; presumably _apply converts the
        # values to a 0/1 notna mask for name="count" — defined upstream,
        # not visible here.
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="count", numeric_only=numeric_only)

    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        """
        Apply an arbitrary function over each window.

        Dispatches to a numba-compiled kernel when ``engine='numba'``
        (requires ``raw=True``), otherwise to the Cython roll_apply path.
        Raises ValueError for an unknown engine or invalid argument combos.
        """
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        if not is_bool(raw):
            raise ValueError("raw parameter must be `True` or `False`")

        numba_args: tuple[Any, ...] = ()
        if maybe_use_numba(engine):
            if raw is False:
                raise ValueError("raw must be `True` when using the numba engine")
            numba_args = args
            if self.method == "single":
                apply_func = generate_numba_apply_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
            else:
                # method="table": jit over the whole 2-D block at once.
                apply_func = generate_numba_table_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
        elif engine in ("cython", None):
            if engine_kwargs is not None:
                raise ValueError("cython engine does not accept engine_kwargs")
            apply_func = self._generate_cython_apply_func(args, kwargs, raw, func)
        else:
            raise ValueError("engine must be either 'numba' or 'cython'")

        return self._apply(
            apply_func,
            name="apply",
            numba_args=numba_args,
        )

    def _generate_cython_apply_func(
        self,
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        raw: bool,
        function: Callable[..., Any],
    ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
        # Returns a closure matching the (values, begin, end, min_periods)
        # signature expected by _apply, wrapping the Cython roll_apply kernel.
        from pandas import Series

        window_func = partial(
            window_aggregations.roll_apply,
            args=args,
            kwargs=kwargs,
            raw=raw,
            function=function,
        )

        def apply_func(values, begin, end, min_periods, raw=raw):
            if not raw:
                # GH 45912: with raw=False the user function receives a
                # Series carrying the original index, not a bare ndarray.
                values = Series(values, index=self._on)
            return window_func(values, begin, end, min_periods)

        return apply_func

    def sum(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("sum", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                # No table-wise numba kernel: fall back to apply() with a
                # manual axis-aware NaN-aggregation wrapper.
                func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_sum

                return self._numba_apply(sliding_sum, engine_kwargs)
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="sum", numeric_only=numeric_only, **kwargs)

    def max(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("max", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmax)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # Third argument True selects the "max" branch of the kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, True)
        window_func = window_aggregations.roll_max
        return self._apply(window_func, name="max", numeric_only=numeric_only, **kwargs)

    def min(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("min", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmin)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # Third argument False selects the "min" branch of the kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, False)
        window_func = window_aggregations.roll_min
        return self._apply(window_func, name="min", numeric_only=numeric_only, **kwargs)

    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("mean", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmean)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_mean

                return self._numba_apply(sliding_mean, engine_kwargs)
        window_func = window_aggregations.roll_mean
        return self._apply(
            window_func, name="mean", numeric_only=numeric_only, **kwargs
        )

    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        if maybe_use_numba(engine):
            # No dedicated sliding-median numba kernel: route both methods
            # through apply() with np.nanmedian.
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian)
            else:
                func = np.nanmedian

            return self.apply(
                func,
                raw=True,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )
        window_func = window_aggregations.roll_median_c
        return self._apply(
            window_func, name="median", numeric_only=numeric_only, **kwargs
        )

    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("std", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("std not supported with method='table'")
            else:
                from pandas.core._numba.kernels import sliding_var

                return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof))
        window_func = window_aggregations.roll_var

        def zsqrt_func(values, begin, end, min_periods):
            # sqrt of the rolling variance; zsqrt clamps tiny negative
            # floating-point variances to 0 instead of producing NaN.
            return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof))

        return self._apply(
            zsqrt_func,
            name="std",
            numeric_only=numeric_only,
            **kwargs,
        )

    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_window_func("var", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("var not supported with method='table'")
            else:
                from pandas.core._numba.kernels import sliding_var

                return self._numba_apply(sliding_var, engine_kwargs, ddof)
        window_func = partial(window_aggregations.roll_var, ddof=ddof)
        return self._apply(
            window_func,
            name="var",
            numeric_only=numeric_only,
            **kwargs,
        )

    def skew(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_skew
        return self._apply(
            window_func,
            name="skew",
            numeric_only=numeric_only,
            **kwargs,
        )

    def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        nv.validate_rolling_func("sem", args, kwargs)
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # Standard error of the mean: std / sqrt(count - ddof), computed
        # window-wise from the two existing aggregations.
        return self.std(numeric_only=numeric_only, **kwargs) / (
            self.count(numeric_only=numeric_only) - ddof
        ).pow(0.5)

    def kurt(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_kurt
        return self._apply(
            window_func,
            name="kurt",
            numeric_only=numeric_only,
            **kwargs,
        )

    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
        **kwargs,
    ):
        # Fast paths: q=1.0 and q=0.0 are exactly the rolling max/min.
        if quantile == 1.0:
            window_func = window_aggregations.roll_max
        elif quantile == 0.0:
            window_func = window_aggregations.roll_min
        else:
            window_func = partial(
                window_aggregations.roll_quantile,
                quantile=quantile,
                interpolation=interpolation,
            )

        return self._apply(
            window_func, name="quantile", numeric_only=numeric_only, **kwargs
        )

    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
        **kwargs,
    ):
        window_func = partial(
            window_aggregations.roll_rank,
            method=method,
            ascending=ascending,
            percentile=pct,
        )

        return self._apply(
            window_func, name="rank", numeric_only=numeric_only, **kwargs
        )

    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for cov")
        self._validate_numeric_only("cov", numeric_only)

        from pandas import Series

        def cov_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                # cov = (E[XY] - E[X]E[Y]) * n/(n - ddof), where n counts
                # positions with both x and y non-null in the window.
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, cov_func, numeric_only
        )

    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for corr")
        self._validate_numeric_only("corr", numeric_only)

        from pandas import Series

        def corr_func(x, y):
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                # corr = cov(X, Y) / sqrt(var(X) * var(Y)), all computed
                # window-wise with the same bias correction as cov().
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                x_var = window_aggregations.roll_var(
                    x_array, start, end, min_periods, ddof
                )
                y_var = window_aggregations.roll_var(
                    y_array, start, end, min_periods, ddof
                )
                numerator = (mean_x_y - mean_x * mean_y) * (
                    count_x_y / (count_x_y - ddof)
                )
                denominator = (x_var * y_var) ** 0.5
                result = numerator / denominator
            return Series(result, index=x.index, name=x.name)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, corr_func, numeric_only
        )
1807class Rolling(RollingAndExpandingMixin):
    # Constructor arguments recognized by Rolling; used by the shared
    # BaseWindow machinery (attribute propagation, __repr__, etc.).
    _attributes: list[str] = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]
    def _validate(self):
        """
        Validate Rolling-specific constructor arguments.

        Frequency windows (a str/offset/timedelta window over a datetimelike
        axis) additionally require a monotonic, NaT-free axis and a fixed
        frequency; for them ``min_periods`` defaults to 1 and ``step`` is
        rejected. Otherwise ``window`` must be a BaseIndexer or a
        non-negative integer.
        """
        super()._validate()

        # we allow rolling on a datetimelike index
        if (
            self.obj.empty
            or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
        ) and isinstance(self.window, (str, BaseOffset, timedelta)):

            self._validate_datetimelike_monotonic()

            # this will raise ValueError on non-fixed freqs
            try:
                freq = to_offset(self.window)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"passed window {self.window} is not "
                    "compatible with a datetimelike index"
                ) from err
            if isinstance(self._on, PeriodIndex):
                # Express the window in units of the PeriodIndex's own
                # frequency rather than raw nanoseconds.
                # error: Incompatible types in assignment (expression has type
                # "float", variable has type "Optional[int]")
                self._win_freq_i8 = freq.nanos / (  # type: ignore[assignment]
                    self._on.freq.nanos / self._on.freq.n
                )
            else:
                self._win_freq_i8 = freq.nanos

            # min_periods must be an integer
            if self.min_periods is None:
                self.min_periods = 1

            if self.step is not None:
                raise NotImplementedError(
                    "step is not supported with frequency windows"
                )

        elif isinstance(self.window, BaseIndexer):
            # Passed BaseIndexer subclass should handle all other rolling kwargs
            pass
        elif not is_integer(self.window) or self.window < 0:
            raise ValueError("window must be an integer 0 or greater")
1864 def _validate_datetimelike_monotonic(self):
1865 """
1866 Validate self._on is monotonic (increasing or decreasing) and has
1867 no NaT values for frequency windows.
1868 """
1869 if self._on.hasnans:
1870 self._raise_monotonic_error("values must not have NaT")
1871 if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
1872 self._raise_monotonic_error("values must be monotonic")
1874 def _raise_monotonic_error(self, msg: str):
1875 on = self.on
1876 if on is None:
1877 if self.axis == 0:
1878 on = "index"
1879 else:
1880 on = "column"
1881 raise ValueError(f"{on} {msg}")
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.Series.rolling : Calling object with Series data.
        pandas.DataFrame.rolling : Calling object with DataFrame data.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.rolling(2).sum()
             A     B     C
        0  NaN   NaN   NaN
        1  3.0   9.0  15.0
        2  5.0  11.0  17.0

        >>> df.rolling(2).agg({"A": "sum", "B": "min"})
             A    B
        0  NaN  NaN
        1  3.0  4.0
        2  5.0  5.0
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Pure delegation: the shared aggregation machinery lives on the base
        # class; this override only swaps in Rolling-specific documentation.
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([2, 3, np.nan, 10])
        >>> s.rolling(2).count()
        0    1.0
        1    2.0
        2    1.0
        3    1.0
        dtype: float64
        >>> s.rolling(3).count()
        0    1.0
        1    2.0
        2    2.0
        3    2.0
        dtype: float64
        >>> s.rolling(4).count()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        if self.min_periods is None:
            # Deprecated legacy behavior: count historically ignored
            # min_periods. Temporarily force it to 0 for the computation,
            # then restore None. NOTE(review): this in-place mutation of
            # self is presumably not thread-safe — kept for compatibility.
            warnings.warn(
                (
                    "min_periods=None will default to the size of window "
                    "consistent with other methods in a future version. "
                    "Specify min_periods=0 instead."
                ),
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            self.min_periods = 0
            result = super().count()
            self.min_periods = None
        else:
            result = super().count(numeric_only)
        return result
    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        # Thin wrapper: validation and engine dispatch happen in
        # RollingAndExpandingMixin.apply; this override only attaches docs.
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4, 5])
        >>> s
        0    1
        1    2
        2    3
        3    4
        4    5
        dtype: int64

        >>> s.rolling(3).sum()
        0     NaN
        1     NaN
        2     6.0
        3     9.0
        4    12.0
        dtype: float64

        >>> s.rolling(3, center=True).sum()
        0     NaN
        1     6.0
        2     9.0
        3    12.0
        4     NaN
        dtype: float64

        For DataFrame, each sum is computed column-wise.

        >>> df = pd.DataFrame({{"A": s, "B": s ** 2}})
        >>> df
           A   B
        0  1   1
        1  2   4
        2  3   9
        3  4  16
        4  5  25

        >>> df.rolling(3).sum()
              A     B
        0   NaN   NaN
        1   NaN   NaN
        2   6.0  14.0
        3   9.0  29.0
        4  12.0  50.0
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
        nv.validate_rolling_func("sum", args, kwargs)
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="rolling",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
        nv.validate_rolling_func("max", args, kwargs)
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Performing a rolling minimum with a window size of 3.

        >>> s = pd.Series([4, 3, 5, 2, 6])
        >>> s.rolling(3).min()
        0    NaN
        1    NaN
        2    3.0
        3    2.0
        4    2.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
        nv.validate_rolling_func("min", args, kwargs)
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        The below examples will show rolling mean calculations with window sizes of
        two and three, respectively.

        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).mean()
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64

        >>> s.rolling(3).mean()
        0    NaN
        1    NaN
        2    2.0
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn about deprecated *args/**kwargs usage, validate numpy-compat
        # arguments, then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        nv.validate_rolling_func("mean", args, kwargs)
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Compute the rolling median of a series with a window size of 3.

        >>> s = pd.Series([0, 1, 2, 3, 4])
        >>> s.rolling(3).median()
        0    NaN
        1    NaN
        2    1.0
        3    2.0
        4    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # median takes no positional *args, hence None for the args check;
        # then delegate to the shared mixin implementation.
        maybe_warn_args_and_kwargs(type(self), "median", None, kwargs)
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    1.000000
        4    1.000000
        5    1.154701
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn on deprecated compat args, then reject numpy-style positionals.
        maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
        nv.validate_rolling_func("std", args, kwargs)
        # Delegate to the shared windowed std implementation.
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    1.000000
        4    1.000000
        5    1.333333
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Warn on deprecated compat args, then reject numpy-style positionals.
        maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
        nv.validate_rolling_func("var", args, kwargs)
        # Delegate to the shared windowed variance implementation.
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n",
        window_method="rolling",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False, **kwargs):
        # skew accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "skew", None, kwargs)
        return super().skew(numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])
        >>> s.rolling(2, min_periods=1).sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.707107
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False, *args, **kwargs):
        maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
        nv.validate_rolling_func("sem", args, kwargs)
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # sem is derived per-window as std / sqrt(count - ddof) rather than
        # delegating to a dedicated aggregation kernel.  Note std() is called
        # with its own default ddof; the sem ddof only adjusts the count term.
        return self.std(numeric_only=numeric_only, **kwargs) / (
            self.count(numeric_only) - ddof
        ).pow(0.5)
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}")
        3.999946
        >>> s = pd.Series(arr)
        >>> s.rolling(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    3.999946
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False, **kwargs):
        # kurt accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "kurt", None, kwargs)
        return super().kurt(numeric_only=numeric_only, **kwargs)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).quantile(.4, interpolation='lower')
        0    NaN
        1    1.0
        2    2.0
        3    3.0
        dtype: float64

        >>> s.rolling(2).quantile(.4, interpolation='midpoint')
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
        **kwargs,
    ):
        # quantile accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "quantile", None, kwargs)
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.rolling(3).rank()
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.5
        dtype: float64

        >>> s.rolling(3).rank(method="max")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    2.0
        dtype: float64

        >>> s.rolling(3).rank(method="min")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
        **kwargs,
    ):
        # rank accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "rank", None, kwargs)
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        # cov accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "cov", None, kwargs)
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        The below example shows a rolling calculation with a window size of
        four matching the equivalent function call using :meth:`numpy.corrcoef`.

        >>> v1 = [3, 3, 3, 5, 8]
        >>> v2 = [3, 4, 4, 4, 8]
        >>> # numpy returns a 2X2 array, the correlation coefficient
        >>> # is the number at entry [0][1]
        >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
        0.333333
        >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
        0.916949
        >>> s1 = pd.Series(v1)
        >>> s2 = pd.Series(v2)
        >>> s1.rolling(4).corr(s2)
        0         NaN
        1         NaN
        2         NaN
        3    0.333333
        4    0.916949
        dtype: float64

        The below example shows a similar rolling calculation on a
        DataFrame using the pairwise option.

        >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
        [46., 31.], [50., 36.]])
        >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
        [[1.        0.6263001]
         [0.6263001 1.       ]]
        >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
        [[1.        0.5553681]
         [0.5553681 1.       ]]
        >>> df = pd.DataFrame(matrix, columns=['X','Y'])
        >>> df
              X     Y
        0  51.0  35.0
        1  49.0  30.0
        2  47.0  32.0
        3  46.0  31.0
        4  50.0  36.0
        >>> df.rolling(4).corr(pairwise=True)
                    X         Y
        0 X       NaN       NaN
          Y       NaN       NaN
        1 X       NaN       NaN
          Y       NaN       NaN
        2 X       NaN       NaN
          Y       NaN       NaN
        3 X  1.000000  0.626300
          Y  0.626300  1.000000
        4 X  1.000000  0.555368
          Y  0.555368  1.000000
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
        **kwargs,
    ):
        # corr accepts no *args compat parameter; only warn on **kwargs.
        maybe_warn_args_and_kwargs(type(self), "corr", None, kwargs)
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
            **kwargs,
        )
# Rolling and Window present the same user-facing constructor surface, so
# Rolling reuses Window's class docstring verbatim.
Rolling.__doc__ = Window.__doc__
class RollingGroupby(BaseWindowGroupby, Rolling):
    """
    Provide a rolling groupby implementation.
    """

    _attributes = Rolling._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        indexer_cls: type[BaseIndexer]
        extra_kwargs: dict[str, Any] | None = None
        on_values = self._index_array
        if isinstance(self.window, BaseIndexer):
            # Caller supplied a custom indexer: reuse its class and its
            # configuration, dropping index_array since each group gets its own.
            indexer_cls = type(self.window)
            extra_kwargs = self.window.__dict__.copy()
            assert isinstance(extra_kwargs, dict)  # for mypy
            extra_kwargs.pop("index_array", None)
            win_size = self.window
        elif self._win_freq_i8 is not None:
            # Offset-based window: variable-width windows sized in i8 units.
            indexer_cls = VariableWindowIndexer
            # error: Incompatible types in assignment (expression has type
            # "int", variable has type "BaseIndexer")
            win_size = self._win_freq_i8  # type: ignore[assignment]
        else:
            # Plain integer window.
            indexer_cls = FixedWindowIndexer
            win_size = self.window
        return GroupbyIndexer(
            index_array=on_values,
            window_size=win_size,
            groupby_indices=self._grouper.indices,
            window_indexer=indexer_cls,
            indexer_kwargs=extra_kwargs,
        )

    def _validate_datetimelike_monotonic(self):
        """
        Validate that each group in self._on is monotonic
        """
        # GH 46061
        if self._on.hasnans:
            self._raise_monotonic_error("values must not have NaT")
        for positions in self._grouper.indices.values():
            group_on = self._on.take(positions)
            increasing = group_on.is_monotonic_increasing
            decreasing = group_on.is_monotonic_decreasing
            if not increasing and not decreasing:
                on = "index" if self.on is None else self.on
                raise ValueError(
                    f"Each group within {on} must be monotonic. "
                    f"Sort the values in {on} first."
                )