Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/indexing.py: 21%
106 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from typing import (
4 TYPE_CHECKING,
5 Iterable,
6 Literal,
7 cast,
8)
10import numpy as np
12from pandas._typing import PositionalIndexer
13from pandas.util._decorators import (
14 cache_readonly,
15 doc,
16)
18from pandas.core.dtypes.common import (
19 is_integer,
20 is_list_like,
21)
23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24, because the condition on line 23 was never true
24 from pandas import (
25 DataFrame,
26 Series,
27 )
28 from pandas.core.groupby import groupby
class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.

    The helper methods build a boolean row mask from a positional indexer by
    comparing it against ``_ascending_count`` and ``_descending_count``, the
    arrays returned by the host's ``_cumcount_array()``.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...     columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        # The cast exists only for static type checkers; at runtime the mixin
        # instance itself is handed to the selector.
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build the boolean row mask selected by a positional indexer.

        Parameters
        ----------
        arg : int, slice, list-like of int, or tuple of ints and slices
            Positions to select within each group.

        Returns
        -------
        np.ndarray
            Boolean mask aligned with ``_ascending_count``.

        Raises
        ------
        TypeError
            If ``arg`` is not an integer, slice or list-like.
        ValueError
            If a list-like contains something other than integers and
            slices, or a slice has a negative step.
        """
        if is_list_like(arg):
            # Materialise once: the ``all`` check below would otherwise drain
            # a one-shot iterator and leave nothing for the mask builder,
            # silently producing an empty selection.
            items = list(cast(Iterable, arg))
            if all(is_integer(i) for i in items):
                mask = self._make_mask_from_list(cast(Iterable[int], items))
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        # The helpers return a plain bool when no array comparison was needed
        # (e.g. an empty list or an unbounded slice); expand it to a full
        # array of that value.
        if isinstance(mask, bool):
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """
        Return the mask for a single position.

        A non-negative ``arg`` is matched against ``_ascending_count``; a
        negative ``arg`` is matched against ``_descending_count`` so that
        ``-1`` corresponds to a descending count of ``0``.
        """
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Return the mask for an iterable of integer positions.

        Returns the plain bool ``False`` when ``args`` is empty.
        """
        # ``args`` is traversed twice below, so a one-shot iterator must be
        # materialised first or its negative entries would be lost.
        args = list(args)
        positive = [arg for arg in args if arg >= 0]
        negative = [-arg - 1 for arg in args if arg < 0]

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Return the union of the masks of each integer or slice in ``args``.

        Raises
        ------
        ValueError
            If an element is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Return the mask for a slice applied within each group.

        Returns the plain bool ``True`` when the slice imposes no constraint
        (no start, no stop, step of ``None`` or ``1``).

        Raises
        ------
        ValueError
            If the slice step is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            mask &= self._descending_count < -start

            # Distance of each row from the slice start, counted from the end.
            offset_array = self._descending_count + start + 1
            # NOTE(review): assuming ascending + descending == group size - 1,
            # this flags groups shorter than ``-start``; for those the step is
            # counted from the first row instead.
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """
        Cached result of ``self._cumcount_array()``.

        Presumably the 0-based position of each row within its group,
        counted from the start; confirm against ``GroupBy._cumcount_array``.
        """
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """
        Cached result of ``self._cumcount_array(ascending=False)``.

        Presumably the 0-based position of each row within its group,
        counted from the end; confirm against ``GroupBy._cumcount_array``.
        """
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)
@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    # No class docstring here: the ``doc`` decorator above is given
    # ``GroupByIndexingMixin._positional_selector`` as its docstring source.

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # The GroupBy whose rows are filtered by ``__getitem__``.
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Select by positional index per group.

        Implements GroupBy._positional_selector

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            Allowed values are:
            - int
            - int valued iterable such as list or range
            - slice with step either None or positive
            - tuple of integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        # Order matters: the group selection is reset before the mask is
        # built and applied to the selected object.
        self.groupby_object._reset_group_selection()
        mask = self.groupby_object._make_mask_from_positional_indexer(arg)
        return self.groupby_object._mask_selected_obj(mask)
class GroupByNthSelector:
    """
    Dynamically substituted for GroupBy.nth to enable both call and index
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # The GroupBy whose ``_nth`` method does the actual selection.
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        """
        Call form, ``nth(n, dropna=...)``: forwards both arguments to
        ``groupby_object._nth``.
        """
        return self.groupby_object._nth(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Index form, ``nth[n]``: forwards ``n`` to ``groupby_object._nth``
        without a ``dropna`` argument.
        """
        return self.groupby_object._nth(n)