Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/indexing.py: 21%

1from __future__ import annotations

3from typing import (

4 TYPE_CHECKING,

5 Iterable,

6 Literal,

7 cast,

10import numpy as np

12from pandas._typing import PositionalIndexer

13from pandas.util._decorators import (

14 cache_readonly,

15 doc,

16)

18from pandas.core.dtypes.common import (

19 is_integer,

20 is_list_like,

21)

23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24, because the condition on line 23 was never true

24 from pandas import (

25 DataFrame,

26 Series,

27 )

28 from pandas.core.groupby import groupby

class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.

    The ``_make_mask_from_*`` helpers translate a positional index (integer,
    list-like of integers, slice, or tuple of integers and slices) into a
    boolean mask. The mask is built from two arrays obtained from the host
    class: ``_cumcount_array()`` and ``_cumcount_array(ascending=False)``.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        # ``groupby`` is only imported under TYPE_CHECKING, so the cast must
        # not be evaluated at runtime.
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build a boolean row mask from any supported positional index.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            An integer, a slice, a list-like of integers, or a list-like
            mixing integers and slices. One-shot iterators are accepted.

        Returns
        -------
        np.ndarray
            Boolean array, True for the rows selected by ``arg``.

        Raises
        ------
        TypeError
            If ``arg`` is not list-like, a slice or an integer.
        ValueError
            If a list-like contains something other than integers and slices,
            or a slice has a negative step.
        """
        if is_list_like(arg):
            # Materialise once. The ``all(...)`` scan below would otherwise
            # exhaust a one-shot iterator/generator before the mask builders
            # see it, silently selecting nothing.
            items = list(cast(Iterable, arg))
            if all(is_integer(i) for i in items):
                mask = self._make_mask_from_list(cast(Iterable[int], items))
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        # The helpers return a plain bool when no array term was applied
        # (e.g. an empty list or ``slice(None)``); expand it to a full-length
        # all-True / all-False array.
        if isinstance(mask, bool):
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """
        Return the mask selecting a single position.

        A non-negative ``arg`` is matched against ``_ascending_count``; a
        negative ``arg`` is matched against ``_descending_count`` (``-1``
        maps to a descending count of 0).
        """
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Return the mask selecting any of several integer positions.

        Parameters
        ----------
        args : Iterable[int]
            Integer positions; negative values count from the end.
            May be a one-shot iterator.

        Returns
        -------
        bool or np.ndarray
            ``False`` if ``args`` is empty, otherwise a boolean array.
        """
        positive: list[int] = []
        negative: list[int] = []

        # Partition in a single pass so that an iterator is not consumed by
        # the first filter and left empty for the second.
        for arg in args:
            if arg >= 0:
                positive.append(arg)
            else:
                negative.append(-arg - 1)

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Return the union of the masks of each integer or slice in ``args``.

        Raises
        ------
        ValueError
            If an element is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Return the mask selecting the positions covered by a slice.

        Parameters
        ----------
        arg : slice
            ``start`` and ``stop`` may be None, non-negative or negative;
            ``step`` may be None (treated as 1) or non-negative.

        Returns
        -------
        bool or np.ndarray
            ``True`` if the slice imposes no restriction, otherwise a
            boolean array.

        Raises
        ------
        ValueError
            If ``step`` is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            mask &= self._descending_count < -start

            # Offset of each row relative to the row at position ``start``.
            offset_array = self._descending_count + start + 1
            # True where ascending + descending + (start + 1) is negative;
            # presumably groups with fewer than ``-start`` rows, for which
            # stepping is measured from the ascending count instead.
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array()``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array(ascending=False)``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)

244

245

@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    # The class docstring is expected to be supplied by the ``@doc`` decorator
    # from ``GroupByIndexingMixin._positional_selector``, so none is written
    # here.

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # Keep a reference to the GroupBy object that is indexed by __getitem__.
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Return the rows at the requested positions within each group.

        This is the implementation behind ``GroupBy._positional_selector``.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            One of:

            - an int
            - an iterable of ints, such as a list or a range
            - a slice whose step is None or positive
            - a tuple mixing integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        grouped._reset_group_selection()
        row_mask = grouped._make_mask_from_positional_indexer(arg)
        return grouped._mask_selected_obj(row_mask)

285

286

class GroupByNthSelector:
    """
    Dynamically substituted for GroupBy.nth to enable both call and index

    Both ``selector(n, dropna)`` and ``selector[n]`` forward to the wrapped
    object's ``_nth`` method.
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # The wrapped object must provide ``_nth``.
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        """Call form: forward ``n`` and ``dropna`` to ``_nth``."""
        grouped = self.groupby_object
        return grouped._nth(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        """Index form: forward ``n`` to ``_nth`` without a ``dropna`` argument."""
        grouped = self.groupby_object
        return grouped._nth(n)