Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/indexing.py: 21%

106 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from typing import ( 

4 TYPE_CHECKING, 

5 Iterable, 

6 Literal, 

7 cast, 

8) 

9 

10import numpy as np 

11 

12from pandas._typing import PositionalIndexer 

13from pandas.util._decorators import ( 

14 cache_readonly, 

15 doc, 

16) 

17 

18from pandas.core.dtypes.common import ( 

19 is_integer, 

20 is_list_like, 

21) 

22 

23if TYPE_CHECKING: 23 ↛ 24line 23 didn't jump to line 24, because the condition on line 23 was never true

24 from pandas import ( 

25 DataFrame, 

26 Series, 

27 ) 

28 from pandas.core.groupby import groupby 

29 

30 

class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.

    The host class must provide ``_cumcount_array``; the arrays it returns
    (requested with the default arguments and with ``ascending=False``) are
    cached here as ``_ascending_count`` and ``_descending_count`` and are what
    every mask builder below compares against.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build the boolean mask selected by a positional indexer.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            An integer, a slice, or a list-like of integers and/or slices.
            One-shot iterables such as generators are accepted.

        Returns
        -------
        np.ndarray
            Result of comparing ``_ascending_count`` / ``_descending_count``
            against the requested positions.

        Raises
        ------
        TypeError
            If ``arg`` is neither list-like, a slice nor an integer.
        ValueError
            Raised by the helpers for a non int/slice element of a mixed
            list-like, or for a negative slice step.
        """
        if is_list_like(arg):
            # Materialise once. A one-shot iterator would otherwise be
            # exhausted by the all-integers probe below, leaving the mask
            # builders nothing to iterate and silently selecting no rows.
            items = list(cast(Iterable, arg))
            if all(is_integer(i) for i in items):
                mask = self._make_mask_from_list(cast(Iterable[int], items))
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        if isinstance(mask, bool):
            # The helpers hand back a bare bool when nothing narrowed (or
            # widened) the selection. Comparing the counts with 0 turns it into
            # an array of the right length; NOTE(review): this is all-True /
            # all-False only if the counts are never negative - confirm
            # against _cumcount_array.
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """
        Mask for a single position.

        A non-negative ``arg`` is matched against ``_ascending_count``; a
        negative one is mapped to ``-arg - 1`` and matched against
        ``_descending_count``.
        """
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Mask for the union of several integer positions.

        Parameters
        ----------
        args : Iterable[int]
            Positions to select; may be a one-shot iterator.

        Returns
        -------
        bool or np.ndarray
            ``False`` when ``args`` is empty, otherwise a boolean array.
        """
        # ``args`` is scanned twice below, so snapshot it first; a one-shot
        # iterator would otherwise lose every negative position.
        positions = list(args)
        positive = [arg for arg in positions if arg >= 0]
        negative = [-arg - 1 for arg in positions if arg < 0]

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Mask for the union of a mix of integers and slices.

        Raises
        ------
        ValueError
            If an element is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Mask for a slice applied to each group.

        Parameters
        ----------
        arg : slice
            ``start`` and ``stop`` may be negative or ``None``. ``step`` may
            be ``None`` (treated as 1) or non-negative; a step of 0 is not
            rejected and selects the same rows as a step of 1.

        Returns
        -------
        bool or np.ndarray
            ``True`` when the slice imposes no restriction at all, otherwise
            a boolean array.

        Raises
        ------
        ValueError
            If ``step`` is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            mask &= self._descending_count < -start

            # Same guard as the branches above: with step 0 or 1 the modulo
            # test keeps every row, so skipping it changes nothing, saves the
            # temporaries and avoids a modulo-by-zero RuntimeWarning.
            if step > 1:
                offset_array = self._descending_count + start + 1
                # Where this sum is negative the ascending count is used as
                # the stepping offset instead of the end-relative one.
                limit_array = (
                    self._ascending_count + self._descending_count + (start + 1)
                ) < 0
                offset_array = np.where(
                    limit_array, self._ascending_count, offset_array
                )

                mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """Cached result of the host's ``_cumcount_array()``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """Cached result of the host's ``_cumcount_array(ascending=False)``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)

244 

245 

# No class docstring is written here on purpose: the ``doc`` decorator is
# handed ``GroupByIndexingMixin._positional_selector`` to supply it.
@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # The GroupBy whose rows are selected by ``__getitem__``.
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Select rows of each group by position.

        This is the implementation behind ``GroupBy._positional_selector``.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            One of:

            - an int
            - an int valued iterable such as a list or range
            - a slice whose step is None or positive
            - a tuple of integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        # Reset the group selection before the mask is built and applied.
        grouped._reset_group_selection()
        row_mask = grouped._make_mask_from_positional_indexer(arg)
        return grouped._mask_selected_obj(row_mask)

285 

286 

class GroupByNthSelector:
    """
    Stand-in for ``GroupBy.nth`` that can be both called and indexed.

    Both access styles forward to the wrapped object's ``_nth`` method.
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # The GroupBy whose ``_nth`` does the actual work.
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        """Call form, ``nth(n, dropna)``: forward both arguments to ``_nth``."""
        target = self.groupby_object
        return target._nth(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        """Index form, ``nth[n]``: forward ``n`` alone to ``_nth``."""
        target = self.groupby_object
        return target._nth(n)