Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/grouper.py: 15%
350 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Provide user facing operators for doing the split part of the
3split-apply-combine paradigm.
4"""
5from __future__ import annotations
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Hashable,
11 final,
12)
13import warnings
15import numpy as np
17from pandas._typing import (
18 ArrayLike,
19 NDFrameT,
20 npt,
21)
22from pandas.errors import InvalidIndexError
23from pandas.util._decorators import cache_readonly
24from pandas.util._exceptions import find_stack_level
26from pandas.core.dtypes.cast import sanitize_to_nanoseconds
27from pandas.core.dtypes.common import (
28 is_categorical_dtype,
29 is_list_like,
30 is_scalar,
31)
33import pandas.core.algorithms as algorithms
34from pandas.core.arrays import (
35 Categorical,
36 ExtensionArray,
37)
38import pandas.core.common as com
39from pandas.core.frame import DataFrame
40from pandas.core.groupby import ops
41from pandas.core.groupby.categorical import (
42 recode_for_groupby,
43 recode_from_groupby,
44)
45from pandas.core.indexes.api import (
46 CategoricalIndex,
47 Index,
48 MultiIndex,
49)
50from pandas.core.series import Series
52from pandas.io.formats.printing import pprint_thing
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 from pandas.core.generic import NDFrame
class Grouper:
    """
    A Grouper allows the user to specify a groupby instruction for an object.

    This specification will select a column via the key parameter, or if the
    level and/or axis parameters are given, a level of the index of the target
    object.

    If `axis` and/or `level` are passed as keywords to both `Grouper` and
    `groupby`, the values passed to `Grouper` take precedence.

    Parameters
    ----------
    key : str, defaults to None
        Groupby key, which selects the grouping column of the target.
    level : name/number, defaults to None
        The level for the target index.
    freq : str / frequency object, defaults to None
        This will groupby the specified frequency if the target selection
        (via key or level) is a datetime-like object. For full specification
        of available frequencies, please see `here
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
    axis : str, int, defaults to 0
        Number/name of the axis.
    sort : bool, default to False
        Whether to sort the resulting labels.
    closed : {'left' or 'right'}
        Closed end of interval. Only when `freq` parameter is passed.
    label : {'left' or 'right'}
        Interval boundary to use for labeling.
        Only when `freq` parameter is passed.
    convention : {'start', 'end', 'e', 's'}
        If grouper is PeriodIndex and `freq` parameter is passed.
    base : int, default 0
        Only when `freq` parameter is passed.
        For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '5min' frequency, base could
        range from 0 through 4. Defaults to 0.

        .. deprecated:: 1.1.0
            The new arguments that you should use are 'offset' or 'origin'.

    loffset : str, DateOffset, timedelta object
        Only when `freq` parameter is passed.

        .. deprecated:: 1.1.0
            loffset is only working for ``.resample(...)`` and not for
            Grouper (:issue:`28302`).
            However, loffset is also deprecated for ``.resample(...)``
            See: :class:`DataFrame.resample`

    origin : Timestamp or str, default 'start_day'
        The timestamp on which to adjust the grouping. The timezone of origin must
        match the timezone of the index.
        If string, must be one of the following:

        - 'epoch': `origin` is 1970-01-01
        - 'start': `origin` is the first value of the timeseries
        - 'start_day': `origin` is the first day at midnight of the timeseries

        .. versionadded:: 1.1.0

        - 'end': `origin` is the last value of the timeseries
        - 'end_day': `origin` is the ceiling midnight of the last day

        .. versionadded:: 1.3.0

    offset : Timedelta or str, default is None
        An offset timedelta added to the origin.

        .. versionadded:: 1.1.0

    dropna : bool, default True
        If True, and if group keys contain NA values, NA values together with
        row/column will be dropped. If False, NA values will also be treated as
        the key in groups.

        .. versionadded:: 1.2.0

    Returns
    -------
    A specification for a groupby instruction

    Examples
    --------
    Syntactic sugar for ``df.groupby('A')``

    >>> df = pd.DataFrame(
    ...     {
    ...         "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
    ...         "Speed": [100, 5, 200, 300, 15],
    ...     }
    ... )
    >>> df
       Animal  Speed
    0  Falcon    100
    1  Parrot      5
    2  Falcon    200
    3  Falcon    300
    4  Parrot     15
    >>> df.groupby(pd.Grouper(key="Animal")).mean()
            Speed
    Animal
    Falcon  200.0
    Parrot   10.0

    Specify a resample operation on the column 'Publish date'

    >>> df = pd.DataFrame(
    ...     {
    ...         "Publish date": [
    ...             pd.Timestamp("2000-01-02"),
    ...             pd.Timestamp("2000-01-02"),
    ...             pd.Timestamp("2000-01-09"),
    ...             pd.Timestamp("2000-01-16")
    ...         ],
    ...         "ID": [0, 1, 2, 3],
    ...         "Price": [10, 20, 30, 40]
    ...     }
    ... )
    >>> df
      Publish date  ID  Price
    0   2000-01-02   0     10
    1   2000-01-02   1     20
    2   2000-01-09   2     30
    3   2000-01-16   3     40
    >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
                   ID  Price
    Publish date
    2000-01-02    0.5   15.0
    2000-01-09    2.0   30.0
    2000-01-16    3.0   40.0

    If you want to adjust the start of the bins based on a fixed timestamp:

    >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
    >>> rng = pd.date_range(start, end, freq='7min')
    >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
    >>> ts
    2000-10-01 23:30:00     0
    2000-10-01 23:37:00     3
    2000-10-01 23:44:00     6
    2000-10-01 23:51:00     9
    2000-10-01 23:58:00    12
    2000-10-02 00:05:00    15
    2000-10-02 00:12:00    18
    2000-10-02 00:19:00    21
    2000-10-02 00:26:00    24
    Freq: 7T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min')).sum()
    2000-10-01 23:14:00     0
    2000-10-01 23:31:00     9
    2000-10-01 23:48:00    21
    2000-10-02 00:05:00    54
    2000-10-02 00:22:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum()
    2000-10-01 23:18:00     0
    2000-10-01 23:35:00    18
    2000-10-01 23:52:00    27
    2000-10-02 00:09:00    39
    2000-10-02 00:26:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum()
    2000-10-01 23:24:00     3
    2000-10-01 23:41:00    15
    2000-10-01 23:58:00    45
    2000-10-02 00:15:00    45
    Freq: 17T, dtype: int64

    If you want to adjust the start of the bins with an `offset` Timedelta, the two
    following lines are equivalent:

    >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum()
    2000-10-01 23:30:00     9
    2000-10-01 23:47:00    21
    2000-10-02 00:04:00    54
    2000-10-02 00:21:00    24
    Freq: 17T, dtype: int64

    >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum()
    2000-10-01 23:30:00     9
    2000-10-01 23:47:00    21
    2000-10-02 00:04:00    54
    2000-10-02 00:21:00    24
    Freq: 17T, dtype: int64

    To replace the use of the deprecated `base` argument, you can now use `offset`,
    in this example it is equivalent to have `base=2`:

    >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum()
    2000-10-01 23:16:00     0
    2000-10-01 23:33:00     9
    2000-10-01 23:50:00    36
    2000-10-02 00:07:00    39
    2000-10-02 00:24:00    24
    Freq: 17T, dtype: int64
    """

    axis: int
    sort: bool
    dropna: bool
    _gpr_index: Index | None
    _grouper: Index | None

    # attributes shown by __repr__ (when not None)
    _attributes: tuple[str, ...] = ("key", "level", "freq", "axis", "sort", "dropna")

    def __new__(cls, *args, **kwargs):
        # If a `freq` is given, construction is delegated to the
        # resample-specific TimeGrouper subclass (imported lazily to avoid a
        # circular import with pandas.core.resample).
        if kwargs.get("freq") is not None:
            from pandas.core.resample import TimeGrouper

            _check_deprecated_resample_kwargs(kwargs, origin=cls)
            cls = TimeGrouper
        return super().__new__(cls)

    def __init__(
        self,
        key=None,
        level=None,
        freq=None,
        axis: int = 0,
        sort: bool = False,
        dropna: bool = True,
    ) -> None:
        self.key = key
        self.level = level
        self.freq = freq
        self.axis = axis
        self.sort = sort
        self.dropna = dropna

        # State below is populated later by _set_grouper/_get_grouper,
        # not by the constructor.
        self.grouper = None
        self._gpr_index = None
        self.obj = None
        self.indexer = None
        self.binner = None
        self._grouper = None
        self._indexer = None

    @final
    @property
    def ax(self) -> Index:
        # The Index being grouped along; only valid after _set_grouper ran.
        index = self._gpr_index
        if index is None:
            raise ValueError("_set_grouper must be called before ax is accessed")
        return index

    def _get_grouper(
        self, obj: NDFrameT, validate: bool = True
    ) -> tuple[Any, ops.BaseGrouper, NDFrameT]:
        """
        Resolve this specification against ``obj`` into an internal grouper.

        Parameters
        ----------
        obj : Series or DataFrame
        validate : bool, default True
            if True, validate the grouper

        Returns
        -------
        a tuple of binner, grouper, obj (possibly sorted)
        """
        self._set_grouper(obj)
        # error: Value of type variable "NDFrameT" of "get_grouper" cannot be
        # "Optional[Any]"
        # error: Incompatible types in assignment (expression has type "BaseGrouper",
        # variable has type "None")
        self.grouper, _, self.obj = get_grouper(  # type: ignore[type-var,assignment]
            self.obj,
            [self.key],
            axis=self.axis,
            level=self.level,
            sort=self.sort,
            validate=validate,
            dropna=self.dropna,
        )

        # error: Incompatible return value type (got "Tuple[None, None, None]",
        # expected "Tuple[Any, BaseGrouper, NDFrameT]")
        return self.binner, self.grouper, self.obj  # type: ignore[return-value]

    @final
    def _set_grouper(self, obj: NDFrame, sort: bool = False) -> None:
        """
        given an object and the specifications, setup the internal grouper
        for this particular specification

        Parameters
        ----------
        obj : Series or DataFrame
        sort : bool, default False
            whether the resulting grouper should be sorted
        """
        assert obj is not None

        # key and level are mutually exclusive ways to pick the grouping axis
        if self.key is not None and self.level is not None:
            raise ValueError("The Grouper cannot specify both a key and a level!")

        # Keep self.grouper value before overriding
        if self._grouper is None:
            # TODO: What are we assuming about subsequent calls?
            self._grouper = self._gpr_index
            self._indexer = self.indexer

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            # The 'on' is already defined
            if getattr(self._gpr_index, "name", None) == key and isinstance(
                obj, Series
            ):
                # Sometimes self._grouper will have been resorted while
                # obj has not. In this case there is a mismatch when we
                # call self._grouper.take(obj.index) so we need to undo the sorting
                # before we call _grouper.take.
                assert self._grouper is not None
                if self._indexer is not None:
                    reverse_indexer = self._indexer.argsort()
                    unsorted_ax = self._grouper.take(reverse_indexer)
                    ax = unsorted_ax.take(obj.index)
                else:
                    ax = self._grouper.take(obj.index)
            else:
                if key not in obj._info_axis:
                    raise KeyError(f"The grouper name {key} is not found")
                ax = Index(obj[key], name=key)

        else:
            ax = obj._get_axis(self.axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a mi level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    level = ax._get_level_number(level)
                    ax = Index(ax._get_level_values(level), name=ax.names[level])

                else:
                    if level not in (0, ax.name):
                        raise ValueError(f"The level {level} is not valid")

        # possibly sort
        if (self.sort or sort) and not ax.is_monotonic_increasing:
            # use stable sort to support first, last, nth
            # TODO: why does putting na_position="first" fix datetimelike cases?
            indexer = self.indexer = ax.array.argsort(
                kind="mergesort", na_position="first"
            )
            ax = ax.take(indexer)
            obj = obj.take(indexer, axis=self.axis)

        # error: Incompatible types in assignment (expression has type
        # "NDFrameT", variable has type "None")
        self.obj = obj  # type: ignore[assignment]
        self._gpr_index = ax

    @final
    @property
    def groups(self):
        # Mapping of group label -> labels; delegates to the resolved grouper,
        # so _get_grouper must have run first.
        # error: "None" has no attribute "groups"
        return self.grouper.groups  # type: ignore[attr-defined]

    @final
    def __repr__(self) -> str:
        # Show only the attributes that were actually set (non-None).
        attrs_list = (
            f"{attr_name}={repr(getattr(self, attr_name))}"
            for attr_name in self._attributes
            if getattr(self, attr_name) is not None
        )
        attrs = ", ".join(attrs_list)
        cls_name = type(self).__name__
        return f"{cls_name}({attrs})"
@final
class Grouping:
    """
    Holds the grouping information for a single key

    Parameters
    ----------
    index : Index
    grouper :
    obj : DataFrame or Series
    name : Label
    level :
    observed : bool, default False
        If we are a Categorical, use the observed values
    in_axis : if the Grouping is a column in self.obj and hence among
        Groupby.exclusions list

    Returns
    -------
    **Attributes**:
      * indices : dict of {group -> index_list}
      * codes : ndarray, group codes
      * group_index : unique groups
      * groups : dict of {group -> label_list}
    """

    # _codes/_group_index are pre-computed in __init__ only for the
    # MultiIndex-level case; otherwise they stay None and are derived lazily.
    _codes: npt.NDArray[np.signedinteger] | None = None
    _group_index: Index | None = None
    _passed_categorical: bool
    _all_grouper: Categorical | None
    _index: Index

    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: NDFrame | None = None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ) -> None:
        self.level = level
        self._orig_grouper = grouper
        self.grouping_vector = _convert_grouper(index, grouper)
        self._all_grouper = None
        self._index = index
        self._sort = sort
        self.obj = obj
        self._observed = observed
        self.in_axis = in_axis
        self._dropna = dropna

        self._passed_categorical = False

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        ilevel = self._ilevel
        if ilevel is not None:
            mapper = self.grouping_vector
            # In extant tests, the new self.grouping_vector matches
            # `index.get_level_values(ilevel)` whenever
            # mapper is None and isinstance(index, MultiIndex)
            (
                self.grouping_vector,  # Index
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(mapper, level=ilevel, dropna=dropna)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouping_vector, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            assert self.obj is not None  # for mypy
            _, newgrouper, newobj = self.grouping_vector._get_grouper(
                self.obj, validate=False
            )
            self.obj = newobj

            ng = newgrouper._get_grouper()
            if isinstance(newgrouper, ops.BinGrouper):
                # in this case we have `ng is newgrouper`
                self.grouping_vector = ng
            else:
                # ops.BaseGrouper
                # use Index instead of ndarray so we can recover the name
                self.grouping_vector = Index(ng, name=newgrouper.result_index.name)

        elif is_categorical_dtype(self.grouping_vector):
            # a passed Categorical
            self._passed_categorical = True

            self.grouping_vector, self._all_grouper = recode_for_groupby(
                self.grouping_vector, sort, observed
            )

        elif not isinstance(
            self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
        ):
            # no level passed
            if getattr(self.grouping_vector, "ndim", 1) != 1:
                t = self.name or str(type(self.grouping_vector))
                raise ValueError(f"Grouper for '{t}' not 1-dimensional")

            # e.g. a callable or dict-get: map it over the index values
            self.grouping_vector = index.map(self.grouping_vector)

            if not (
                hasattr(self.grouping_vector, "__len__")
                and len(self.grouping_vector) == len(index)
            ):
                grper = pprint_thing(self.grouping_vector)
                errmsg = (
                    "Grouper result violates len(labels) == "
                    f"len(data)\nresult: {grper}"
                )
                self.grouping_vector = None  # Try for sanity
                raise AssertionError(errmsg)

        if isinstance(self.grouping_vector, np.ndarray):
            # if we have a date/time-like grouper, make sure that we have
            # Timestamps like
            self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector)

    def __repr__(self) -> str:
        return f"Grouping({self.name})"

    def __iter__(self):
        # Iterate over the group labels (keys of `indices`).
        return iter(self.indices)

    @cache_readonly
    def name(self) -> Hashable:
        """Label for this grouping, resolved from level/grouper/vector."""
        ilevel = self._ilevel
        if ilevel is not None:
            return self._index.names[ilevel]

        if isinstance(self._orig_grouper, (Index, Series)):
            return self._orig_grouper.name

        elif isinstance(self.grouping_vector, ops.BaseGrouper):
            return self.grouping_vector.result_index.name

        elif isinstance(self.grouping_vector, Index):
            return self.grouping_vector.name

        # otherwise we have ndarray or ExtensionArray -> no name
        return None

    @cache_readonly
    def _ilevel(self) -> int | None:
        """
        If necessary, converted index level name to index level position.
        """
        level = self.level
        if level is None:
            return None
        if not isinstance(level, int):
            index = self._index
            if level not in index.names:
                raise AssertionError(f"Level {level} not in index")
            return index.names.index(level)
        return level

    @property
    def ngroups(self) -> int:
        # Number of distinct groups.
        return len(self.group_index)

    @cache_readonly
    def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
        # we have a list of groupers
        if isinstance(self.grouping_vector, ops.BaseGrouper):
            return self.grouping_vector.indices

        values = Categorical(self.grouping_vector)
        return values._reverse_indexer()

    @property
    def codes(self) -> npt.NDArray[np.signedinteger]:
        if self._codes is not None:
            # _codes is set in __init__ for MultiIndex cases
            return self._codes

        return self._codes_and_uniques[0]

    @cache_readonly
    def group_arraylike(self) -> ArrayLike:
        """
        Analogous to result_index, but holding an ArrayLike to ensure
        we can retain ExtensionDtypes.
        """
        if self._group_index is not None:
            # _group_index is set in __init__ for MultiIndex cases
            return self._group_index._values

        elif self._all_grouper is not None:
            # retain dtype for categories, including unobserved ones
            return self.result_index._values

        return self._codes_and_uniques[1]

    @cache_readonly
    def result_index(self) -> Index:
        # result_index retains dtype for categories, including unobserved ones,
        # which group_index does not
        if self._all_grouper is not None:
            group_idx = self.group_index
            assert isinstance(group_idx, CategoricalIndex)
            return recode_from_groupby(self._all_grouper, self._sort, group_idx)
        return self.group_index

    @cache_readonly
    def group_index(self) -> Index:
        if self._group_index is not None:
            # _group_index is set in __init__ for MultiIndex cases
            return self._group_index

        uniques = self._codes_and_uniques[1]
        return Index._with_infer(uniques, name=self.name)

    @cache_readonly
    def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
        if self._passed_categorical:
            # we make a CategoricalIndex out of the cat grouper
            # preserving the categories / ordered attributes;
            # doesn't (yet - GH#46909) handle dropna=False
            cat = self.grouping_vector
            categories = cat.categories

            if self._observed:
                ucodes = algorithms.unique1d(cat.codes)
                ucodes = ucodes[ucodes != -1]
                if self._sort or cat.ordered:
                    ucodes = np.sort(ucodes)
            else:
                ucodes = np.arange(len(categories))

            uniques = Categorical.from_codes(
                codes=ucodes, categories=categories, ordered=cat.ordered
            )
            return cat.codes, uniques

        elif isinstance(self.grouping_vector, ops.BaseGrouper):
            # we have a list of groupers
            codes = self.grouping_vector.codes_info
            # error: Incompatible types in assignment (expression has type "Union
            # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical")
            uniques = (
                self.grouping_vector.result_index._values  # type: ignore[assignment]
            )
        else:
            # GH35667, replace dropna=False with use_na_sentinel=False
            # error: Incompatible types in assignment (expression has type "Union[
            # ndarray[Any, Any], Index]", variable has type "Categorical")
            codes, uniques = algorithms.factorize(  # type: ignore[assignment]
                self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna
            )
        return codes, uniques

    @cache_readonly
    def groups(self) -> dict[Hashable, np.ndarray]:
        # Mapping of group label -> subset of the original index.
        return self._index.groupby(Categorical.from_codes(self.codes, self.group_index))
def get_grouper(
    obj: NDFrameT,
    key=None,
    axis: int = 0,
    level=None,
    sort: bool = True,
    observed: bool = False,
    mutated: bool = False,
    validate: bool = True,
    dropna: bool = True,
) -> tuple[ops.BaseGrouper, frozenset[Hashable], NDFrameT]:
    """
    Create and return a BaseGrouper, which is an internal
    mapping of how to create the grouper indexers.
    This may be composed of multiple Grouping objects, indicating
    multiple groupers

    Groupers are ultimately index mappings. They can originate as:
    index mappings, keys to columns, functions, or Groupers

    Groupers enable local references to axis,level,sort, while
    the passed in axis, level, and sort are 'global'.

    This routine tries to figure out what the passing in references
    are and then creates a Grouping for each one, combined into
    a BaseGrouper.

    If observed & we have a categorical grouper, only show the observed
    values.

    If validate, then check for key/level overlaps.

    """
    group_axis = obj._get_axis(axis)

    # validate that the passed single level is compatible with the passed
    # axis of the object
    if level is not None:
        # TODO: These if-block and else-block are almost same.
        # MultiIndex instance check is removable, but it seems that there are
        # some processes only for non-MultiIndex in else-block,
        # eg. `obj.index.name != level`. We have to consider carefully whether
        # these are applicable for MultiIndex. Even if these are applicable,
        # we need to check if it makes no side effect to subsequent processes
        # on the outside of this condition.
        # (GH 17621)
        if isinstance(group_axis, MultiIndex):
            if is_list_like(level) and len(level) == 1:
                level = level[0]

            if key is None and is_scalar(level):
                # Get the level values from group_axis
                key = group_axis.get_level_values(level)
                level = None

        else:
            # allow level to be a length-one list-like object
            # (e.g., level=[0])
            # GH 13901
            if is_list_like(level):
                nlevels = len(level)
                if nlevels == 1:
                    level = level[0]
                elif nlevels == 0:
                    raise ValueError("No group keys passed!")
                else:
                    raise ValueError("multiple levels only valid with MultiIndex")

            if isinstance(level, str):
                if obj._get_axis(axis).name != level:
                    raise ValueError(
                        f"level name {level} is not the name "
                        f"of the {obj._get_axis_name(axis)}"
                    )
            elif level > 0 or level < -1:
                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")

            # NOTE: `group_axis` and `group_axis.get_level_values(level)`
            # are same in this section.
            level = None
            key = group_axis

    # a passed-in Grouper, directly convert
    if isinstance(key, Grouper):
        binner, grouper, obj = key._get_grouper(obj, validate=False)
        if key.key is None:
            return grouper, frozenset(), obj
        else:
            return grouper, frozenset({key.key}), obj

    # already have a BaseGrouper, just return it
    elif isinstance(key, ops.BaseGrouper):
        return key, frozenset(), obj

    if not isinstance(key, list):
        keys = [key]
        match_axis_length = False
    else:
        keys = key
        match_axis_length = len(keys) == len(group_axis)

    # what are we after, exactly?
    any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
    any_groupers = any(isinstance(g, (Grouper, Grouping)) for g in keys)
    any_arraylike = any(
        isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys
    )

    # is this an index replacement?
    if (
        not any_callable
        and not any_arraylike
        and not any_groupers
        and match_axis_length
        and level is None
    ):
        if isinstance(obj, DataFrame):
            all_in_columns_index = all(
                g in obj.columns or g in obj.index.names for g in keys
            )
        else:
            assert isinstance(obj, Series)
            all_in_columns_index = all(g in obj.index.names for g in keys)

        if not all_in_columns_index:
            # treat the list of keys as a single array-like grouper
            keys = [com.asarray_tuplesafe(keys)]

    if isinstance(level, (tuple, list)):
        if key is None:
            keys = [None] * len(level)
        levels = level
    else:
        levels = [level] * len(keys)

    groupings: list[Grouping] = []
    exclusions: set[Hashable] = set()

    # if the actual grouper should be obj[key]
    def is_in_axis(key) -> bool:
        if not _is_label_like(key):
            if obj.ndim == 1:
                return False

            # items -> .columns for DataFrame, .index for Series
            items = obj.axes[-1]
            try:
                items.get_loc(key)
            except (KeyError, TypeError, InvalidIndexError):
                # TypeError shows up here if we pass e.g. Int64Index
                return False

        return True

    # if the grouper is obj[name]
    def is_in_obj(gpr) -> bool:
        if not hasattr(gpr, "name"):
            return False
        try:
            return gpr is obj[gpr.name]
        except (KeyError, IndexError, InvalidIndexError):
            # IndexError reached in e.g. test_skip_group_keys when we pass
            # lambda here
            # InvalidIndexError raised on key-types inappropriate for index,
            # e.g. DatetimeIndex.get_loc(tuple())
            return False

    for gpr, level in zip(keys, levels):

        if is_in_obj(gpr):  # df.groupby(df['name'])
            in_axis = True
            exclusions.add(gpr.name)

        elif is_in_axis(gpr):  # df.groupby('name')
            if gpr in obj:
                if validate:
                    obj._check_label_or_level_ambiguity(gpr, axis=axis)
                in_axis, name, gpr = True, gpr, obj[gpr]
                if gpr.ndim != 1:
                    # non-unique columns; raise here to get the name in the
                    # exception message
                    raise ValueError(f"Grouper for '{name}' not 1-dimensional")
                exclusions.add(name)
            elif obj._is_level_reference(gpr, axis=axis):
                in_axis, level, gpr = False, gpr, None
            else:
                raise KeyError(gpr)
        elif isinstance(gpr, Grouper) and gpr.key is not None:
            # Add key to exclusions
            exclusions.add(gpr.key)
            in_axis = False
        else:
            in_axis = False

        # create the Grouping
        # allow us to passing the actual Grouping as the gpr
        ping = (
            Grouping(
                group_axis,
                gpr,
                obj=obj,
                level=level,
                sort=sort,
                observed=observed,
                in_axis=in_axis,
                dropna=dropna,
            )
            if not isinstance(gpr, Grouping)
            else gpr
        )

        groupings.append(ping)

    if len(groupings) == 0 and len(obj):
        raise ValueError("No group keys passed!")
    elif len(groupings) == 0:
        # empty object: synthesize a single empty Grouping
        groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))

    # create the internals grouper
    grouper = ops.BaseGrouper(
        group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna
    )
    return grouper, frozenset(exclusions), obj
927def _is_label_like(val) -> bool:
928 return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))
931def _convert_grouper(axis: Index, grouper):
932 if isinstance(grouper, dict):
933 return grouper.get
934 elif isinstance(grouper, Series):
935 if grouper.index.equals(axis):
936 return grouper._values
937 else:
938 return grouper.reindex(axis)._values
939 elif isinstance(grouper, MultiIndex):
940 return grouper._values
941 elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)):
942 if len(grouper) != len(axis):
943 raise ValueError("Grouper and axis must be same length")
945 if isinstance(grouper, (list, tuple)):
946 grouper = com.asarray_tuplesafe(grouper)
947 return grouper
948 else:
949 return grouper
952def _check_deprecated_resample_kwargs(kwargs, origin):
953 """
954 Check for use of deprecated parameters in ``resample`` and related functions.
956 Raises the appropriate warnings if these parameters are detected.
957 Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629).
959 Parameters
960 ----------
961 kwargs : dict
962 Dictionary of keyword arguments to check for deprecated parameters.
963 origin : object
964 From where this function is being called; either Grouper or TimeGrouper. Used
965 to determine an approximate stacklevel.
966 """
967 # Deprecation warning of `base` and `loffset` since v1.1.0:
968 # we are raising the warning here to be able to set the `stacklevel`
969 # properly since we need to raise the `base` and `loffset` deprecation
970 # warning from three different cases:
971 # core/generic.py::NDFrame.resample
972 # core/groupby/groupby.py::GroupBy.resample
973 # core/groupby/grouper.py::Grouper
974 # raising these warnings from TimeGrouper directly would fail the test:
975 # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base
977 if kwargs.get("base", None) is not None:
978 warnings.warn(
979 "'base' in .resample() and in Grouper() is deprecated.\n"
980 "The new arguments that you should use are 'offset' or 'origin'.\n"
981 '\n>>> df.resample(freq="3s", base=2)\n'
982 "\nbecomes:\n"
983 '\n>>> df.resample(freq="3s", offset="2s")\n',
984 FutureWarning,
985 stacklevel=find_stack_level(),
986 )
987 if kwargs.get("loffset", None) is not None:
988 warnings.warn(
989 "'loffset' in .resample() and in Grouper() is deprecated.\n"
990 '\n>>> df.resample(freq="3s", loffset="8H")\n'
991 "\nbecomes:\n"
992 "\n>>> from pandas.tseries.frequencies import to_offset"
993 '\n>>> df = df.resample(freq="3s").mean()'
994 '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n',
995 FutureWarning,
996 stacklevel=find_stack_level(),
997 )