Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/categorical.py: 21%
31 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from typing import TYPE_CHECKING
5import numpy as np
7from pandas.core.algorithms import unique1d
8from pandas.core.arrays.categorical import (
9 Categorical,
10 CategoricalDtype,
11 recode_for_categories,
12)
14if TYPE_CHECKING: 14 ↛ 15line 14 didn't jump to line 15, because the condition on line 14 was never true
15 from pandas.core.indexes.api import CategoricalIndex
def recode_for_groupby(
    c: Categorical, sort: bool, observed: bool
) -> tuple[Categorical, Categorical | None]:
    """
    Prepare a Categorical so that it can be used as a groupby key.

    Three cases are handled:

    * ``observed=True``: build a new Categorical whose categories are
      limited to the ones actually present in the data.
    * ``observed=False`` and ``sort=True``: ``c`` is returned untouched.
    * ``observed=False`` and ``sort=False``: the categories of ``c`` are
      reordered so that those present in the data come first, in order of
      first appearance, followed by the categories that never occur
      (GH-8868, GH-13179).

    Parameters
    ----------
    c : Categorical
        The grouping values.
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Whether only the observed categories should be kept.

    Returns
    -------
    Categorical
        The recoded grouper. For ordered categoricals the relative order of
        the original categories is kept instead of the order of appearance.
    Categorical or None
        The original categorical ``c`` when ``observed`` is true,
        otherwise None.
    """
    if not observed:
        if sort:
            # The categories of ``c`` already define the sorted order.
            return c, None

        # sort=False: groups must come out in as-encountered order (GH-8868)
        uniques = c.unique()

        # GH-38140: the missing-value code (-1) must not be used to index
        # into the categories
        not_missing = uniques.codes != -1
        appearance_codes = uniques.codes[not_missing]
        if uniques.ordered:
            appearance_codes = np.sort(appearance_codes)
        seen_categories = uniques.categories.take(appearance_codes)
        uniques = uniques.set_categories(seen_categories)

        # GH-13179: groupby needs every category, so append the ones that
        # do not occur in the data after the ones that do
        is_unseen = ~c.categories.isin(uniques.categories)
        uniques = uniques.add_categories(c.categories[is_unseen])

        return c.reorder_categories(uniques.categories), None

    # observed=True: keep only the categories that occur in the data.
    # For ordered ``c`` this is equivalent to
    #   return c.remove_unused_categories(), c
    present_codes = unique1d(c.codes)
    present_codes = present_codes[present_codes != -1]
    if c.ordered:
        present_codes = np.sort(present_codes)

    # Re-express the codes relative to the reduced set of categories
    observed_categories = c.categories.take(present_codes)
    observed_dtype = CategoricalDtype(observed_categories, ordered=c.ordered)
    new_codes = recode_for_categories(c.codes, c.categories, observed_categories)

    recoded = Categorical(new_codes, dtype=observed_dtype, fastpath=True)
    return recoded, c
def recode_from_groupby(
    c: Categorical, sort: bool, ci: CategoricalIndex
) -> CategoricalIndex:
    """
    Give a groupby result index the full set of categories of ``c``.

    Parameters
    ----------
    c : Categorical
        The categorical whose categories should be present in the result.
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The index whose categories are to be adjusted.

    Returns
    -------
    CategoricalIndex
        With ``sort=True`` the categories are set to ``c.categories``;
        otherwise the categories of ``c`` that ``ci`` lacks are appended
        after the existing categories of ``ci``.
    """
    if not sort:
        # Unsorted: keep the current category order of ``ci`` and put the
        # categories it does not have yet at the end
        already_present = c.categories.isin(ci.categories)
        unobserved = c.categories[~already_present]
        # mypy: "CategoricalIndex" has no attribute "add_categories"
        return ci.add_categories(unobserved)  # type: ignore[attr-defined]

    # Sorted: fall back to the category ordering of ``c``
    # mypy: "CategoricalIndex" has no attribute "set_categories"
    return ci.set_categories(c.categories)  # type: ignore[attr-defined]