Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/categorical.py: 21%

1from __future__ import annotations

3from typing import TYPE_CHECKING

5import numpy as np

7from pandas.core.algorithms import unique1d

8from pandas.core.arrays.categorical import (

9 Categorical,

10 CategoricalDtype,

11 recode_for_categories,

12)

14if TYPE_CHECKING: 14 ↛ 15line 14 didn't jump to line 15, because the condition on line 14 was never true

15 from pandas.core.indexes.api import CategoricalIndex

def recode_for_groupby(
    c: Categorical, sort: bool, observed: bool
) -> tuple[Categorical, Categorical | None]:
    """
    Prepare a Categorical so that it can be used as a groupby key.

    Three cases are handled, checked in this order:

    * ``observed=True``: build a new Categorical whose categories are only
      those that actually occur in ``c`` (``sort`` is not consulted).
    * ``sort=True``: ``c`` is returned unchanged.
    * ``sort=False``: ``c`` is returned with its categories reordered to
      the order of first appearance in the data, followed by the
      categories that never appear (GH-8868, GH-13179).

    Parameters
    ----------
    c : Categorical
        The grouping values.
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Whether only the observed categories should be kept.

    Returns
    -------
    Categorical
        The recoded categorical. For ordered input the surviving
        categories keep their original relative order instead of the
        order of appearance.
    Categorical or None
        The original ``c`` when ``observed`` is true, otherwise None.
    """
    if observed:
        # Codes present in the data, in order of first appearance; -1 is
        # the missing-value sentinel and never maps to a category.
        seen_codes = unique1d(c.codes)
        present_codes = seen_codes[seen_codes != -1]
        if c.ordered:
            # For ordered data this makes the result equivalent to
            # c.remove_unused_categories().
            present_codes = np.sort(present_codes)

        # Re-express the existing codes relative to the reduced categories.
        observed_categories = c.categories.take(present_codes)
        recoded = recode_for_categories(c.codes, c.categories, observed_categories)

        observed_dtype = CategoricalDtype(observed_categories, ordered=c.ordered)
        return Categorical(recoded, dtype=observed_dtype, fastpath=True), c

    if sort:
        # Nothing to do: groups follow c.categories as they are.
        return c, None

    # sort=False: groups must come out in as-encountered order (GH-8868).
    uniques = c.unique()

    # GH-38140: keep NaN (-1) out of the indexer used to pick categories.
    present_codes = uniques.codes[uniques.codes != -1]
    if uniques.ordered:
        present_codes = np.sort(present_codes)
    uniques = uniques.set_categories(uniques.categories.take(present_codes))

    # GH-13179: every original category must still be present, so append
    # the ones that do not occur in the data after the encountered ones.
    is_unseen = ~c.categories.isin(uniques.categories)
    uniques = uniques.add_categories(c.categories[is_unseen])

    return c.reorder_categories(uniques.categories), None

def recode_from_groupby(
    c: Categorical, sort: bool, ci: CategoricalIndex
) -> CategoricalIndex:
    """
    Give a groupby result index the full set of categories of ``c``.

    Parameters
    ----------
    c : Categorical
        The categorical whose categories the result must contain.
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The index whose categories are to be adjusted.

    Returns
    -------
    CategoricalIndex
        With ``sort=True``, ``ci`` with its categories set to
        ``c.categories``; otherwise ``ci`` with the categories of ``c``
        that it lacks appended after its existing ones.
    """
    if not sort:
        # Not sorting: keep ci's category order and add the ones it lacks
        # to the end.
        absent = ~c.categories.isin(ci.categories)
        # error: "CategoricalIndex" has no attribute "add_categories"
        return ci.add_categories(c.categories[absent])  # type: ignore[attr-defined]

    # Sorting: restore the category ordering of c.
    # error: "CategoricalIndex" has no attribute "set_categories"
    return ci.set_categories(c.categories)  # type: ignore[attr-defined]