Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/categorical.py: 21%

31 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from typing import TYPE_CHECKING 

4 

5import numpy as np 

6 

7from pandas.core.algorithms import unique1d 

8from pandas.core.arrays.categorical import ( 

9 Categorical, 

10 CategoricalDtype, 

11 recode_for_categories, 

12) 

13 

14if TYPE_CHECKING: 14 ↛ 15line 14 didn't jump to line 15, because the condition on line 14 was never true

15 from pandas.core.indexes.api import CategoricalIndex 

16 

17 

def recode_for_groupby(
    c: Categorical, sort: bool, observed: bool
) -> tuple[Categorical, Categorical | None]:
    """
    Prepare a Categorical so that it can be used as a groupby key.

    With ``observed=True`` a new Categorical restricted to the categories
    that actually occur in the data is built.

    With ``observed=False`` and ``sort=True`` the input is returned as is.
    With ``observed=False`` and ``sort=False`` the input is re-coded so that
    its categories follow the order returned by ``.unique()``, with the
    categories that never occur appended afterwards.  This exists so that
    the categorical index of the GroupBy result lists categories in order
    of appearance in the data (GH-8868).

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Account only for the observed values.

    Returns
    -------
    Categorical
        The Categorical to group by.  If sort=False, its categories are in
        order of appearance in the codes (unless the input is ordered, in
        which case the original relative order is kept), followed by any
        unrepresented categories in their original order.
    Categorical or None
        The original categorical when ``observed`` is true, otherwise None.
    """
    if observed:
        # Only the categories present in the data matter here.  For an
        # ordered ``c`` this is equivalent to
        #   return c.remove_unused_categories(), c
        seen_codes = unique1d(c.codes)
        # -1 marks a missing value and must not index into the categories.
        seen_codes = seen_codes[seen_codes != -1]
        if c.ordered:
            seen_codes = np.sort(seen_codes)

        # Re-express the existing codes against the reduced category set.
        observed_categories = c.categories.take(seen_codes)
        new_codes = recode_for_categories(
            c.codes, c.categories, observed_categories
        )

        # Wrap the new codes and categories in a fresh Categorical.
        new_dtype = CategoricalDtype(observed_categories, ordered=c.ordered)
        recoded = Categorical(new_codes, dtype=new_dtype, fastpath=True)
        return recoded, c

    if sort:
        # Groups are already ordered by c.categories; nothing to do.
        return c, None

    # sort=False: groups should come out in as-encountered order (GH-8868).
    uniques = c.unique()

    # See GH-38140: leave the missing-value code out of the indexer used
    # to pick categories.
    not_missing = uniques.codes != -1
    appearance_codes = uniques.codes[not_missing]
    if uniques.ordered:
        appearance_codes = np.sort(appearance_codes)
    encountered = uniques.categories.take(appearance_codes)
    uniques = uniques.set_categories(encountered)

    # groupby needs every category to be present, including the ones that
    # never occur in the data (GH-13179), so append those at the end.
    is_unrepresented = ~c.categories.isin(uniques.categories)
    uniques = uniques.add_categories(c.categories[is_unrepresented])

    return c.reorder_categories(uniques.categories), None

93 

94 

def recode_from_groupby(
    c: Categorical, sort: bool, ci: CategoricalIndex
) -> CategoricalIndex:
    """
    Reverse the recoding applied for groupby (see ``recode_for_groupby``),
    accounting for sort / observed.

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The codes / categories to recode.

    Returns
    -------
    CategoricalIndex
    """
    if not sort:
        # Not sorting: keep ci's category order and append the categories
        # of ``c`` that ci does not have yet.
        absent = ~c.categories.isin(ci.categories)
        unobserved = c.categories[absent]
        # error: "CategoricalIndex" has no attribute "add_categories"
        return ci.add_categories(unobserved)  # type: ignore[attr-defined]

    # Sorting: go back to the original category ordering of ``c``.
    # error: "CategoricalIndex" has no attribute "set_categories"
    return ci.set_categories(c.categories)  # type: ignore[attr-defined]

117 

118 # we are not sorting, so add unobserved to the end 

119 new_cats = c.categories[~c.categories.isin(ci.categories)] 

120 # error: "CategoricalIndex" has no attribute "add_categories" 

121 return ci.add_categories(new_cats) # type: ignore[attr-defined]