Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/masked_reductions.py: 25%

38 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2masked_reductions.py is for reduction algorithms using a mask-based approach 

3for missing values. 

4""" 

5from __future__ import annotations 

6 

7from typing import Callable 

8 

9import numpy as np 

10 

11from pandas._libs import missing as libmissing 

12from pandas._typing import npt 

13 

14from pandas.core.nanops import check_below_min_count 

15 

16 

def _sumprod(
    func: Callable,
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: int | None = None,
):
    """
    Sum or product for 1D masked array.

    Parameters
    ----------
    func : np.sum or np.prod
    values : np.ndarray
        Numpy array with the values (can be of any dtype that support the
        operation).
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    min_count : int, default 0
        The required number of valid values to perform the operation. If fewer than
        ``min_count`` non-NA values are present the result will be NA.
    axis : int, optional, default None
        Forwarded to ``mask.any`` and to ``func``.

    Returns
    -------
    The value produced by ``func``, or ``libmissing.NA`` when the
    missing-value / ``min_count`` checks below fail.
    """
    if not skipna:
        # Without skipping, a single masked entry poisons the result; the
        # min_count check is done against the full shape (mask=None).
        if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count):
            return libmissing.NA
        return func(values, axis=axis)

    # The scalar NA shortcut is only taken for a full reduction
    # (axis=None) or a 1D input.
    if check_below_min_count(values.shape, mask, min_count) and (
        axis is None or values.ndim == 1
    ):
        return libmissing.NA

    # ``where`` restricts the reduction to the unmasked positions.
    return func(values, where=~mask, axis=axis)


def sum(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: int | None = None,
):
    """
    Masked sum: delegate to ``_sumprod`` with ``np.sum``.

    See ``_sumprod`` for the meaning of the parameters and the NA rules.
    """
    return _sumprod(
        np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
    )


def prod(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    min_count: int = 0,
    axis: int | None = None,
):
    """
    Masked product: delegate to ``_sumprod`` with ``np.prod``.

    See ``_sumprod`` for the meaning of the parameters and the NA rules.
    """
    return _sumprod(
        np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis
    )

82 

83 

def _minmax(
    func: Callable,
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: int | None = None,
):
    """
    Reduction for 1D masked array.

    Parameters
    ----------
    func : np.min or np.max
    values : np.ndarray
        Numpy array with the values (can be of any dtype that support the
        operation).
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA.
    axis : int, optional, default None
        Accepted in the signature but not used by this implementation:
        ``func`` is always called without an ``axis`` argument.

    Returns
    -------
    The value produced by ``func``, or ``libmissing.NA`` when there is
    nothing valid to reduce.
    """
    if not skipna:
        if mask.any() or not values.size:
            # min/max with empty array raise in numpy, pandas returns NA
            return libmissing.NA
        return func(values)

    # Boolean indexing keeps only the unmasked entries.
    subset = values[~mask]
    if subset.size:
        return func(subset)
    # min/max with empty array raise in numpy, pandas returns NA
    return libmissing.NA


def min(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: int | None = None,
):
    """
    Masked minimum: delegate to ``_minmax`` with ``np.min``.

    See ``_minmax`` for the meaning of the parameters and the NA rules.
    """
    return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis)


def max(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    *,
    skipna: bool = True,
    axis: int | None = None,
):
    """
    Masked maximum: delegate to ``_minmax`` with ``np.max``.

    See ``_minmax`` for the meaning of the parameters and the NA rules.
    """
    return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis)

140 

141 

# TODO: axis kwarg
def mean(values: np.ndarray, mask: npt.NDArray[np.bool_], skipna: bool = True):
    """
    Mean of a masked array.

    Parameters
    ----------
    values : np.ndarray
        Numpy array with the values.
    mask : np.ndarray[bool]
        Boolean numpy array (True values indicate missing values).
    skipna : bool, default True
        Whether to skip NA; passed through to ``_sumprod``.

    Returns
    -------
    ``libmissing.NA`` when ``values`` is empty or every entry is masked,
    otherwise the masked sum divided by the number of unmasked entries.
    """
    if not values.size or mask.all():
        return libmissing.NA
    total = _sumprod(np.sum, values=values, mask=mask, skipna=skipna)
    # The divisor is always the count of unmasked entries, whatever skipna is.
    count = np.count_nonzero(~mask)
    return total / count