Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/replace.py: 14%

53 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Methods used by Block.replace and related methods. 

3""" 

4from __future__ import annotations 

5 

6import operator 

7import re 

8from typing import ( 

9 Any, 

10 Pattern, 

11) 

12 

13import numpy as np 

14 

15from pandas._typing import ( 

16 ArrayLike, 

17 Scalar, 

18 npt, 

19) 

20 

21from pandas.core.dtypes.common import ( 

22 is_datetimelike_v_numeric, 

23 is_numeric_v_string_like, 

24 is_re, 

25 is_re_compilable, 

26 is_scalar, 

27) 

28from pandas.core.dtypes.missing import isna 

29 

30 

31def should_use_regex(regex: bool, to_replace: Any) -> bool: 

32 """ 

33 Decide whether to treat `to_replace` as a regular expression. 

34 """ 

35 if is_re(to_replace): 

36 regex = True 

37 

38 regex = regex and is_re_compilable(to_replace) 

39 

40 # Don't use regex if the pattern is empty. 

41 regex = regex and re.compile(to_replace).pattern != "" 

42 return regex 

43 

44 

def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike | bool:
    """
    Compare an array-like against a scalar or regex pattern.

    Calls operator.eq or re.search, depending on the regex argument. If
    regex is True and `b` is usable as a regular expression, an element-wise
    regex search is performed; otherwise an equality comparison is used.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        Positions of `a` that take part in the comparison. When `a` is an
        ndarray only ``a[mask]`` is compared; all other positions are
        reported as False.

    Returns
    -------
    mask : array-like of bool
        ``~mask`` when `b` is NA. A plain ``False`` may be returned for the
        datetimelike-vs-numeric case when `a` is not an ndarray.

    Raises
    ------
    TypeError
        If the comparison produced a scalar although `a` is an ndarray.
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ):
        """
        Raise TypeError if comparing `a` with `b` collapsed to a scalar
        even though `a` is an ndarray; otherwise do nothing.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            left_name = f"ndarray(dtype={a.dtype})"
            right_name = type(b).__name__
            raise TypeError(
                f"Cannot compare types {repr(left_name)} and {repr(right_name)}"
            )

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # otypes is given explicitly so that an empty selection (nothing
        # left after masking) yields an empty bool array; np.vectorize
        # cannot infer the output dtype from size-0 input and would raise.
        op = np.vectorize(
            lambda x: (
                bool(re.search(b, x))
                if isinstance(x, str) and isinstance(b, (str, Pattern))
                else False
            ),
            otypes=[bool],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # `a` may already have been reduced to a[mask] above, so size the
        # all-False result by the mask to keep it aligned with the input.
        out_shape = a.shape if mask is None else mask.shape
        return np.zeros(out_shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result

118 

119 

def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
    """
    Apply a regex replacement to the string elements of `values`.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        Replacement. A string is substituted via ``rx.sub``; anything else
        replaces the whole matching element.
    mask : np.ndarray[bool], optional
        When given, only ``values[mask]`` is processed.

    Notes
    -----
    Alters values in-place. Elements that are not strings are left as-is.
    """
    if isna(value) or not isinstance(value, str):
        # The replacement is not a string (numeric, nan, object), so a
        # matching string element is swapped out wholesale.
        def _replace_one(elem):
            if not (is_re(rx) and isinstance(elem, str)):
                return elem
            return elem if rx.search(elem) is None else value

    else:
        # The replacement is a string here; non-string elements (such as
        # nulls) are handed back untouched.
        def _replace_one(elem):
            if not (is_re(rx) and isinstance(elem, str)):
                return elem
            return rx.sub(value, elem)

    vectorized = np.vectorize(_replace_one, otypes=[np.object_])

    if mask is not None:
        values[mask] = vectorized(values[mask])
    else:
        values[:] = vectorized(values)

160 else: 

161 values[mask] = f(values[mask])