Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/replace.py: 14%
53 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Methods used by Block.replace and related methods.
3"""
4from __future__ import annotations
6import operator
7import re
8from typing import (
9 Any,
10 Pattern,
11)
13import numpy as np
15from pandas._typing import (
16 ArrayLike,
17 Scalar,
18 npt,
19)
21from pandas.core.dtypes.common import (
22 is_datetimelike_v_numeric,
23 is_numeric_v_string_like,
24 is_re,
25 is_re_compilable,
26 is_scalar,
27)
28from pandas.core.dtypes.missing import isna
def should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Decide whether `to_replace` should be handled as a regular expression.

    Parameters
    ----------
    regex : bool
        Caller's request to use regex matching. Ignored (treated as True)
        when `to_replace` is already a compiled pattern.
    to_replace : Any
        Candidate pattern.

    Returns
    -------
    bool
        True only if regex matching is requested (or implied by a compiled
        pattern), `to_replace` can be compiled, and the resulting pattern is
        not the empty string.
    """
    # A compiled pattern implies regex matching regardless of the flag.
    wants_regex = True if is_re(to_replace) else regex

    if not wants_regex:
        # Hand back the caller's own falsy value untouched.
        return wants_regex

    if not is_re_compilable(to_replace):
        return False

    # Don't use regex if the pattern is empty.
    compiled = re.compile(to_replace)
    return compiled.pattern != ""
def compare_or_regex_search(
    a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_]
) -> ArrayLike | bool:
    """
    Compare two array-like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array-like
    b : scalar or regex pattern
    regex : bool
    mask : np.ndarray[bool]
        Selects the positions of `a` that take part in the comparison when
        `a` is an ndarray; unselected positions are False in the result.

    Returns
    -------
    mask : array-like of bool
        If `b` is NA, the inverse of `mask` is returned without comparing.

    Raises
    ------
    TypeError
        If `a` is an ndarray but the comparison produced a scalar, meaning
        the two operands could not be compared element-wise.
    """
    if isna(b):
        return ~mask

    def _check_comparison_types(
        result: ArrayLike | bool, a: ArrayLike, b: Scalar | Pattern
    ) -> None:
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise does nothing.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            type_names = [type(a).__name__, type(b).__name__]

            type_names[0] = f"ndarray(dtype={a.dtype})"

            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex or not should_use_regex(regex, b):
        # TODO: should use missing.mask_missing?
        op = lambda x: operator.eq(x, b)
    else:
        # otypes is given explicitly so that an empty selection (e.g. every
        # element masked out) yields an empty bool array instead of
        # np.vectorize raising on size-0 input.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            otypes=[bool],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # `a` has already been reduced by `mask` here, so size the all-False
        # result from `mask` to match the shape produced by the normal path.
        out_shape = mask.shape if mask is not None else a.shape
        return np.zeros(out_shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        np.place(tmp, mask, result)
        result = tmp

    _check_comparison_types(result, a, b)
    return result
def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None
) -> None:
    """
    Apply a regex-based replacement to the string elements of `values`.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        Replacement. If it is a string it is substituted into each matching
        element via ``rx.sub``; otherwise a matching element is replaced
        wholesale by `value`.
    mask : np.ndarray[bool], optional
        When given, only the selected positions are processed.

    Notes
    -----
    Alters values in-place. Non-string elements are left unchanged.
    """

    if isna(value) or not isinstance(value, str):
        # The replacement is not a string (numeric, nan, object): a matching
        # string element is swapped out entirely.
        def _substitute(elem):
            if not (is_re(rx) and isinstance(elem, str)):
                return elem
            return elem if rx.search(elem) is None else value

    else:
        # The replacement is a string: substitute within each string element;
        # anything else (including nulls) passes through.
        def _substitute(elem):
            if not (is_re(rx) and isinstance(elem, str)):
                return elem
            return rx.sub(value, elem)

    vectorized = np.vectorize(_substitute, otypes=[np.object_])

    if mask is not None:
        values[mask] = vectorized(values[mask])
    else:
        values[:] = vectorized(values)