Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/boolean.py: 21%
159 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import numbers
4from typing import (
5 TYPE_CHECKING,
6 cast,
7)
9import numpy as np
11from pandas._libs import (
12 lib,
13 missing as libmissing,
14)
15from pandas._typing import (
16 Dtype,
17 DtypeObj,
18 type_t,
19)
21from pandas.core.dtypes.common import (
22 is_list_like,
23 is_numeric_dtype,
24)
25from pandas.core.dtypes.dtypes import register_extension_dtype
26from pandas.core.dtypes.missing import isna
28from pandas.core import ops
29from pandas.core.arrays.masked import (
30 BaseMaskedArray,
31 BaseMaskedDtype,
32)
34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true
35 import pyarrow
37 from pandas._typing import npt
@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype describing nullable boolean data.

    .. versionadded:: 1.0.0

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    name = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """Scalar type of the elements (``np.bool_``)."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Kind code of this dtype, always ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """NumPy dtype of the data buffer (``np.dtype("bool")``)."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array class that goes with this dtype.

        Returns
        -------
        type
            The ``BooleanArray`` class.
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        """Always True for this dtype."""
        return True

    @property
    def _is_numeric(self) -> bool:
        """Always True for this dtype."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Build a BooleanArray out of a pyarrow Array or ChunkedArray.

        Raises
        ------
        TypeError
            If ``array.type`` is not the pyarrow boolean type.
        """
        # Imported lazily so that pyarrow is only needed when this is called.
        import pyarrow

        if array.type != pyarrow.bool_():
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        # A plain Array is handled as a single chunk; anything else is
        # treated as a pyarrow.ChunkedArray.
        chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

        def decode(chunk, buffer):
            # Reinterpret one raw buffer of ``chunk`` as a boolean array and
            # convert it to a numpy array.
            return pyarrow.BooleanArray.from_buffers(
                chunk.type, len(chunk), [None, buffer], offset=chunk.offset
            ).to_numpy(zero_copy_only=False)

        pieces = []
        for chunk in chunks:
            buffers = chunk.buffers()
            # buffers[1] carries the values, buffers[0] the validity bits.
            data = decode(chunk, buffers[1])
            if chunk.null_count == 0:
                mask = np.zeros(len(chunk), dtype=bool)
            else:
                # Decoded validity is inverted so that True marks missing.
                mask = ~decode(chunk, buffers[0])
            pieces.append(BooleanArray(data, mask))

        if pieces:
            return BooleanArray._concat_same_type(pieces)

        # No chunks at all: return an empty array.
        return BooleanArray(
            np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
        )
def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
    mask : bool 1D array, optional
        Extra missing-value mask (True means missing). It is combined
        with the missing values detected in ``values`` and must have the
        same shape as the coerced values; it is never broadcast.
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)
        Two boolean numpy arrays of identical shape.

    Raises
    ------
    TypeError
        If ``values`` holds anything that is not bool-like (booleans,
        missing values, or numbers equal to 0 or 1).
    ValueError
        If ``mask`` is given together with a BooleanArray, or if the
        shape of ``mask`` does not match the shape of the values.
    """
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        values, mask = values._data, values._mask
        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    mask_values = None
    if isinstance(values, np.ndarray) and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
        mask_values = isna(values)
        valid = ~mask_values

        values_bool = np.zeros(len(values), dtype=bool)
        values_bool[valid] = values[valid].astype(bool)

        # Round-trip through bool: anything other than 0/1 changes value.
        if not np.all(values_bool[valid].astype(values.dtype) == values[valid]):
            raise TypeError("Need to pass bool-like values")

        values = values_bool
    else:
        values_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_]
        # within this branch, it assumes it can also be None
        mask_values = cast("npt.NDArray[np.bool_]", isna(values_object))
        valid = ~mask_values
        values = np.zeros(len(values), dtype=bool)
        values[valid] = values_object[valid].astype(bool)

        # if the values were integer-like, validate it were actually 0/1's
        if (inferred_dtype in integer_like) and not (
            np.all(values[valid].astype(float) == values_object[valid].astype(float))
        ):
            raise TypeError("Need to pass bool-like values")

    if mask is None:
        if mask_values is None:
            mask = np.zeros(values.shape, dtype=bool)
        else:
            mask = mask_values
    else:
        mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
        if not mask_is_bool_ndarray:
            # np.array always allocates, so no extra copy is needed later.
            mask = np.array(mask, dtype=bool)

        # Validate BEFORE combining: ``mask | mask_values`` would otherwise
        # broadcast a length-1 (or 0-d) mask to the full length and the
        # shape check below could no longer detect the mismatch.
        if values.shape != mask.shape:
            raise ValueError("values.shape and mask.shape must match")

        if mask_values is not None:
            mask = mask | mask_values
        elif copy and mask_is_bool_ndarray:
            mask = mask.copy()

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask
class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    This pandas Extension array stores boolean data in two numpy arrays:
    one boolean array holding the data and one boolean array holding the
    mask, where True marks a missing entry.

    Logical operations on a BooleanArray follow Kleene logic (sometimes
    called three-value logic). See :ref:`boolean.kleene` for more.

    To build a BooleanArray from generic array-like input, call
    :func:`pandas.array` with ``dtype="boolean"`` (see the examples
    below).

    .. versionadded:: 1.0.0

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create a BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False
    # Fill values used for any/all
    _truthy_value = True
    _falsey_value = False
    # String spellings recognised by _from_sequence_of_strings
    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    def __init__(
        self, values: np.ndarray, mask: np.ndarray, copy: bool = False
    ) -> None:
        is_bool_ndarray = isinstance(values, np.ndarray) and values.dtype == np.bool_
        if not is_bool_ndarray:
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        self._dtype = BooleanDtype()
        super().__init__(values, mask, copy=copy)

    @property
    def dtype(self) -> BooleanDtype:
        """The BooleanDtype instance created for this array in ``__init__``."""
        return self._dtype

    @classmethod
    def _from_sequence_of_strings(
        cls,
        strings: list[str],
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
        true_values: list[str] | None = None,
        false_values: list[str] | None = None,
    ) -> BooleanArray:
        """
        Build an array from strings spelling out booleans.

        Missing entries are passed through unchanged. Any string found in
        neither the true set nor the false set (class defaults plus the
        optional ``true_values`` / ``false_values``) raises ValueError.
        """
        accepted_true = cls._TRUE_VALUES.union(true_values or [])
        accepted_false = cls._FALSE_VALUES.union(false_values or [])

        def to_scalar(item):
            if isna(item):
                return item
            if item in accepted_true:
                return True
            if item in accepted_false:
                return False
            raise ValueError(f"{item} cannot be cast to bool")

        converted = [to_scalar(item) for item in strings]
        return cls._from_sequence(converted, dtype=dtype, copy=copy)

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """Delegate to the module-level ``coerce_to_array``; returns (values, mask)."""
        if dtype:
            assert dtype == "boolean"
        return coerce_to_array(value, copy=copy)

    def _logical_method(self, other, op):
        """
        Apply a Kleene-logic ``or`` / ``and`` / ``xor`` between self and ``other``.

        ``other`` may be a BooleanArray, a 1-d list-like, a bool or
        ``pandas.NA``. Any other scalar raises TypeError, and a non-scalar
        of a different length raises ValueError.
        """
        op_name = op.__name__
        assert op_name in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

        # Scalar-ness is decided on the raw operand, before any unwrapping.
        scalar_operand = lib.is_scalar(other)
        other_mask = None

        if isinstance(other, BooleanArray):
            other, other_mask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other, dtype="bool")
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            other, other_mask = coerce_to_array(other, copy=False)
        elif isinstance(other, np.bool_):
            other = other.item()

        if scalar_operand:
            if other is not libmissing.NA and not lib.is_bool(other):
                raise TypeError(
                    "'other' should be pandas.NA or a bool. "
                    f"Got {type(other).__name__} instead."
                )
        elif len(self) != len(other):
            raise ValueError("Lengths must match")

        # Choose the Kleene kernel matching the operator; forward and
        # reflected names share a kernel.
        if op_name in ("or_", "ror_"):
            kernel = ops.kleene_or
        elif op_name in ("and_", "rand_"):
            kernel = ops.kleene_and
        else:
            # i.e. xor, rxor
            kernel = ops.kleene_xor

        result, result_mask = kernel(self._data, other, self._mask, other_mask)

        # i.e. BooleanArray
        return self._maybe_mask_result(result, result_mask)