Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/boolean.py: 21%

159 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import numbers 

4from typing import ( 

5 TYPE_CHECKING, 

6 cast, 

7) 

8 

9import numpy as np 

10 

11from pandas._libs import ( 

12 lib, 

13 missing as libmissing, 

14) 

15from pandas._typing import ( 

16 Dtype, 

17 DtypeObj, 

18 type_t, 

19) 

20 

21from pandas.core.dtypes.common import ( 

22 is_list_like, 

23 is_numeric_dtype, 

24) 

25from pandas.core.dtypes.dtypes import register_extension_dtype 

26from pandas.core.dtypes.missing import isna 

27 

28from pandas.core import ops 

29from pandas.core.arrays.masked import ( 

30 BaseMaskedArray, 

31 BaseMaskedDtype, 

32) 

33 

34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true

35 import pyarrow 

36 

37 from pandas._typing import npt 

38 

39 

@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype for boolean data.

    .. versionadded:: 1.0.0

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    name = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """Scalar type of the elements: ``numpy.bool_``."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Single-character kind code for this dtype: ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """The NumPy dtype used for the data array: ``np.dtype("bool")``."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        return True

    @property
    def _is_numeric(self) -> bool:
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Construct BooleanArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Arrow data of boolean type, or of null type (in which case every
            element of the result is missing).

        Returns
        -------
        BooleanArray

        Raises
        ------
        TypeError
            If the Arrow type is neither boolean nor null.
        """
        import pyarrow

        if pyarrow.types.is_null(array.type):
            # A null-typed array has no boolean buffers to decode; every
            # entry is missing, so the data values are just placeholders.
            length = len(array)
            return BooleanArray(
                np.zeros(length, dtype=np.bool_), np.ones(length, dtype=np.bool_)
            )

        if array.type != pyarrow.bool_():
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        results = []
        for arr in chunks:
            buflist = arr.buffers()
            # Re-interpret the second buffer alone as a boolean array to get
            # the raw data without any null handling.
            data = pyarrow.BooleanArray.from_buffers(
                arr.type, len(arr), [None, buflist[1]], offset=arr.offset
            ).to_numpy(zero_copy_only=False)
            if arr.null_count != 0:
                # The first buffer read as booleans marks the entries that
                # are present; invert it because the BooleanArray mask uses
                # True for missing.
                mask = pyarrow.BooleanArray.from_buffers(
                    arr.type, len(arr), [None, buflist[0]], offset=arr.offset
                ).to_numpy(zero_copy_only=False)
                mask = ~mask
            else:
                mask = np.zeros(len(arr), dtype=bool)

            results.append(BooleanArray(data, mask))

        if not results:
            # ChunkedArray without any chunks
            return BooleanArray(
                np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
            )
        return BooleanArray._concat_same_type(results)

144 

145 

def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : list-like
        Boolean-like data: booleans, 0/1 integers or floats, and missing
        values. Typically 1D; numpy arrays of higher dimension are accepted
        as well.
    mask : bool array, optional
        Additional missing-value indicator (True means missing). It is
        combined with the missing values detected in ``values``.
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)
        Two boolean numpy arrays of identical shape.

    Raises
    ------
    TypeError
        If ``values`` contains entries that are not bool-like (anything
        other than booleans, 0/1 numbers or missing values).
    ValueError
        If a mask is passed together with a BooleanArray, or if the shapes
        of the resulting values and mask differ.
    """
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        values, mask = values._data, values._mask
        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    mask_values = None
    if isinstance(values, np.ndarray) and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
        mask_values = isna(values)
        valid = ~mask_values

        # Allocate from the full shape rather than len(values) so that the
        # boolean indexing below also works for N-d input.
        values_bool = np.zeros(values.shape, dtype=bool)
        values_bool[valid] = values[valid].astype(bool)

        # Round-tripping through bool must be lossless, i.e. only 0/1 allowed
        if not np.all(values_bool[valid].astype(values.dtype) == values[valid]):
            raise TypeError("Need to pass bool-like values")

        values = values_bool
    else:
        values_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_]
        # within this branch, it assumes it can also be None
        mask_values = cast("npt.NDArray[np.bool_]", isna(values_object))
        valid = ~mask_values

        # Use the converted array's shape; the raw input may be any list-like
        values = np.zeros(values_object.shape, dtype=bool)
        values[valid] = values_object[valid].astype(bool)

        # if the values were integer-like, validate that they were actually
        # 0s and 1s
        if (inferred_dtype in integer_like) and not (
            np.all(values[valid].astype(float) == values_object[valid].astype(float))
        ):
            raise TypeError("Need to pass bool-like values")

    if mask is None and mask_values is None:
        mask = np.zeros(values.shape, dtype=bool)
    elif mask is None:
        mask = mask_values
    else:
        if isinstance(mask, np.ndarray) and mask.dtype == np.bool_:
            if mask_values is not None:
                # "|" already produces a new array, no explicit copy needed
                mask = mask | mask_values
            else:
                if copy:
                    mask = mask.copy()
        else:
            mask = np.array(mask, dtype=bool)
            if mask_values is not None:
                mask = mask | mask_values

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask

231 

232 

class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    This is a pandas Extension array for boolean data, under the hood
    represented by 2 numpy arrays: a boolean array with the data and
    a boolean array with the mask (True indicating missing).

    BooleanArray implements Kleene logic (sometimes called three-value
    logic) for logical operations. See :ref:`boolean.kleene` for more.

    To construct a BooleanArray from generic array-like input, use
    :func:`pandas.array` specifying ``dtype="boolean"`` (see examples
    below).

    .. versionadded:: 1.0.0

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create a BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False
    # Fill values used for any/all
    _truthy_value = True
    _falsey_value = False
    # Strings recognised by default when parsing booleans from text
    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}

    def __init__(
        self, values: np.ndarray, mask: np.ndarray, copy: bool = False
    ) -> None:
        # Only an actual boolean ndarray is accepted as the data array.
        is_bool_ndarray = isinstance(values, np.ndarray) and values.dtype == np.bool_
        if not is_bool_ndarray:
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        self._dtype = BooleanDtype()
        super().__init__(values, mask, copy=copy)

    @property
    def dtype(self) -> BooleanDtype:
        """The BooleanDtype instance created in ``__init__``."""
        return self._dtype

    @classmethod
    def _from_sequence_of_strings(
        cls,
        strings: list[str],
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
        true_values: list[str] | None = None,
        false_values: list[str] | None = None,
    ) -> BooleanArray:
        """
        Build a BooleanArray from strings.

        Each entry must be missing, one of the class-level true/false
        strings, or one of the extra ``true_values`` / ``false_values``;
        anything else raises ``ValueError``.
        """
        accepted_true = cls._TRUE_VALUES.union(true_values or [])
        accepted_false = cls._FALSE_VALUES.union(false_values or [])

        def parse(item):
            # Missing entries are passed through untouched
            if isna(item):
                return item
            if item in accepted_true:
                return True
            if item in accepted_false:
                return False
            raise ValueError(f"{item} cannot be cast to bool")

        parsed = [parse(item) for item in strings]
        return cls._from_sequence(parsed, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """Convert ``value`` to a (data, mask) pair via ``coerce_to_array``."""
        if dtype:
            assert dtype == "boolean"
        return coerce_to_array(value, copy=copy)

    def _logical_method(self, other, op):
        """
        Apply a Kleene-logic operator (or/and/xor and their reflected
        variants) between this array and ``other``.

        ``other`` may be a BooleanArray, a 1-d list-like, a bool or
        ``pandas.NA``.
        """
        op_name = op.__name__
        assert op_name in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

        # Determined before 'other' is converted below
        scalar_operand = lib.is_scalar(other)
        other_mask = None

        if isinstance(other, BooleanArray):
            other, other_mask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other, dtype="bool")
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            other, other_mask = coerce_to_array(other, copy=False)
        elif isinstance(other, np.bool_):
            other = other.item()

        if scalar_operand:
            if other is not libmissing.NA and not lib.is_bool(other):
                raise TypeError(
                    "'other' should be pandas.NA or a bool. "
                    f"Got {type(other).__name__} instead."
                )
        elif len(self) != len(other):
            raise ValueError("Lengths must match")

        # Pick the Kleene implementation matching the operator name
        if op_name in {"or_", "ror_"}:
            kleene = ops.kleene_or
        elif op_name in {"and_", "rand_"}:
            kleene = ops.kleene_and
        else:
            # i.e. xor, rxor
            kleene = ops.kleene_xor

        result, result_mask = kleene(self._data, other, self._mask, other_mask)

        # i.e. BooleanArray
        return self._maybe_mask_result(result, result_mask)