Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/_mixins.py: 29%

204 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from functools import wraps 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Literal, 

8 Sequence, 

9 TypeVar, 

10 cast, 

11 overload, 

12) 

13 

14import numpy as np 

15 

16from pandas._libs import lib 

17from pandas._libs.arrays import NDArrayBacked 

18from pandas._typing import ( 

19 ArrayLike, 

20 Dtype, 

21 F, 

22 PositionalIndexer2D, 

23 PositionalIndexerTuple, 

24 ScalarIndexer, 

25 SequenceIndexer, 

26 Shape, 

27 TakeIndexer, 

28 npt, 

29 type_t, 

30) 

31from pandas.errors import AbstractMethodError 

32from pandas.util._decorators import doc 

33from pandas.util._validators import ( 

34 validate_bool_kwarg, 

35 validate_fillna_kwargs, 

36 validate_insert_loc, 

37) 

38 

39from pandas.core.dtypes.common import ( 

40 is_dtype_equal, 

41 pandas_dtype, 

42) 

43from pandas.core.dtypes.dtypes import ( 

44 DatetimeTZDtype, 

45 ExtensionDtype, 

46 PeriodDtype, 

47) 

48from pandas.core.dtypes.missing import array_equivalent 

49 

50from pandas.core import missing 

51from pandas.core.algorithms import ( 

52 take, 

53 unique, 

54 value_counts, 

55) 

56from pandas.core.array_algos.quantile import quantile_with_mask 

57from pandas.core.array_algos.transforms import shift 

58from pandas.core.arrays.base import ExtensionArray 

59from pandas.core.construction import extract_array 

60from pandas.core.indexers import check_array_indexer 

61from pandas.core.sorting import nargminmax 

62 

# Self-type for methods that return "an instance of the same subclass".
# The bound is a forward reference because the class is defined further down.
NDArrayBackedExtensionArrayT = TypeVar(
    "NDArrayBackedExtensionArrayT",
    bound="NDArrayBackedExtensionArray",
)

# Names below are only referenced from annotations, so they are imported for
# the benefit of static type checkers and never at runtime.
if TYPE_CHECKING:
    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
    )

    from pandas import Series

74 

75 

def ravel_compat(meth: F) -> F:
    """
    Decorate a method so that it always operates on 1D data.

    One-dimensional arrays are passed straight through to ``meth``.  For any
    other dimensionality the array is flattened with ``ravel("K")``, ``meth``
    is applied to the flattened array, and the result is reshaped back to the
    caller's shape using the memory order ("F" or "C") of the original
    backing ndarray.
    """

    @wraps(meth)
    def method(self, *args, **kwargs):
        if self.ndim != 1:
            # Record the layout before flattening so the reshape below can
            # restore elements to the positions they came from.
            layout = self._ndarray.flags
            flattened = self.ravel("K")
            flat_result = meth(flattened, *args, **kwargs)
            if layout.f_contiguous:
                order = "F"
            else:
                order = "C"
            return flat_result.reshape(self.shape, order=order)

        return meth(self, *args, **kwargs)

    return cast(F, method)

94 

95 

96class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): 

97 """ 

98 ExtensionArray that is backed by a single NumPy ndarray. 

99 """ 

100 

101 _ndarray: np.ndarray 

102 

103 # scalar used to denote NA value inside our self._ndarray, e.g. -1 

104 # for Categorical, iNaT for Period. Outside of object dtype, 

105 # self.isna() should be exactly locations in self._ndarray with 

106 # _internal_fill_value. 

107 _internal_fill_value: Any 

108 

109 def _box_func(self, x): 

110 """ 

111 Wrap numpy type in our dtype.type if necessary. 

112 """ 

113 return x 

114 

115 def _validate_scalar(self, value): 

116 # used by NDArrayBackedExtensionIndex.insert 

117 raise AbstractMethodError(self) 

118 

119 # ------------------------------------------------------------------------ 

120 

121 def view(self, dtype: Dtype | None = None) -> ArrayLike: 

122 # We handle datetime64, datetime64tz, timedelta64, and period 

123 # dtypes here. Everything else we pass through to the underlying 

124 # ndarray. 

125 if dtype is None or dtype is self.dtype: 

126 return self._from_backing_data(self._ndarray) 

127 

128 if isinstance(dtype, type): 

129 # we sometimes pass non-dtype objects, e.g np.ndarray; 

130 # pass those through to the underlying ndarray 

131 return self._ndarray.view(dtype) 

132 

133 dtype = pandas_dtype(dtype) 

134 arr = self._ndarray 

135 

136 if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): 

137 cls = dtype.construct_array_type() 

138 return cls(arr.view("i8"), dtype=dtype) 

139 elif dtype == "M8[ns]": 

140 from pandas.core.arrays import DatetimeArray 

141 

142 return DatetimeArray(arr.view("i8"), dtype=dtype) 

143 elif dtype == "m8[ns]": 

144 from pandas.core.arrays import TimedeltaArray 

145 

146 return TimedeltaArray(arr.view("i8"), dtype=dtype) 

147 

148 # error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible 

149 # type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, 

150 # type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, 

151 # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" 

152 return arr.view(dtype=dtype) # type: ignore[arg-type] 

153 

154 def take( 

155 self: NDArrayBackedExtensionArrayT, 

156 indices: TakeIndexer, 

157 *, 

158 allow_fill: bool = False, 

159 fill_value: Any = None, 

160 axis: int = 0, 

161 ) -> NDArrayBackedExtensionArrayT: 

162 if allow_fill: 

163 fill_value = self._validate_scalar(fill_value) 

164 

165 new_data = take( 

166 self._ndarray, 

167 indices, 

168 allow_fill=allow_fill, 

169 fill_value=fill_value, 

170 axis=axis, 

171 ) 

172 return self._from_backing_data(new_data) 

173 

174 # ------------------------------------------------------------------------ 

175 

176 def equals(self, other) -> bool: 

177 if type(self) is not type(other): 

178 return False 

179 if not is_dtype_equal(self.dtype, other.dtype): 

180 return False 

181 return bool(array_equivalent(self._ndarray, other._ndarray)) 

182 

183 @classmethod 

184 def _from_factorized(cls, values, original): 

185 assert values.dtype == original._ndarray.dtype 

186 return original._from_backing_data(values) 

187 

188 def _values_for_argsort(self) -> np.ndarray: 

189 return self._ndarray 

190 

191 def _values_for_factorize(self): 

192 return self._ndarray, self._internal_fill_value 

193 

194 # Signature of "argmin" incompatible with supertype "ExtensionArray" 

195 def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override] 

196 # override base class by adding axis keyword 

197 validate_bool_kwarg(skipna, "skipna") 

198 if not skipna and self._hasna: 

199 raise NotImplementedError 

200 return nargminmax(self, "argmin", axis=axis) 

201 

202 # Signature of "argmax" incompatible with supertype "ExtensionArray" 

203 def argmax(self, axis: int = 0, skipna: bool = True): # type: ignore[override] 

204 # override base class by adding axis keyword 

205 validate_bool_kwarg(skipna, "skipna") 

206 if not skipna and self._hasna: 

207 raise NotImplementedError 

208 return nargminmax(self, "argmax", axis=axis) 

209 

210 def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: 

211 new_data = unique(self._ndarray) 

212 return self._from_backing_data(new_data) 

213 

214 @classmethod 

215 @doc(ExtensionArray._concat_same_type) 

216 def _concat_same_type( 

217 cls: type[NDArrayBackedExtensionArrayT], 

218 to_concat: Sequence[NDArrayBackedExtensionArrayT], 

219 axis: int = 0, 

220 ) -> NDArrayBackedExtensionArrayT: 

221 dtypes = {str(x.dtype) for x in to_concat} 

222 if len(dtypes) != 1: 

223 raise ValueError("to_concat must have the same dtype (tz)", dtypes) 

224 

225 new_values = [x._ndarray for x in to_concat] 

226 new_arr = np.concatenate(new_values, axis=axis) 

227 return to_concat[0]._from_backing_data(new_arr) 

228 

229 @doc(ExtensionArray.searchsorted) 

230 def searchsorted( 

231 self, 

232 value: NumpyValueArrayLike | ExtensionArray, 

233 side: Literal["left", "right"] = "left", 

234 sorter: NumpySorter = None, 

235 ) -> npt.NDArray[np.intp] | np.intp: 

236 # TODO(2.0): use _validate_setitem_value once dt64tz mismatched-timezone 

237 # deprecation is enforced 

238 npvalue = self._validate_searchsorted_value(value) 

239 return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter) 

240 

241 def _validate_searchsorted_value( 

242 self, value: NumpyValueArrayLike | ExtensionArray 

243 ) -> NumpyValueArrayLike: 

244 # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, 

245 # we can remove this and use _validate_setitem_value directly 

246 if isinstance(value, ExtensionArray): 

247 return value.to_numpy() 

248 else: 

249 return value 

250 

251 @doc(ExtensionArray.shift) 

252 def shift(self, periods=1, fill_value=None, axis=0): 

253 

254 fill_value = self._validate_shift_value(fill_value) 

255 new_values = shift(self._ndarray, periods, axis, fill_value) 

256 

257 return self._from_backing_data(new_values) 

258 

259 def _validate_shift_value(self, fill_value): 

260 # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, 

261 # we can remove this and use validate_fill_value directly 

262 return self._validate_scalar(fill_value) 

263 

264 def __setitem__(self, key, value) -> None: 

265 key = check_array_indexer(self, key) 

266 value = self._validate_setitem_value(value) 

267 self._ndarray[key] = value 

268 

269 def _validate_setitem_value(self, value): 

270 return value 

271 

272 @overload 

273 def __getitem__(self, key: ScalarIndexer) -> Any: 

274 ... 

275 

276 @overload 

277 def __getitem__( 

278 self: NDArrayBackedExtensionArrayT, 

279 key: SequenceIndexer | PositionalIndexerTuple, 

280 ) -> NDArrayBackedExtensionArrayT: 

281 ... 

282 

283 def __getitem__( 

284 self: NDArrayBackedExtensionArrayT, 

285 key: PositionalIndexer2D, 

286 ) -> NDArrayBackedExtensionArrayT | Any: 

287 if lib.is_integer(key): 

288 # fast-path 

289 result = self._ndarray[key] 

290 if self.ndim == 1: 

291 return self._box_func(result) 

292 return self._from_backing_data(result) 

293 

294 # error: Incompatible types in assignment (expression has type "ExtensionArray", 

295 # variable has type "Union[int, slice, ndarray]") 

296 key = extract_array(key, extract_numpy=True) # type: ignore[assignment] 

297 key = check_array_indexer(self, key) 

298 result = self._ndarray[key] 

299 if lib.is_scalar(result): 

300 return self._box_func(result) 

301 

302 result = self._from_backing_data(result) 

303 return result 

304 

305 def _fill_mask_inplace( 

306 self, method: str, limit, mask: npt.NDArray[np.bool_] 

307 ) -> None: 

308 # (for now) when self.ndim == 2, we assume axis=0 

309 func = missing.get_fill_func(method, ndim=self.ndim) 

310 func(self._ndarray.T, limit=limit, mask=mask.T) 

311 return 

312 

313 @doc(ExtensionArray.fillna) 

314 def fillna( 

315 self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None 

316 ) -> NDArrayBackedExtensionArrayT: 

317 value, method = validate_fillna_kwargs( 

318 value, method, validate_scalar_dict_value=False 

319 ) 

320 

321 mask = self.isna() 

322 # error: Argument 2 to "check_value_size" has incompatible type 

323 # "ExtensionArray"; expected "ndarray" 

324 value = missing.check_value_size( 

325 value, mask, len(self) # type: ignore[arg-type] 

326 ) 

327 

328 if mask.any(): 

329 if method is not None: 

330 # TODO: check value is None 

331 # (for now) when self.ndim == 2, we assume axis=0 

332 func = missing.get_fill_func(method, ndim=self.ndim) 

333 npvalues = self._ndarray.T.copy() 

334 func(npvalues, limit=limit, mask=mask.T) 

335 npvalues = npvalues.T 

336 

337 # TODO: PandasArray didn't used to copy, need tests for this 

338 new_values = self._from_backing_data(npvalues) 

339 else: 

340 # fill with value 

341 new_values = self.copy() 

342 new_values[mask] = value 

343 else: 

344 # We validate the fill_value even if there is nothing to fill 

345 if value is not None: 

346 self._validate_setitem_value(value) 

347 

348 new_values = self.copy() 

349 return new_values 

350 

351 # ------------------------------------------------------------------------ 

352 # Reductions 

353 

354 def _wrap_reduction_result(self, axis: int | None, result): 

355 if axis is None or self.ndim == 1: 

356 return self._box_func(result) 

357 return self._from_backing_data(result) 

358 

359 # ------------------------------------------------------------------------ 

360 # __array_function__ methods 

361 

362 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

363 """ 

364 Analogue to np.putmask(self, mask, value) 

365 

366 Parameters 

367 ---------- 

368 mask : np.ndarray[bool] 

369 value : scalar or listlike 

370 

371 Raises 

372 ------ 

373 TypeError 

374 If value cannot be cast to self.dtype. 

375 """ 

376 value = self._validate_setitem_value(value) 

377 

378 np.putmask(self._ndarray, mask, value) 

379 

380 def _where( 

381 self: NDArrayBackedExtensionArrayT, mask: npt.NDArray[np.bool_], value 

382 ) -> NDArrayBackedExtensionArrayT: 

383 """ 

384 Analogue to np.where(mask, self, value) 

385 

386 Parameters 

387 ---------- 

388 mask : np.ndarray[bool] 

389 value : scalar or listlike 

390 

391 Raises 

392 ------ 

393 TypeError 

394 If value cannot be cast to self.dtype. 

395 """ 

396 value = self._validate_setitem_value(value) 

397 

398 res_values = np.where(mask, self._ndarray, value) 

399 return self._from_backing_data(res_values) 

400 

401 # ------------------------------------------------------------------------ 

402 # Index compat methods 

403 

404 def insert( 

405 self: NDArrayBackedExtensionArrayT, loc: int, item 

406 ) -> NDArrayBackedExtensionArrayT: 

407 """ 

408 Make new ExtensionArray inserting new item at location. Follows 

409 Python list.append semantics for negative values. 

410 

411 Parameters 

412 ---------- 

413 loc : int 

414 item : object 

415 

416 Returns 

417 ------- 

418 type(self) 

419 """ 

420 loc = validate_insert_loc(loc, len(self)) 

421 

422 code = self._validate_scalar(item) 

423 

424 new_vals = np.concatenate( 

425 ( 

426 self._ndarray[:loc], 

427 np.asarray([code], dtype=self._ndarray.dtype), 

428 self._ndarray[loc:], 

429 ) 

430 ) 

431 return self._from_backing_data(new_vals) 

432 

433 # ------------------------------------------------------------------------ 

434 # Additional array methods 

435 # These are not part of the EA API, but we implement them because 

436 # pandas assumes they're there. 

437 

438 def value_counts(self, dropna: bool = True) -> Series: 

439 """ 

440 Return a Series containing counts of unique values. 

441 

442 Parameters 

443 ---------- 

444 dropna : bool, default True 

445 Don't include counts of NA values. 

446 

447 Returns 

448 ------- 

449 Series 

450 """ 

451 if self.ndim != 1: 

452 raise NotImplementedError 

453 

454 from pandas import ( 

455 Index, 

456 Series, 

457 ) 

458 

459 if dropna: 

460 # error: Unsupported operand type for ~ ("ExtensionArray") 

461 values = self[~self.isna()]._ndarray # type: ignore[operator] 

462 else: 

463 values = self._ndarray 

464 

465 result = value_counts(values, sort=False, dropna=dropna) 

466 

467 index_arr = self._from_backing_data(np.asarray(result.index._data)) 

468 index = Index(index_arr, name=result.index.name) 

469 return Series(result._values, index=index, name=result.name) 

470 

471 def _quantile( 

472 self: NDArrayBackedExtensionArrayT, 

473 qs: npt.NDArray[np.float64], 

474 interpolation: str, 

475 ) -> NDArrayBackedExtensionArrayT: 

476 # TODO: disable for Categorical if not ordered? 

477 

478 mask = np.asarray(self.isna()) 

479 arr = self._ndarray 

480 fill_value = self._internal_fill_value 

481 

482 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) 

483 

484 res_values = self._cast_quantile_result(res_values) 

485 return self._from_backing_data(res_values) 

486 

487 # TODO: see if we can share this with other dispatch-wrapping methods 

488 def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray: 

489 """ 

490 Cast the result of quantile_with_mask to an appropriate dtype 

491 to pass to _from_backing_data in _quantile. 

492 """ 

493 return res_values 

494 

495 # ------------------------------------------------------------------------ 

496 # numpy-like methods 

497 

498 @classmethod 

499 def _empty( 

500 cls: type_t[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype 

501 ) -> NDArrayBackedExtensionArrayT: 

502 """ 

503 Analogous to np.empty(shape, dtype=dtype) 

504 

505 Parameters 

506 ---------- 

507 shape : tuple[int] 

508 dtype : ExtensionDtype 

509 """ 

510 # The base implementation uses a naive approach to find the dtype 

511 # for the backing ndarray 

512 arr = cls._from_sequence([], dtype=dtype) 

513 backing = np.empty(shape, dtype=arr._ndarray.dtype) 

514 return arr._from_backing_data(backing)