Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/dtypes/astype.py: 11%

150 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Functions for implementing 'astype' methods according to pandas conventions, 

3particularly ones that differ from numpy. 

4""" 

5from __future__ import annotations 

6 

7import inspect 

8from typing import ( 

9 TYPE_CHECKING, 

10 cast, 

11 overload, 

12) 

13import warnings 

14 

15import numpy as np 

16 

17from pandas._libs import lib 

18from pandas._libs.tslibs import is_unitless 

19from pandas._libs.tslibs.timedeltas import array_to_timedelta64 

20from pandas._typing import ( 

21 ArrayLike, 

22 DtypeObj, 

23 IgnoreRaise, 

24) 

25from pandas.errors import IntCastingNaNError 

26from pandas.util._exceptions import find_stack_level 

27 

28from pandas.core.dtypes.common import ( 

29 is_datetime64_dtype, 

30 is_datetime64tz_dtype, 

31 is_dtype_equal, 

32 is_integer_dtype, 

33 is_object_dtype, 

34 is_timedelta64_dtype, 

35 pandas_dtype, 

36) 

37from pandas.core.dtypes.dtypes import ( 

38 DatetimeTZDtype, 

39 ExtensionDtype, 

40 PandasDtype, 

41) 

42from pandas.core.dtypes.missing import isna 

43 

44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true

45 from pandas.core.arrays import ( 

46 DatetimeArray, 

47 ExtensionArray, 

48 ) 

49 

50 

51_dtype_obj = np.dtype(object) 

52 

53 

@overload
def astype_nansafe(
    arr: np.ndarray, dtype: np.dtype, copy: bool = ..., skipna: bool = ...
) -> np.ndarray:
    ...


@overload
def astype_nansafe(
    arr: np.ndarray, dtype: ExtensionDtype, copy: bool = ..., skipna: bool = ...
) -> ExtensionArray:
    ...


def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit;
        or a datetime64/timedelta64 array holding NaT was cast to int64;
        or ``dtype`` was neither an np.dtype nor an ExtensionDtype.
    TypeError
        A datetime64/timedelta64 array was cast to a dtype that is not
        handled by the datetimelike branches below.
    """
    # 0-dim input can reach this function from sparse; promote it to 1-dim.
    arr = np.atleast_1d(arr)

    if isinstance(dtype, ExtensionDtype):
        # Extension dtypes construct the result via their own array class.
        array_cls = dtype.construct_array_type()
        return array_cls._from_sequence(arr, dtype=dtype, copy=copy)
    if not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    to_str = issubclass(dtype.type, str)

    if arr.dtype.kind in ("m", "M") and (to_str or dtype == _dtype_obj):
        # datetimelike -> str/object is delegated to the wrapped array's astype
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        wrapped = ensure_wrapped_if_datetimelike(arr)
        return wrapped.astype(dtype, copy=copy)

    if to_str:
        # ensure_string_array is fed a flattened array here; the original
        # shape is restored on the way out.
        original_shape = arr.shape
        flat = arr.ravel() if arr.ndim > 1 else arr
        strings = lib.ensure_string_array(
            flat, skipna=skipna, convert_na_value=False
        )
        return strings.reshape(original_shape)

    if is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            has_nat = isna(arr).any()
            if has_nat:
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype.kind == "M":
            # allow frequency conversions
            return arr.astype(dtype)

        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    if is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            has_nat = isna(arr).any()
            if has_nat:
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    if np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    if is_object_dtype(arr.dtype):
        # An object array cast to datetime64/timedelta64 is first coerced to
        # a proper datetimelike array; other targets fall through.
        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            parsed = to_datetime(arr.ravel()).values.reshape(arr.shape)
            # re-enter with the coerced array
            return astype_nansafe(parsed, dtype, copy=copy)

        if is_timedelta64_dtype(dtype):
            # bc we know arr.dtype == object, this is equivalent to
            # `np.asarray(to_timedelta(arr))`, but using a lower-level API that
            # does not require a circular import.
            as_td64 = array_to_timedelta64(arr).view("m8[ns]")
            return as_td64.astype(dtype, copy=False)

    if dtype.name in ("datetime64", "timedelta64"):
        raise ValueError(
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )

    # Explicit copy, or required since NumPy can't view from / to object.
    force_copy = copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype)
    if force_copy:
        return arr.astype(dtype, copy=True)
    return arr.astype(dtype, copy=copy)

173 

174 

175def _astype_float_to_int_nansafe( 

176 values: np.ndarray, dtype: np.dtype, copy: bool 

177) -> np.ndarray: 

178 """ 

179 astype with a check preventing converting NaN to an meaningless integer value. 

180 """ 

181 if not np.isfinite(values).all(): 

182 raise IntCastingNaNError( 

183 "Cannot convert non-finite values (NA or inf) to integer" 

184 ) 

185 if dtype.kind == "u": 

186 # GH#45151 

187 if not (values >= 0).all(): 

188 raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}") 

189 return values.astype(dtype, copy=copy) 

190 

191 

def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If a datetime64/timedelta64 array is cast to a numpy integer dtype
        whose itemsize is not 8.
    """
    source_is_datetimelike = values.dtype.kind in ("m", "M")
    if (
        source_is_datetimelike
        and dtype.kind in ("i", "u")
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        raise TypeError(
            rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # Same dtype: nothing to cast, only honour the copy request.
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        result = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        result = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(result.dtype.type, str):
        result = np.array(result, dtype=object)

    return result

237 

238 

def astype_array_safe(
    values: ArrayLike, dtype, copy: bool = False, errors: IgnoreRaise = "raise"
) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    This basically is the implementation for DataFrame/Series.astype. On top
    of ``astype_array`` it validates ``errors``, rejects an ExtensionDtype
    *class* passed in place of an instance, unwraps a PandasDtype to its numpy
    dtype, and optionally swallows casting errors.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the legal values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    errors_legal_values = ("raise", "ignore")
    if errors not in errors_legal_values:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
        )

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )

    dtype = pandas_dtype(dtype)
    if isinstance(dtype, PandasDtype):
        # Ensure we don't end up with a PandasArray
        dtype = dtype.numpy_dtype

    # need to do np.dtype check instead of is_datetime64_dtype
    # otherwise pyright complains
    is_dt64_unit_conversion = (
        is_datetime64_dtype(values.dtype)
        and isinstance(dtype, np.dtype)
        and dtype.kind == "M"
        and not is_unitless(dtype)
        and not is_dtype_equal(dtype, values.dtype)
    )
    if is_dt64_unit_conversion:
        # unit conversion, we would re-cast to nanosecond, so this is
        # effectively just a copy (regardless of copy kwd)
        # TODO(2.0): remove special-case
        return values.copy()

    try:
        return astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        # trying to convert to float
        if errors != "ignore":
            raise
        # errors="ignore": hand back the input unchanged
        return values

309 

310 

def astype_td64_unit_conversion(
    values: np.ndarray, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Convert a timedelta64 array to another timedelta64 unit.

    When ``dtype`` equals the dtype of ``values`` the input is returned
    (copied if requested). Otherwise the values are cast to the requested
    unit and then to float64, so the result holds multiples of the desired
    timedelta unit with NaN at the positions that were missing in ``values``.

    Parameters
    ----------
    values : np.ndarray[timedelta64[ns]]
    dtype : np.dtype
        timedelta64 with unit not-necessarily nano
    copy : bool
        Only consulted when no unit conversion is needed.

    Returns
    -------
    np.ndarray
    """
    if not is_dtype_equal(values.dtype, dtype):
        # Converting to a different unit: go through the target unit first
        # (copy=False avoids double-copying), then to float64 so that missing
        # entries can be represented as NaN.
        converted = values.astype(dtype, copy=False).astype(np.float64)
        np.putmask(converted, isna(values), np.nan)
        return converted

    return values.copy() if copy else values

342 

343 

def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    """
    Cast datetime64 values between timezone-naive and timezone-aware dtypes.

    GH#33401 we have inconsistent behaviors between
        Datetimeindex[naive].astype(tzaware)
        Series[dt64].astype(tzaware)
    This collects them in one place to prevent further fragmentation.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Wrapped into a datetimelike array before use.
    dtype : dtype object
    copy : bool
    via_utc : bool, default False
        If True, use the Series.astype behavior (localize to UTC, then convert
        to the target timezone); the caller must pass tz-naive values and a
        DatetimeTZDtype. If False, use the DatetimeArray/DatetimeIndex.astype
        behavior.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    NotImplementedError
        If ``via_utc`` is False and both ``values`` and ``dtype`` are
        timezone-naive.
    """
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    dt_values = cast("DatetimeArray", ensure_wrapped_if_datetimelike(values))
    target_is_aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior

        # caller is responsible for checking this
        assert dt_values.tz is None and target_is_aware
        tz_dtype = cast(DatetimeTZDtype, dtype)

        if copy:
            # this should be the only copy
            dt_values = dt_values.copy()

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        # goes through the `not via_utc` path
        as_utc = dt_values.tz_localize("UTC")
        return as_utc.tz_convert(tz_dtype.tz)

    # DatetimeArray/DatetimeIndex.astype behavior
    source_is_naive = dt_values.tz is None

    if source_is_naive and target_is_aware:
        tz_dtype = cast(DatetimeTZDtype, dtype)
        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return dt_values.tz_localize(tz_dtype.tz)

    if target_is_aware:
        # GH#18951: datetime64_tz dtype but not equal means different tz
        tz_dtype = cast(DatetimeTZDtype, dtype)
        converted = dt_values.tz_convert(tz_dtype.tz)
        return converted.copy() if copy else converted

    if not source_is_naive:
        warnings.warn(
            "Using .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        stripped = dt_values.tz_convert("UTC").tz_localize(None)
        return stripped.copy() if copy else stripped

    raise NotImplementedError("dtype_equal case should be handled elsewhere")

417 

418 raise NotImplementedError("dtype_equal case should be handled elsewhere")