# Coverage report residue: pandas/core/dtypes/astype.py — 11% of 150 statements covered.
# Generated by coverage.py v6.4.4 at 2023-07-17 14:22 -0600.
1"""
2Functions for implementing 'astype' methods according to pandas conventions,
3particularly ones that differ from numpy.
4"""
5from __future__ import annotations
7import inspect
8from typing import (
9 TYPE_CHECKING,
10 cast,
11 overload,
12)
13import warnings
15import numpy as np
17from pandas._libs import lib
18from pandas._libs.tslibs import is_unitless
19from pandas._libs.tslibs.timedeltas import array_to_timedelta64
20from pandas._typing import (
21 ArrayLike,
22 DtypeObj,
23 IgnoreRaise,
24)
25from pandas.errors import IntCastingNaNError
26from pandas.util._exceptions import find_stack_level
28from pandas.core.dtypes.common import (
29 is_datetime64_dtype,
30 is_datetime64tz_dtype,
31 is_dtype_equal,
32 is_integer_dtype,
33 is_object_dtype,
34 is_timedelta64_dtype,
35 pandas_dtype,
36)
37from pandas.core.dtypes.dtypes import (
38 DatetimeTZDtype,
39 ExtensionDtype,
40 PandasDtype,
41)
42from pandas.core.dtypes.missing import isna
44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true
45 from pandas.core.arrays import (
46 DatetimeArray,
47 ExtensionArray,
48 )
_dtype_obj = np.dtype(object)


@overload
def astype_nansafe(
    arr: np.ndarray, dtype: np.dtype, copy: bool = ..., skipna: bool = ...
) -> np.ndarray:
    ...


@overload
def astype_nansafe(
    arr: np.ndarray, dtype: ExtensionDtype, copy: bool = ..., skipna: bool = ...
) -> ExtensionArray:
    ...


def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """
    # We can get here with 0-dim input from sparse; normalize to >= 1-dim.
    arr = np.atleast_1d(arr)

    # Extension dtypes know how to construct their own array type.
    if isinstance(dtype, ExtensionDtype):
        array_cls = dtype.construct_array_type()
        return array_cls._from_sequence(arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    wants_str = issubclass(dtype.type, str)

    if arr.dtype.kind in ["m", "M"] and (wants_str or dtype == _dtype_obj):
        # Let the datetimelike array classes produce their own repr objects.
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        wrapped = ensure_wrapped_if_datetimelike(arr)
        return wrapped.astype(dtype, copy=copy)

    if wants_str:
        # ensure_string_array handles only 1-dim input; flatten, then restore.
        original_shape = arr.shape
        flat = arr.ravel() if arr.ndim > 1 else arr
        return lib.ensure_string_array(
            flat, skipna=skipna, convert_na_value=False
        ).reshape(original_shape)

    if is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            # NaT has no integer representation.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)
        if dtype.kind == "M":
            # allow frequency conversions
            return arr.astype(dtype)
        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    if is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)
        if dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)
        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    if np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    if is_object_dtype(arr.dtype):
        # If we have a datetime/timedelta array of objects,
        # coerce to a proper dtype and recall astype_nansafe.
        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            coerced = to_datetime(arr.ravel()).values.reshape(arr.shape)
            return astype_nansafe(coerced, dtype, copy=copy)
        if is_timedelta64_dtype(dtype):
            # Because arr.dtype == object this is equivalent to
            # np.asarray(to_timedelta(arr)), but uses a lower-level API
            # that does not require a circular import.
            return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False)

    if dtype.name in ("datetime64", "timedelta64"):
        raise ValueError(
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
175def _astype_float_to_int_nansafe(
176 values: np.ndarray, dtype: np.dtype, copy: bool
177) -> np.ndarray:
178 """
179 astype with a check preventing converting NaN to an meaningless integer value.
180 """
181 if not np.isfinite(values).all():
182 raise IntCastingNaNError(
183 "Cannot convert non-finite values (NA or inf) to integer"
184 )
185 if dtype.kind == "u":
186 # GH#45151
187 if not (values >= 0).all():
188 raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}")
189 return values.astype(dtype, copy=copy)
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray
    """
    # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
    if (
        values.dtype.kind in ["m", "M"]
        and dtype.kind in ["i", "u"]
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        raise TypeError(
            rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # No cast needed; just honor the copy flag.
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        values = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        values = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
        values = np.array(values, dtype=object)

    return values
def astype_array_safe(
    values: ArrayLike, dtype, copy: bool = False, errors: IgnoreRaise = "raise"
) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    This basically is the implementation for DataFrame/Series.astype and
    includes all custom logic for pandas (NaN-safety, converting str to
    object).

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray
    """
    legal_errors = ("raise", "ignore")
    if errors not in legal_errors:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(legal_errors)}. Supplied value is '{errors}'"
        )

    # Reject an ExtensionDtype *class*; callers must pass an instance.
    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )

    dtype = pandas_dtype(dtype)
    if isinstance(dtype, PandasDtype):
        # Ensure we don't end up with a PandasArray
        dtype = dtype.numpy_dtype

    if (
        is_datetime64_dtype(values.dtype)
        # need to do np.dtype check instead of is_datetime64_dtype
        # otherwise pyright complains
        and isinstance(dtype, np.dtype)
        and dtype.kind == "M"
        and not is_unitless(dtype)
        and not is_dtype_equal(dtype, values.dtype)
    ):
        # unit conversion, we would re-cast to nanosecond, so this is
        # effectively just a copy (regardless of copy kwd)
        # TODO(2.0): remove special-case
        return values.copy()

    try:
        return astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        # trying to convert to float
        if errors == "ignore":
            return values
        raise
def astype_td64_unit_conversion(
    values: np.ndarray, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    By pandas convention, converting to non-nano timedelta64
    returns an int64-dtyped array with ints representing multiples
    of the desired timedelta unit. This is essentially division.

    Parameters
    ----------
    values : np.ndarray[timedelta64[ns]]
    dtype : np.dtype
        timedelta64 with unit not-necessarily nano
    copy : bool

    Returns
    -------
    np.ndarray
    """
    if is_dtype_equal(values.dtype, dtype):
        # Same unit: nothing to convert; honor the copy flag.
        return values.copy() if copy else values

    # Otherwise we are converting to non-nano: cast to the target unit
    # without copying, then go to float64 so NaT slots can hold NaN.
    as_unit = values.astype(dtype, copy=False)  # avoid double-copying
    result = as_unit.astype(np.float64)
    np.putmask(result, isna(values), np.nan)
    return result
def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    # GH#33401 we have inconsistent behaviors between
    #  Datetimeindex[naive].astype(tzaware)
    #  Series[dt64].astype(tzaware)
    # This collects them in one place to prevent further fragmentation.
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    values = ensure_wrapped_if_datetimelike(values)
    values = cast("DatetimeArray", values)
    aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior: localize to UTC, then convert.

        # caller is responsible for checking this
        assert values.tz is None and aware
        dtype = cast(DatetimeTZDtype, dtype)

        if copy:
            # this should be the only copy
            values = values.copy()

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        #  goes through the `not via_utc` path
        return values.tz_localize("UTC").tz_convert(dtype.tz)

    # DatetimeArray/DatetimeIndex.astype behavior, as guard clauses.
    if values.tz is None and aware:
        # naive -> aware: deprecated localization.
        dtype = cast(DatetimeTZDtype, dtype)
        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return values.tz_localize(dtype.tz)

    if aware:
        # GH#18951: datetime64_tz dtype but not equal means different tz
        dtype = cast(DatetimeTZDtype, dtype)
        converted = values.tz_convert(dtype.tz)
        return converted.copy() if copy else converted

    if values.tz is not None:
        # aware -> naive: deprecated; round-trip through UTC.
        warnings.warn(
            "Using .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        stripped = values.tz_convert("UTC").tz_localize(None)
        return stripped.copy() if copy else stripped

    raise NotImplementedError("dtype_equal case should be handled elsewhere")