Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/construction.py: 9%
253 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Constructor functions intended to be shared by pd.array, Series.__init__,
3and Index.__new__.
5These should not depend on core.internals.
6"""
7from __future__ import annotations
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Optional,
13 Sequence,
14 Union,
15 cast,
16 overload,
17)
18import warnings
20import numpy as np
21import numpy.ma as ma
23from pandas._libs import lib
24from pandas._libs.tslibs.period import Period
25from pandas._typing import (
26 AnyArrayLike,
27 ArrayLike,
28 Dtype,
29 DtypeObj,
30 T,
31)
32from pandas.errors import IntCastingNaNError
33from pandas.util._exceptions import find_stack_level
35from pandas.core.dtypes.base import (
36 ExtensionDtype,
37 _registry as registry,
38)
39from pandas.core.dtypes.cast import (
40 construct_1d_arraylike_from_scalar,
41 construct_1d_object_array_from_listlike,
42 maybe_cast_to_datetime,
43 maybe_cast_to_integer_array,
44 maybe_convert_platform,
45 maybe_infer_to_datetimelike,
46 maybe_upcast,
47 sanitize_to_nanoseconds,
48)
49from pandas.core.dtypes.common import (
50 is_datetime64_ns_dtype,
51 is_extension_array_dtype,
52 is_float_dtype,
53 is_integer_dtype,
54 is_list_like,
55 is_object_dtype,
56 is_timedelta64_ns_dtype,
57)
58from pandas.core.dtypes.dtypes import (
59 DatetimeTZDtype,
60 PandasDtype,
61)
62from pandas.core.dtypes.generic import (
63 ABCExtensionArray,
64 ABCIndex,
65 ABCPandasArray,
66 ABCRangeIndex,
67 ABCSeries,
68)
69from pandas.core.dtypes.missing import isna
71import pandas.core.common as com
73if TYPE_CHECKING: 73 ↛ 74line 73 didn't jump to line 74, because the condition on line 73 was never true
74 from pandas import (
75 ExtensionArray,
76 Index,
77 Series,
78 )
def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is
        determined by ``pd.options.mode.string_storage`` if the dtype is not
        explicitly given.

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.0.0

           Pandas infers nullable-integer dtype for integer data,
           string dtype for string data, and nullable-boolean dtype
           for boolean data.

        .. versionchanged:: 1.2.0

            Pandas now also infers nullable-floating dtype for float-like
            input data

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <PandasArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    # Imported locally to avoid circular imports between construction and
    # the concrete array implementations.
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # this returns None for not-found dtypes.
    if isinstance(dtype, str):
        dtype = registry.find(dtype) or dtype

    if is_extension_array_dtype(dtype):
        cls = cast(ExtensionDtype, dtype).construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        # No dtype given and none could be taken from the input: infer the
        # scalar type and dispatch to the matching ExtensionArray. Branch
        # order matters; anything that falls through lands in PandasArray.
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            return StringDtype().construct_array_type()._from_sequence(data, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            #  we will fall back to PandasArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    # 1. datetime64[ns]
    # 2. timedelta64[ns]
    # so that a DatetimeArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    elif is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    Anything that is not a Series or Index is returned unchanged, except a
    PandasArray when ``extract_numpy=True``, which is unwrapped to its
    underlying ndarray.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.
    extract_numpy : bool, default False
        Whether to extract the ndarray from a PandasArray.
    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])
    """
    if not isinstance(obj, (ABCIndex, ABCSeries)):
        # Not a pandas container; only a PandasArray gets special handling.
        if extract_numpy and isinstance(obj, ABCPandasArray):
            return obj.to_numpy()
        return obj

    if isinstance(obj, ABCRangeIndex) and not extract_range:
        # Keep the RangeIndex lazy unless the caller asked us to
        # materialize it.
        # https://github.com/python/mypy/issues/1081
        # error: Incompatible return value type (got "RangeIndex", expected
        # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
        return obj  # type: ignore[return-value]

    return obj._values
def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.

    Any input that is not a datetime-like ndarray is returned unchanged.
    """
    if not isinstance(arr, np.ndarray):
        return arr

    kind = arr.dtype.kind
    if kind == "M":
        # datetime64 -> DatetimeArray
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(arr)

    if kind == "m":
        # timedelta64 -> TimedeltaArray
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(arr)

    return arr
def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert numpy MaskedArray to ensure mask is softened.

    Masked positions are replaced by the upcast fill value; a fully
    unmasked input is simply copied.
    """
    mask = ma.getmaskarray(data)
    if not mask.any():
        return data.copy()

    # Upcast first so the fill value is representable in the result dtype.
    data, fill_value = maybe_upcast(data, copy=True)
    data.soften_mask()  # set hardmask False if it was True
    data[mask] = fill_value
    return data
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Scalar, sequence, ndarray, MaskedArray, ExtensionArray, Series,
        Index, or range.
    index : Index or None, default None
        Required when `data` is a scalar, to know how far to broadcast it.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `data` is a scalar and `index` is None, or if the data is
        2-dimensional and ``allow_2d`` is False.
    TypeError
        If `data` is an unordered set.

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # 0-d arrays are treated as the scalar they wrap; remember the dtype
        # before unboxing so it is not lost.
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        # range_to_ndarray always allocates fresh memory, so a further copy
        # would be redundant.
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        # Scalar: broadcast to the length of the (required) index.
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
                # casting aligning with IntCastingNaNError below
                with np.errstate(invalid="ignore"):
                    subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)

                # We ignore the dtype arg and return floating values,
                #  e.g. test_constructor_floating_data_int_dtype
                # TODO: where is the discussion that documents the reason for this?
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.array(data, copy=copy)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            try:
                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
            except ValueError:
                if is_integer_dtype(dtype):
                    casted = np.array(data, copy=False)
                    if casted.dtype.kind == "f":
                        # GH#40110 match the behavior we have if we passed
                        #  a ndarray[float] to begin with
                        return sanitize_array(
                            casted,
                            index,
                            dtype,
                            copy=False,
                            raise_cast_failure=raise_cast_failure,
                            allow_2d=allow_2d,
                        )
                    else:
                        raise
                else:
                    raise
        else:
            # No dtype requested and data is non-empty: let platform
            # inference pick, then try datetime-like inference on object.
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr
657def range_to_ndarray(rng: range) -> np.ndarray:
658 """
659 Cast a range object to ndarray.
660 """
661 # GH#30171 perf avoid realizing range as a list in np.array
662 try:
663 arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
664 except OverflowError:
665 # GH#30173 handling for ranges that overflow int64
666 if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0):
667 try:
668 arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
669 except OverflowError:
670 arr = construct_1d_object_array_from_listlike(list(rng))
671 else:
672 arr = construct_1d_object_array_from_listlike(list(rng))
673 return arr
def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.

    Raises ValueError for 0-d input, and for 2-d ndarray input unless
    ``allow_2d`` is set.
    """
    ndim = getattr(result, "ndim", 0)
    if ndim == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if ndim == 1:
        # the shape we want; broadcast length-1 data against the index
        return _maybe_repeat(result, index)

    # ndim > 1 from here on
    if isinstance(data, np.ndarray):
        if not allow_2d:
            raise ValueError("Data must be 1-dimensional")
        return result

    if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
        # i.e. PandasDtype("O")
        wrapped = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
        cls = dtype.construct_array_type()
        return cls._from_sequence(wrapped, dtype=dtype)

    # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
    # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
    # dtype[Any], None]"
    return com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]
713def _sanitize_str_dtypes(
714 result: np.ndarray, data, dtype: np.dtype | None, copy: bool
715) -> np.ndarray:
716 """
717 Ensure we have a dtype that is supported by pandas.
718 """
720 # This is to prevent mixed-type Series getting all casted to
721 # NumPy string type, e.g. NaN --> '-1#IND'.
722 if issubclass(result.dtype.type, str):
723 # GH#16605
724 # If not empty convert the data to dtype
725 # GH#19853: If data is a scalar, result has already the result
726 if not lib.is_scalar(data):
727 if not np.all(isna(data)):
728 data = np.array(data, dtype=dtype, copy=False)
729 result = np.array(data, dtype=object, copy=copy)
730 return result
733def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
734 """
735 If we have a length-1 array and an index describing how long we expect
736 the result to be, repeat the array.
737 """
738 if index is not None:
739 if 1 == len(arr) != len(index):
740 arr = arr.repeat(len(index))
741 return arr
def _try_cast(
    arr: list | np.ndarray,
    dtype: DtypeObj | None,
    copy: bool,
    raise_cast_failure: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype is None:
        # perf shortcut as this is the most common case
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            if arr.dtype != object:
                # non-object dtypes pass through, normalizing any
                # datetime64/timedelta64 to nanosecond resolution
                return sanitize_to_nanoseconds(arr, copy=copy)

            out = maybe_infer_to_datetimelike(arr)
            if out is arr and copy:
                # maybe_infer_to_datetimelike returned the input unchanged;
                # honor the copy request ourselves
                out = out.copy()
            return out

        else:
            # i.e. list
            varr = np.array(arr, copy=False)
            # filter out cases that we _dont_ want to go through
            #  maybe_infer_to_datetimelike
            if varr.dtype != object or varr.size == 0:
                return varr
            return maybe_infer_to_datetimelike(varr)

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        if isinstance(dtype, DatetimeTZDtype):
            # We can't go through _from_sequence because it handles dt64naive
            #  data differently; _from_sequence treats naive as wall times,
            #  while maybe_cast_to_datetime treats it as UTC
            #  see test_maybe_promote_any_numpy_dtype_with_datetimetz
            # TODO(2.0): with deprecations enforced, should be able to remove
            #  special case.
            return maybe_cast_to_datetime(arr, dtype)
            # TODO: copy?

        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    elif is_object_dtype(dtype):
        if not is_ndarray:
            # build the object array directly to avoid NumPy's inference
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        # wrap datetime-likes first so their scalars (Timestamp/Timedelta)
        # end up in the object array, not raw np.datetime64 values
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in ["m", "M"]
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            if arr.ndim > 1:
                # ensure_string_array expects 1-d input; restore shape after
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in ["m", "M"]:
        return maybe_cast_to_datetime(arr, dtype)

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats

            subarr = maybe_cast_to_integer_array(arr, dtype)
        else:
            # 4 tests fail if we move this to a try/except/else; see
            #  test_constructor_compound_dtypes, test_constructor_cast_failure
            #  test_constructor_dict_cast2, test_loc_setitem_dtype
            subarr = np.array(arr, dtype=dtype, copy=copy)

    except (ValueError, TypeError):
        if raise_cast_failure:
            raise
        else:
            # we only get here with raise_cast_failure False, which means
            #  called via the DataFrame constructor
            # GH#24435
            warnings.warn(
                f"Could not cast to {dtype}, falling back to object. This "
                "behavior is deprecated. In a future version, when a dtype is "
                "passed to 'DataFrame', either all columns will be cast to that "
                "dtype, or a TypeError will be raised.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
def is_empty_data(data: Any) -> bool:
    """
    Utility to check if a Series is instantiated with empty data,
    which does not contain dtype information.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

    Returns
    -------
    bool
    """
    if data is None:
        return True
    # Only dtype-less list-likes (e.g. [] or {}) can be "empty" here;
    # ndarrays and ExtensionArrays carry a dtype even when length 0.
    if is_list_like(data) and not hasattr(data, "dtype"):
        return not data
    return False
def create_series_with_explicit_dtype(
    data: Any = None,
    index: ArrayLike | Index | None = None,
    dtype: Dtype | None = None,
    name: str | None = None,
    copy: bool = False,
    fastpath: bool = False,
    dtype_if_empty: Dtype = object,
) -> Series:
    """
    Helper to pass an explicit dtype when instantiating an empty Series.

    This silences a DeprecationWarning described in GitHub-17261.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
        This dtype will be passed explicitly if an empty Series will
        be instantiated.

    Returns
    -------
    Series
    """
    from pandas.core.series import Series

    # Empty data carries no dtype information; substitute the explicit
    # default so the Series constructor does not have to guess.
    if dtype is None and is_empty_data(data):
        dtype = dtype_if_empty

    return Series(
        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
    )