Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/construction.py: 9%
253 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Constructor functions intended to be shared by pd.array, Series.__init__,
3and Index.__new__.
5These should not depend on core.internals.
6"""
7from __future__ import annotations
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Optional,
13 Sequence,
14 Union,
15 cast,
16 overload,
17)
18import warnings
20import numpy as np
21import numpy.ma as ma
23from pandas._libs import lib
24from pandas._libs.tslibs.period import Period
25from pandas._typing import (
26 AnyArrayLike,
27 ArrayLike,
28 Dtype,
29 DtypeObj,
30 T,
31)
32from pandas.errors import IntCastingNaNError
33from pandas.util._exceptions import find_stack_level
35from pandas.core.dtypes.base import (
36 ExtensionDtype,
37 _registry as registry,
38)
39from pandas.core.dtypes.cast import (
40 construct_1d_arraylike_from_scalar,
41 construct_1d_object_array_from_listlike,
42 maybe_cast_to_datetime,
43 maybe_cast_to_integer_array,
44 maybe_convert_platform,
45 maybe_infer_to_datetimelike,
46 maybe_upcast,
47 sanitize_to_nanoseconds,
48)
49from pandas.core.dtypes.common import (
50 is_datetime64_ns_dtype,
51 is_extension_array_dtype,
52 is_float_dtype,
53 is_integer_dtype,
54 is_list_like,
55 is_object_dtype,
56 is_timedelta64_ns_dtype,
57)
58from pandas.core.dtypes.dtypes import (
59 DatetimeTZDtype,
60 PandasDtype,
61)
62from pandas.core.dtypes.generic import (
63 ABCExtensionArray,
64 ABCIndex,
65 ABCPandasArray,
66 ABCRangeIndex,
67 ABCSeries,
68)
69from pandas.core.dtypes.missing import isna
71import pandas.core.common as com
73if TYPE_CHECKING: 73 ↛ 74line 73 didn't jump to line 74, because the condition on line 73 was never true
74 from pandas import (
75 ExtensionArray,
76 Index,
77 Series,
78 )
def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is
        determined by ``pd.options.mode.string_storage`` if the dtype is not
        explicitly given.

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.0.0

           Pandas infers nullable-integer dtype for integer data,
           string dtype for string data, and nullable-boolean dtype
           for boolean data.

        .. versionchanged:: 1.2.0

            Pandas now also infers nullable-floating dtype for float-like
            input data

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <PandasArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    # Imported locally to avoid circular imports between construction and
    # the concrete array implementations.
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # this returns None for not-found dtypes.
    if isinstance(dtype, str):
        dtype = registry.find(dtype) or dtype

    if is_extension_array_dtype(dtype):
        cls = cast(ExtensionDtype, dtype).construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        # No dtype given and none could be taken from the input: infer the
        # scalar type and dispatch to the matching ExtensionArray. Branch
        # order matters; anything that falls through lands in PandasArray.
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            return StringDtype().construct_array_type()._from_sequence(data, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            #  we will fall back to PandasArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    # 1. datetime64[ns]
    # 2. timedelta64[ns]
    # so that a DatetimeArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    elif is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    Anything that is not a Series or Index is returned unchanged, except a
    PandasArray when ``extract_numpy=True``, which is unwrapped to its
    underlying ndarray.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.
    extract_numpy : bool, default False
        Whether to extract the ndarray from a PandasArray.
    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])
    """
    if not isinstance(obj, (ABCIndex, ABCSeries)):
        # Not a pandas container; only a PandasArray gets special handling.
        if extract_numpy and isinstance(obj, ABCPandasArray):
            return obj.to_numpy()
        return obj

    if isinstance(obj, ABCRangeIndex) and not extract_range:
        # Keep the RangeIndex lazy unless the caller asked us to
        # materialize it.
        # https://github.com/python/mypy/issues/1081
        # error: Incompatible return value type (got "RangeIndex", expected
        # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
        return obj  # type: ignore[return-value]

    return obj._values
def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.

    Any input that is not a datetime-like ndarray is returned unchanged.
    """
    if not isinstance(arr, np.ndarray):
        return arr

    kind = arr.dtype.kind
    if kind == "M":
        # datetime64 -> DatetimeArray
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(arr)

    if kind == "m":
        # timedelta64 -> TimedeltaArray
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(arr)

    return arr
def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert numpy MaskedArray to ensure mask is softened.

    Masked positions are replaced by the upcast fill value; a fully
    unmasked input is simply copied.
    """
    mask = ma.getmaskarray(data)
    if not mask.any():
        return data.copy()

    # Upcast first so the fill value is representable in the result dtype.
    data, fill_value = maybe_upcast(data, copy=True)
    data.soften_mask()  # set hardmask False if it was True
    data[mask] = fill_value
    return data
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Scalar, sequence, ndarray, MaskedArray, ExtensionArray, Series,
        Index, or range.
    index : Index or None, default None
        Required when `data` is a scalar, to know how far to broadcast it.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `data` is a scalar and `index` is None, or if the data is
        2-dimensional and ``allow_2d`` is False.
    TypeError
        If `data` is an unordered set.

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # 0-d arrays are treated as the scalar they wrap; remember the dtype
        # before unboxing so it is not lost.
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        # range_to_ndarray always allocates fresh memory, so a further copy
        # would be redundant.
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        # Scalar: broadcast to the length of the (required) index.
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
                # casting aligning with IntCastingNaNError below
                with np.errstate(invalid="ignore"):
                    subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)

                # We ignore the dtype arg and return floating values,
                #  e.g. test_constructor_floating_data_int_dtype
                # TODO: where is the discussion that documents the reason for this?
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.array(data, copy=copy)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            try:
                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
            except ValueError:
                if is_integer_dtype(dtype):
                    casted = np.array(data, copy=False)
                    if casted.dtype.kind == "f":
                        # GH#40110 match the behavior we have if we passed
                        #  a ndarray[float] to begin with
                        return sanitize_array(
                            casted,
                            index,
                            dtype,
                            copy=False,
                            raise_cast_failure=raise_cast_failure,
                            allow_2d=allow_2d,
                        )
                    else:
                        raise
                else:
                    raise
        else:
            # No dtype requested and data is non-empty: let platform
            # inference pick, then try datetime-like inference on object.
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr
657def range_to_ndarray(rng: range) -> np.ndarray:
658 """
659 Cast a range object to ndarray.
660 """
661 # GH#30171 perf avoid realizing range as a list in np.array
662 try:
663 arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
664 except OverflowError:
665 # GH#30173 handling for ranges that overflow int64
666 if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0):
667 try:
668 arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
669 except OverflowError:
670 arr = construct_1d_object_array_from_listlike(list(rng))
671 else:
672 arr = construct_1d_object_array_from_listlike(list(rng))
673 return arr
def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.

    Raises ValueError for 0-d input, and for 2-d ndarray input unless
    ``allow_2d`` is set.
    """
    ndim = getattr(result, "ndim", 0)
    if ndim == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if ndim == 1:
        # the shape we want; broadcast length-1 data against the index
        return _maybe_repeat(result, index)

    # ndim > 1 from here on
    if isinstance(data, np.ndarray):
        if not allow_2d:
            raise ValueError("Data must be 1-dimensional")
        return result

    if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
        # i.e. PandasDtype("O")
        wrapped = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
        cls = dtype.construct_array_type()
        return cls._from_sequence(wrapped, dtype=dtype)

    # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
    # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
    # dtype[Any], None]"
    return com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]
713def _sanitize_str_dtypes(
714 result: np.ndarray, data, dtype: np.dtype | None, copy: bool
715) -> np.ndarray:
716 """
717 Ensure we have a dtype that is supported by pandas.
718 """
720 # This is to prevent mixed-type Series getting all casted to
721 # NumPy string type, e.g. NaN --> '-1#IND'.
722 if issubclass(result.dtype.type, str):
723 # GH#16605
724 # If not empty convert the data to dtype
725 # GH#19853: If data is a scalar, result has already the result
726 if not lib.is_scalar(data):
727 if not np.all(isna(data)):
728 data = np.array(data, dtype=dtype, copy=False)
729 result = np.array(data, dtype=object, copy=copy)
730 return result
733def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
734 """
735 If we have a length-1 array and an index describing how long we expect
736 the result to be, repeat the array.
737 """
738 if index is not None:
739 if 1 == len(arr) != len(index):
740 arr = arr.repeat(len(index))
741 return arr
def _try_cast(
    arr: list | np.ndarray,
    dtype: DtypeObj | None,
    copy: bool,
    raise_cast_failure: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype is None:
        # perf shortcut as this is the most common case
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            if arr.dtype != object:
                # non-object dtypes pass through, normalizing any
                # datetime64/timedelta64 to nanosecond resolution
                return sanitize_to_nanoseconds(arr, copy=copy)

            out = maybe_infer_to_datetimelike(arr)
            if out is arr and copy:
                # maybe_infer_to_datetimelike returned the input unchanged;
                # honor the copy request ourselves
                out = out.copy()
            return out

        else:
            # i.e. list
            varr = np.array(arr, copy=False)
            # filter out cases that we _dont_ want to go through
            #  maybe_infer_to_datetimelike
            if varr.dtype != object or varr.size == 0:
                return varr
            return maybe_infer_to_datetimelike(varr)

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        if isinstance(dtype, DatetimeTZDtype):
            # We can't go through _from_sequence because it handles dt64naive
            #  data differently; _from_sequence treats naive as wall times,
            #  while maybe_cast_to_datetime treats it as UTC
            #  see test_maybe_promote_any_numpy_dtype_with_datetimetz
            # TODO(2.0): with deprecations enforced, should be able to remove
            #  special case.
            return maybe_cast_to_datetime(arr, dtype)
            # TODO: copy?

        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    elif is_object_dtype(dtype):
        if not is_ndarray:
            # build the object array directly to avoid NumPy's inference
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        # wrap datetime-likes first so their scalars (Timestamp/Timedelta)
        # end up in the object array, not raw np.datetime64 values
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in ["m", "M"]
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            if arr.ndim > 1:
                # ensure_string_array expects 1-d input; restore shape after
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in ["m", "M"]:
        return maybe_cast_to_datetime(arr, dtype)

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats

            subarr = maybe_cast_to_integer_array(arr, dtype)
        else:
            # 4 tests fail if we move this to a try/except/else; see
            #  test_constructor_compound_dtypes, test_constructor_cast_failure
            #  test_constructor_dict_cast2, test_loc_setitem_dtype
            subarr = np.array(arr, dtype=dtype, copy=copy)

    except (ValueError, TypeError):
        if raise_cast_failure:
            raise
        else:
            # we only get here with raise_cast_failure False, which means
            #  called via the DataFrame constructor
            # GH#24435
            warnings.warn(
                f"Could not cast to {dtype}, falling back to object. This "
                "behavior is deprecated. In a future version, when a dtype is "
                "passed to 'DataFrame', either all columns will be cast to that "
                "dtype, or a TypeError will be raised.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
def is_empty_data(data: Any) -> bool:
    """
    Utility to check if a Series is instantiated with empty data,
    which does not contain dtype information.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

    Returns
    -------
    bool
    """
    if data is None:
        return True
    # Only dtype-less list-likes (e.g. [] or {}) can be "empty" here;
    # ndarrays and ExtensionArrays carry a dtype even when length 0.
    if is_list_like(data) and not hasattr(data, "dtype"):
        return not data
    return False
def create_series_with_explicit_dtype(
    data: Any = None,
    index: ArrayLike | Index | None = None,
    dtype: Dtype | None = None,
    name: str | None = None,
    copy: bool = False,
    fastpath: bool = False,
    dtype_if_empty: Dtype = object,
) -> Series:
    """
    Helper to pass an explicit dtype when instantiating an empty Series.

    This silences a DeprecationWarning described in GitHub-17261.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
        This dtype will be passed explicitly if an empty Series will
        be instantiated.

    Returns
    -------
    Series
    """
    from pandas.core.series import Series

    # Empty data carries no dtype information; substitute the explicit
    # default so the Series constructor does not have to guess.
    if dtype is None and is_empty_data(data):
        dtype = dtype_if_empty

    return Series(
        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
    )