Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/construction.py: 9%

253 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Constructor functions intended to be shared by pd.array, Series.__init__, 

3and Index.__new__. 

4 

5These should not depend on core.internals. 

6""" 

7from __future__ import annotations 

8 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 Optional, 

13 Sequence, 

14 Union, 

15 cast, 

16 overload, 

17) 

18import warnings 

19 

20import numpy as np 

21import numpy.ma as ma 

22 

23from pandas._libs import lib 

24from pandas._libs.tslibs.period import Period 

25from pandas._typing import ( 

26 AnyArrayLike, 

27 ArrayLike, 

28 Dtype, 

29 DtypeObj, 

30 T, 

31) 

32from pandas.errors import IntCastingNaNError 

33from pandas.util._exceptions import find_stack_level 

34 

35from pandas.core.dtypes.base import ( 

36 ExtensionDtype, 

37 _registry as registry, 

38) 

39from pandas.core.dtypes.cast import ( 

40 construct_1d_arraylike_from_scalar, 

41 construct_1d_object_array_from_listlike, 

42 maybe_cast_to_datetime, 

43 maybe_cast_to_integer_array, 

44 maybe_convert_platform, 

45 maybe_infer_to_datetimelike, 

46 maybe_upcast, 

47 sanitize_to_nanoseconds, 

48) 

49from pandas.core.dtypes.common import ( 

50 is_datetime64_ns_dtype, 

51 is_extension_array_dtype, 

52 is_float_dtype, 

53 is_integer_dtype, 

54 is_list_like, 

55 is_object_dtype, 

56 is_timedelta64_ns_dtype, 

57) 

58from pandas.core.dtypes.dtypes import ( 

59 DatetimeTZDtype, 

60 PandasDtype, 

61) 

62from pandas.core.dtypes.generic import ( 

63 ABCExtensionArray, 

64 ABCIndex, 

65 ABCPandasArray, 

66 ABCRangeIndex, 

67 ABCSeries, 

68) 

69from pandas.core.dtypes.missing import isna 

70 

71import pandas.core.common as com 

72 

73if TYPE_CHECKING:  [73 ↛ 74: line 73 didn't jump to line 74, because the condition on line 73 was never true]

74 from pandas import ( 

75 ExtensionArray, 

76 Index, 

77 Series, 

78 ) 

79 

80 

81def array( 

82 data: Sequence[object] | AnyArrayLike, 

83 dtype: Dtype | None = None, 

84 copy: bool = True, 

85) -> ExtensionArray: 

86 """ 

87 Create an array. 

88 

89 Parameters 

90 ---------- 

91 data : Sequence of objects 

92 The scalars inside `data` should be instances of the 

93 scalar type for `dtype`. It's expected that `data` 

94 represents a 1-dimensional array of data. 

95 

96 When `data` is an Index or Series, the underlying array 

97 will be extracted from `data`. 

98 

99 dtype : str, np.dtype, or ExtensionDtype, optional 

100 The dtype to use for the array. This may be a NumPy 

101 dtype or an extension type registered with pandas using 

102 :meth:`pandas.api.extensions.register_extension_dtype`. 

103 

104 If not specified, there are two possibilities: 

105 

106 1. When `data` is a :class:`Series`, :class:`Index`, or 

107 :class:`ExtensionArray`, the `dtype` will be taken 

108 from the data. 

109 2. Otherwise, pandas will attempt to infer the `dtype` 

110 from the data. 

111 

112 Note that when `data` is a NumPy array, ``data.dtype`` is 

113 *not* used for inferring the array type. This is because 

114 NumPy cannot represent all the types of data that can be 

115 held in extension arrays. 

116 

117 Currently, pandas will infer an extension dtype for sequences of 

118 

119 ============================== =======================================

120 Scalar Type                    Array Type

121 ============================== =======================================

122 :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`

123 :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`

124 :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`

125 :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`

126 :class:`int`                   :class:`pandas.arrays.IntegerArray`

127 :class:`float`                 :class:`pandas.arrays.FloatingArray`

128 :class:`str`                   :class:`pandas.arrays.StringArray` or

129                                :class:`pandas.arrays.ArrowStringArray`

130 :class:`bool`                  :class:`pandas.arrays.BooleanArray`

131 ============================== =======================================

132 

133 The ExtensionArray created when the scalar type is :class:`str` is determined by 

134 ``pd.options.mode.string_storage`` if the dtype is not explicitly given. 

135 

136 For all other cases, NumPy's usual inference rules will be used. 

137 

138 .. versionchanged:: 1.0.0 

139 

140 Pandas infers nullable-integer dtype for integer data, 

141 string dtype for string data, and nullable-boolean dtype 

142 for boolean data. 

143 

144 .. versionchanged:: 1.2.0 

145 

146 Pandas now also infers nullable-floating dtype for float-like 

147 input data 

148 

149 copy : bool, default True 

150 Whether to copy the data, even if not necessary. Depending 

151 on the type of `data`, creating the new array may require 

152 copying data, even if ``copy=False``. 

153 

154 Returns 

155 ------- 

156 ExtensionArray 

157 The newly created array. 

158 

159 Raises 

160 ------ 

161 ValueError 

162 When `data` is not 1-dimensional. 

163 

164 See Also 

165 -------- 

166 numpy.array : Construct a NumPy array. 

167 Series : Construct a pandas Series. 

168 Index : Construct a pandas Index. 

169 arrays.PandasArray : ExtensionArray wrapping a NumPy array. 

170 Series.array : Extract the array stored within a Series. 

171 

172 Notes 

173 ----- 

174 Omitting the `dtype` argument means pandas will attempt to infer the 

175 best array type from the values in the data. As new array types are 

176 added by pandas and 3rd party libraries, the "best" array type may 

177 change. We recommend specifying `dtype` to ensure that 

178 

179 1. the correct array type for the data is returned 

180 2. the returned array type doesn't change as new extension types 

181 are added by pandas and third-party libraries 

182 

183 Additionally, if the underlying memory representation of the returned 

184 array matters, we recommend specifying the `dtype` as a concrete object 

185 rather than a string alias or allowing it to be inferred. For example, 

186 a future version of pandas or a 3rd-party library may include a 

187 dedicated ExtensionArray for string data. In this event, the following 

188 would no longer return a :class:`arrays.PandasArray` backed by a NumPy 

189 array. 

190 

191 >>> pd.array(['a', 'b'], dtype=str) 

192 <PandasArray> 

193 ['a', 'b'] 

194 Length: 2, dtype: str32 

195 

196 This would instead return the new ExtensionArray dedicated for string 

197 data. If you really need the new array to be backed by a NumPy array, 

198 specify that in the dtype. 

199 

200 >>> pd.array(['a', 'b'], dtype=np.dtype("<U1")) 

201 <PandasArray> 

202 ['a', 'b'] 

203 Length: 2, dtype: str32 

204 

205 Finally, Pandas has arrays that mostly overlap with NumPy 

206 

207 * :class:`arrays.DatetimeArray` 

208 * :class:`arrays.TimedeltaArray` 

209 

210 When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is 

211 passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray`` 

212 rather than a ``PandasArray``. This is for symmetry with the case of 

213 timezone-aware data, which NumPy does not natively support. 

214 

215 >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') 

216 <DatetimeArray> 

217 ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] 

218 Length: 2, dtype: datetime64[ns] 

219 

220 >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]') 

221 <TimedeltaArray> 

222 ['0 days 01:00:00', '0 days 02:00:00'] 

223 Length: 2, dtype: timedelta64[ns] 

224 

225 Examples 

226 -------- 

227 If a dtype is not specified, pandas will infer the best dtype from the values. 

228 See the description of `dtype` for the types pandas infers for. 

229 

230 >>> pd.array([1, 2]) 

231 <IntegerArray> 

232 [1, 2] 

233 Length: 2, dtype: Int64 

234 

235 >>> pd.array([1, 2, np.nan]) 

236 <IntegerArray> 

237 [1, 2, <NA>] 

238 Length: 3, dtype: Int64 

239 

240 >>> pd.array([1.1, 2.2]) 

241 <FloatingArray> 

242 [1.1, 2.2] 

243 Length: 2, dtype: Float64 

244 

245 >>> pd.array(["a", None, "c"]) 

246 <StringArray> 

247 ['a', <NA>, 'c'] 

248 Length: 3, dtype: string 

249 

250 >>> with pd.option_context("string_storage", "pyarrow"): 

251 ... arr = pd.array(["a", None, "c"]) 

252 ... 

253 >>> arr 

254 <ArrowStringArray> 

255 ['a', <NA>, 'c'] 

256 Length: 3, dtype: string 

257 

258 >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) 

259 <PeriodArray> 

260 ['2000-01-01', '2000-01-01'] 

261 Length: 2, dtype: period[D] 

262 

263 You can use the string alias for `dtype` 

264 

265 >>> pd.array(['a', 'b', 'a'], dtype='category') 

266 ['a', 'b', 'a'] 

267 Categories (2, object): ['a', 'b'] 

268 

269 Or specify the actual dtype 

270 

271 >>> pd.array(['a', 'b', 'a'], 

272 ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) 

273 ['a', 'b', 'a'] 

274 Categories (3, object): ['a' < 'b' < 'c'] 

275 

276 If pandas does not infer a dedicated extension type a 

277 :class:`arrays.PandasArray` is returned. 

278 

279 >>> pd.array([1 + 1j, 3 + 2j]) 

280 <PandasArray> 

281 [(1+1j), (3+2j)] 

282 Length: 2, dtype: complex128 

283 

284 As mentioned in the "Notes" section, new extension types may be added 

285 in the future (by pandas or 3rd party libraries), causing the return 

286 value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype` 

287 as a NumPy dtype if you need to ensure there's no future change in 

288 behavior. 

289 

290 >>> pd.array([1, 2], dtype=np.dtype("int32")) 

291 <PandasArray> 

292 [1, 2] 

293 Length: 2, dtype: int32 

294 

295 `data` must be 1-dimensional. A ValueError is raised when the input 

296 has the wrong dimensionality. 

297 

298 >>> pd.array(1) 

299 Traceback (most recent call last): 

300 ... 

301 ValueError: Cannot pass scalar '1' to 'pandas.array'. 

302 """ 

303 from pandas.core.arrays import ( 

304 BooleanArray, 

305 DatetimeArray, 

306 ExtensionArray, 

307 FloatingArray, 

308 IntegerArray, 

309 IntervalArray, 

310 PandasArray, 

311 PeriodArray, 

312 TimedeltaArray, 

313 ) 

314 from pandas.core.arrays.string_ import StringDtype 

315 

316 if lib.is_scalar(data): 

317 msg = f"Cannot pass scalar '{data}' to 'pandas.array'." 

318 raise ValueError(msg) 

319 

320 if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)): 

321 # Note: we exclude np.ndarray here, will do type inference on it 

322 dtype = data.dtype 

323 

324 data = extract_array(data, extract_numpy=True) 

325 

326 # this returns None for not-found dtypes. 

327 if isinstance(dtype, str): 

328 dtype = registry.find(dtype) or dtype 

329 

330 if is_extension_array_dtype(dtype): 

331 cls = cast(ExtensionDtype, dtype).construct_array_type() 

332 return cls._from_sequence(data, dtype=dtype, copy=copy) 

333 

334 if dtype is None: 

335 inferred_dtype = lib.infer_dtype(data, skipna=True) 

336 if inferred_dtype == "period": 

337 period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data) 

338 return PeriodArray._from_sequence(period_data, copy=copy) 

339 

340 elif inferred_dtype == "interval": 

341 return IntervalArray(data, copy=copy) 

342 

343 elif inferred_dtype.startswith("datetime"): 

344 # datetime, datetime64 

345 try: 

346 return DatetimeArray._from_sequence(data, copy=copy) 

347 except ValueError: 

348 # Mixture of timezones, fall back to PandasArray 

349 pass 

350 

351 elif inferred_dtype.startswith("timedelta"): 

352 # timedelta, timedelta64 

353 return TimedeltaArray._from_sequence(data, copy=copy) 

354 

355 elif inferred_dtype == "string": 

356 # StringArray/ArrowStringArray depending on pd.options.mode.string_storage 

357 return StringDtype().construct_array_type()._from_sequence(data, copy=copy) 

358 

359 elif inferred_dtype == "integer": 

360 return IntegerArray._from_sequence(data, copy=copy) 

361 

362 elif ( 

363 inferred_dtype in ("floating", "mixed-integer-float") 

364 and getattr(data, "dtype", None) != np.float16 

365 ): 

366 # GH#44715 Exclude np.float16 bc FloatingArray does not support it; 

367 # we will fall back to PandasArray. 

368 return FloatingArray._from_sequence(data, copy=copy) 

369 

370 elif inferred_dtype == "boolean": 

371 return BooleanArray._from_sequence(data, copy=copy) 

372 

373 # Pandas overrides NumPy for 

374 # 1. datetime64[ns] 

375 # 2. timedelta64[ns] 

376 # so that a DatetimeArray is returned. 

377 if is_datetime64_ns_dtype(dtype): 

378 return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) 

379 elif is_timedelta64_ns_dtype(dtype): 

380 return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) 

381 

382 return PandasArray._from_sequence(data, dtype=dtype, copy=copy) 

383 

384 

385@overload 

386def extract_array( 

387 obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ... 

388) -> ArrayLike: 

389 ... 

390 

391 

392@overload 

393def extract_array( 

394 obj: T, extract_numpy: bool = ..., extract_range: bool = ... 

395) -> T | ArrayLike: 

396 ... 

397 

398 

399def extract_array( 

400 obj: T, extract_numpy: bool = False, extract_range: bool = False 

401) -> T | ArrayLike: 

402 """ 

403 Extract the ndarray or ExtensionArray from a Series or Index. 

404 

405 For all other types, `obj` is just returned as is. 

406 

407 Parameters 

408 ---------- 

409 obj : object 

410 For Series / Index, the underlying ExtensionArray is unboxed. 

411 

412 extract_numpy : bool, default False 

413 Whether to extract the ndarray from a PandasArray. 

414 

415 extract_range : bool, default False 

416 If we have a RangeIndex, return range._values if True 

417 (which is a materialized integer ndarray), otherwise return unchanged. 

418 

419 Returns 

420 ------- 

421 arr : object 

422 

423 Examples 

424 -------- 

425 >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) 

426 ['a', 'b', 'c'] 

427 Categories (3, object): ['a', 'b', 'c'] 

428 

429 Other objects like lists, arrays, and DataFrames are just passed through. 

430 

431 >>> extract_array([1, 2, 3]) 

432 [1, 2, 3] 

433 

434 For an ndarray-backed Series / Index the ndarray is returned. 

435 

436 >>> extract_array(pd.Series([1, 2, 3])) 

437 array([1, 2, 3]) 

438 

439 To extract all the way down to the ndarray, pass ``extract_numpy=True``. 

440 

441 >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True) 

442 array([1, 2, 3]) 

443 """ 

444 if isinstance(obj, (ABCIndex, ABCSeries)): 

445 if isinstance(obj, ABCRangeIndex): 

446 if extract_range: 

447 return obj._values 

448 # https://github.com/python/mypy/issues/1081 

449 # error: Incompatible return value type (got "RangeIndex", expected 

450 # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]") 

451 return obj # type: ignore[return-value] 

452 

453 return obj._values 

454 

455 elif extract_numpy and isinstance(obj, ABCPandasArray): 

456 return obj.to_numpy() 

457 

458 return obj 

459 

460 

461def ensure_wrapped_if_datetimelike(arr): 

462 """ 

463 Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray. 

464 """ 

465 if isinstance(arr, np.ndarray): 

466 if arr.dtype.kind == "M": 

467 from pandas.core.arrays import DatetimeArray 

468 

469 return DatetimeArray._from_sequence(arr) 

470 

471 elif arr.dtype.kind == "m": 

472 from pandas.core.arrays import TimedeltaArray 

473 

474 return TimedeltaArray._from_sequence(arr) 

475 

476 return arr 

477 

478 

479def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: 

480 """ 

481 Convert numpy MaskedArray to ensure mask is softened. 

482 """ 

483 mask = ma.getmaskarray(data) 

484 if mask.any(): 

485 data, fill_value = maybe_upcast(data, copy=True) 

486 data.soften_mask() # set hardmask False if it was True 

487 data[mask] = fill_value 

488 else: 

489 data = data.copy() 

490 return data 

491 

492 

493def sanitize_array( 

494 data, 

495 index: Index | None, 

496 dtype: DtypeObj | None = None, 

497 copy: bool = False, 

498 raise_cast_failure: bool = True, 

499 *, 

500 allow_2d: bool = False, 

501) -> ArrayLike: 

502 """ 

503 Sanitize input data to an ndarray or ExtensionArray, copy if specified, 

504 coerce to the dtype if specified. 

505 

506 Parameters 

507 ---------- 

508 data : Any 

509 index : Index or None, default None 

510 dtype : np.dtype, ExtensionDtype, or None, default None 

511 copy : bool, default False 

512 raise_cast_failure : bool, default True 

513 allow_2d : bool, default False 

514 If False, raise if we have a 2D Arraylike. 

515 

516 Returns 

517 ------- 

518 np.ndarray or ExtensionArray 

519 

520 Notes 

521 ----- 

522 raise_cast_failure=False is only intended to be used when called from the

523 DataFrame constructor, as the dtype keyword there may be interpreted as only

524 applying to a subset of columns, see GH#24435.

525 """ 

526 if isinstance(data, ma.MaskedArray): 

527 data = sanitize_masked_array(data) 

528 

529 if isinstance(dtype, PandasDtype): 

530 # Avoid ending up with a PandasArray 

531 dtype = dtype.numpy_dtype 

532 

533 # extract ndarray or ExtensionArray, ensure we have no PandasArray 

534 data = extract_array(data, extract_numpy=True, extract_range=True) 

535 

536 if isinstance(data, np.ndarray) and data.ndim == 0: 

537 if dtype is None: 

538 dtype = data.dtype 

539 data = lib.item_from_zerodim(data) 

540 elif isinstance(data, range): 

541 # GH#16804 

542 data = range_to_ndarray(data) 

543 copy = False 

544 

545 if not is_list_like(data): 

546 if index is None: 

547 raise ValueError("index must be specified when data is not list-like") 

548 data = construct_1d_arraylike_from_scalar(data, len(index), dtype) 

549 return data 

550 

551 # GH#846 

552 if isinstance(data, np.ndarray): 

553 if isinstance(data, np.matrix): 

554 data = data.A 

555 

556 if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): 

557 # possibility of nan -> garbage 

558 try: 

559 # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int 

560 # casting aligning with IntCastingNaNError below 

561 with np.errstate(invalid="ignore"): 

562 subarr = _try_cast(data, dtype, copy, True) 

563 except IntCastingNaNError: 

564 warnings.warn( 

565 "In a future version, passing float-dtype values containing NaN " 

566 "and an integer dtype will raise IntCastingNaNError " 

567 "(subclass of ValueError) instead of silently ignoring the " 

568 "passed dtype. To retain the old behavior, call Series(arr) or " 

569 "DataFrame(arr) without passing a dtype.", 

570 FutureWarning, 

571 stacklevel=find_stack_level(), 

572 ) 

573 subarr = np.array(data, copy=copy) 

574 except ValueError: 

575 if not raise_cast_failure: 

576 # i.e. called via DataFrame constructor 

577 warnings.warn( 

578 "In a future version, passing float-dtype values and an " 

579 "integer dtype to DataFrame will retain floating dtype " 

580 "if they cannot be cast losslessly (matching Series behavior). " 

581 "To retain the old behavior, use DataFrame(data).astype(dtype)", 

582 FutureWarning, 

583 stacklevel=find_stack_level(), 

584 ) 

585 # GH#40110 until the deprecation is enforced, we _dont_ 

586 # ignore the dtype for DataFrame, and _do_ cast even though 

587 # it is lossy. 

588 dtype = cast(np.dtype, dtype) 

589 return np.array(data, dtype=dtype, copy=copy) 

590 

591 # We ignore the dtype arg and return floating values, 

592 # e.g. test_constructor_floating_data_int_dtype 

593 # TODO: where is the discussion that documents the reason for this? 

594 subarr = np.array(data, copy=copy) 

595 else: 

596 # we will try to copy by-definition here 

597 subarr = _try_cast(data, dtype, copy, raise_cast_failure) 

598 

599 elif isinstance(data, ABCExtensionArray): 

600 # it is already ensured above this is not a PandasArray 

601 subarr = data 

602 

603 if dtype is not None: 

604 subarr = subarr.astype(dtype, copy=copy) 

605 elif copy: 

606 subarr = subarr.copy() 

607 

608 else: 

609 if isinstance(data, (set, frozenset)): 

610 # Raise only for unordered sets, e.g., not for dict_keys 

611 raise TypeError(f"'{type(data).__name__}' type is unordered") 

612 

613 # materialize e.g. generators, convert e.g. tuples, abc.ValueView 

614 if hasattr(data, "__array__"): 

615 # e.g. dask array GH#38645 

616 data = np.array(data, copy=copy) 

617 else: 

618 data = list(data) 

619 

620 if dtype is not None or len(data) == 0: 

621 try: 

622 subarr = _try_cast(data, dtype, copy, raise_cast_failure) 

623 except ValueError: 

624 if is_integer_dtype(dtype): 

625 casted = np.array(data, copy=False) 

626 if casted.dtype.kind == "f": 

627 # GH#40110 match the behavior we have if we passed 

628 # a ndarray[float] to begin with 

629 return sanitize_array( 

630 casted, 

631 index, 

632 dtype, 

633 copy=False, 

634 raise_cast_failure=raise_cast_failure, 

635 allow_2d=allow_2d, 

636 ) 

637 else: 

638 raise 

639 else: 

640 raise 

641 else: 

642 subarr = maybe_convert_platform(data) 

643 if subarr.dtype == object: 

644 subarr = cast(np.ndarray, subarr) 

645 subarr = maybe_infer_to_datetimelike(subarr) 

646 

647 subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) 

648 

649 if isinstance(subarr, np.ndarray): 

650 # at this point we should have dtype be None or subarr.dtype == dtype 

651 dtype = cast(np.dtype, dtype) 

652 subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) 

653 

654 return subarr 

655 

656 

657def range_to_ndarray(rng: range) -> np.ndarray: 

658 """ 

659 Cast a range object to ndarray. 

660 """ 

661 # GH#30171 perf avoid realizing range as a list in np.array 

662 try: 

663 arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64") 

664 except OverflowError: 

665 # GH#30173 handling for ranges that overflow int64 

666 if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0): 

667 try: 

668 arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64") 

669 except OverflowError: 

670 arr = construct_1d_object_array_from_listlike(list(rng)) 

671 else: 

672 arr = construct_1d_object_array_from_listlike(list(rng)) 

673 return arr 

674 

675 

676def _sanitize_ndim( 

677 result: ArrayLike, 

678 data, 

679 dtype: DtypeObj | None, 

680 index: Index | None, 

681 *, 

682 allow_2d: bool = False, 

683) -> ArrayLike: 

684 """ 

685 Ensure we have a 1-dimensional result array. 

686 """ 

687 if getattr(result, "ndim", 0) == 0: 

688 raise ValueError("result should be arraylike with ndim > 0") 

689 

690 elif result.ndim == 1: 

691 # the result that we want 

692 result = _maybe_repeat(result, index) 

693 

694 elif result.ndim > 1: 

695 if isinstance(data, np.ndarray): 

696 if allow_2d: 

697 return result 

698 raise ValueError("Data must be 1-dimensional") 

699 if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): 

700 # i.e. PandasDtype("O") 

701 

702 result = com.asarray_tuplesafe(data, dtype=np.dtype("object")) 

703 cls = dtype.construct_array_type() 

704 result = cls._from_sequence(result, dtype=dtype) 

705 else: 

706 # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type 

707 # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str, 

708 # dtype[Any], None]" 

709 result = com.asarray_tuplesafe(data, dtype=dtype) # type: ignore[arg-type] 

710 return result 

711 

712 

713def _sanitize_str_dtypes( 

714 result: np.ndarray, data, dtype: np.dtype | None, copy: bool 

715) -> np.ndarray: 

716 """ 

717 Ensure we have a dtype that is supported by pandas. 

718 """ 

719 

720 # This is to prevent mixed-type Series getting all casted to 

721 # NumPy string type, e.g. NaN --> '-1#IND'. 

722 if issubclass(result.dtype.type, str): 

723 # GH#16605 

724 # If not empty convert the data to dtype 

725 # GH#19853: If data is a scalar, result has already the result 

726 if not lib.is_scalar(data): 

727 if not np.all(isna(data)): 

728 data = np.array(data, dtype=dtype, copy=False) 

729 result = np.array(data, dtype=object, copy=copy) 

730 return result 

731 

732 

733def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: 

734 """ 

735 If we have a length-1 array and an index describing how long we expect 

736 the result to be, repeat the array. 

737 """ 

738 if index is not None: 

739 if 1 == len(arr) != len(index): 

740 arr = arr.repeat(len(index)) 

741 return arr 

742 

743 

744def _try_cast( 

745 arr: list | np.ndarray, 

746 dtype: DtypeObj | None, 

747 copy: bool, 

748 raise_cast_failure: bool, 

749) -> ArrayLike: 

750 """ 

751 Convert input to numpy ndarray and optionally cast to a given dtype. 

752 

753 Parameters 

754 ---------- 

755 arr : ndarray or list 

756 Excludes: ExtensionArray, Series, Index. 

757 dtype : np.dtype, ExtensionDtype or None 

758 copy : bool 

759 If False, don't copy the data if not needed. 

760 raise_cast_failure : bool 

761 If True, and if a dtype is specified, raise errors during casting. 

762 Otherwise an object array is returned. 

763 

764 Returns 

765 ------- 

766 np.ndarray or ExtensionArray 

767 """ 

768 is_ndarray = isinstance(arr, np.ndarray) 

769 

770 if dtype is None: 

771 # perf shortcut as this is the most common case 

772 if is_ndarray: 

773 arr = cast(np.ndarray, arr) 

774 if arr.dtype != object: 

775 return sanitize_to_nanoseconds(arr, copy=copy) 

776 

777 out = maybe_infer_to_datetimelike(arr) 

778 if out is arr and copy: 

779 out = out.copy() 

780 return out 

781 

782 else: 

783 # i.e. list 

784 varr = np.array(arr, copy=False) 

785 # filter out cases that we _dont_ want to go through 

786 # maybe_infer_to_datetimelike 

787 if varr.dtype != object or varr.size == 0: 

788 return varr 

789 return maybe_infer_to_datetimelike(varr) 

790 

791 elif isinstance(dtype, ExtensionDtype): 

792 # create an extension array from its dtype 

793 if isinstance(dtype, DatetimeTZDtype): 

794 # We can't go through _from_sequence because it handles dt64naive 

795 # data differently; _from_sequence treats naive as wall times, 

796 # while maybe_cast_to_datetime treats it as UTC 

797 # see test_maybe_promote_any_numpy_dtype_with_datetimetz 

798 # TODO(2.0): with deprecations enforced, should be able to remove 

799 # special case. 

800 return maybe_cast_to_datetime(arr, dtype) 

801 # TODO: copy? 

802 

803 array_type = dtype.construct_array_type()._from_sequence 

804 subarr = array_type(arr, dtype=dtype, copy=copy) 

805 return subarr 

806 

807 elif is_object_dtype(dtype): 

808 if not is_ndarray: 

809 subarr = construct_1d_object_array_from_listlike(arr) 

810 return subarr 

811 return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy) 

812 

813 elif dtype.kind == "U": 

814 # TODO: test cases with arr.dtype.kind in ["m", "M"] 

815 if is_ndarray: 

816 arr = cast(np.ndarray, arr) 

817 shape = arr.shape 

818 if arr.ndim > 1: 

819 arr = arr.ravel() 

820 else: 

821 shape = (len(arr),) 

822 return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape( 

823 shape 

824 ) 

825 

826 elif dtype.kind in ["m", "M"]: 

827 return maybe_cast_to_datetime(arr, dtype) 

828 

829 try: 

830 # GH#15832: Check if we are requesting a numeric dtype and 

831 # that we can convert the data to the requested dtype. 

832 if is_integer_dtype(dtype): 

833 # this will raise if we have e.g. floats 

834 

835 subarr = maybe_cast_to_integer_array(arr, dtype) 

836 else: 

837 # 4 tests fail if we move this to a try/except/else; see 

838 # test_constructor_compound_dtypes, test_constructor_cast_failure 

839 # test_constructor_dict_cast2, test_loc_setitem_dtype 

840 subarr = np.array(arr, dtype=dtype, copy=copy) 

841 

842 except (ValueError, TypeError): 

843 if raise_cast_failure: 

844 raise 

845 else: 

846 # we only get here with raise_cast_failure False, which means 

847 # called via the DataFrame constructor 

848 # GH#24435 

849 warnings.warn( 

850 f"Could not cast to {dtype}, falling back to object. This " 

851 "behavior is deprecated. In a future version, when a dtype is " 

852 "passed to 'DataFrame', either all columns will be cast to that " 

853 "dtype, or a TypeError will be raised.", 

854 FutureWarning, 

855 stacklevel=find_stack_level(), 

856 ) 

857 subarr = np.array(arr, dtype=object, copy=copy) 

858 return subarr 

859 

860 

861def is_empty_data(data: Any) -> bool: 

862 """ 

863 Utility to check if a Series is instantiated with empty data, 

864 which does not contain dtype information. 

865 

866 Parameters 

867 ---------- 

868 data : array-like, Iterable, dict, or scalar value 

869 Contains data stored in Series. 

870 

871 Returns 

872 ------- 

873 bool 

874 """ 

875 is_none = data is None 

876 is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") 

877 is_simple_empty = is_list_like_without_dtype and not data 

878 return is_none or is_simple_empty 

879 

880 

881def create_series_with_explicit_dtype( 

882 data: Any = None, 

883 index: ArrayLike | Index | None = None, 

884 dtype: Dtype | None = None, 

885 name: str | None = None, 

886 copy: bool = False, 

887 fastpath: bool = False, 

888 dtype_if_empty: Dtype = object, 

889) -> Series: 

890 """ 

891 Helper to pass an explicit dtype when instantiating an empty Series. 

892 

893 This silences a DeprecationWarning described in GitHub-17261. 

894 

895 Parameters 

896 ---------- 

897 data : Mirrored from Series.__init__ 

898 index : Mirrored from Series.__init__ 

899 dtype : Mirrored from Series.__init__ 

900 name : Mirrored from Series.__init__ 

901 copy : Mirrored from Series.__init__ 

902 fastpath : Mirrored from Series.__init__ 

903 dtype_if_empty : str, numpy.dtype, or ExtensionDtype 

904 This dtype will be passed explicitly if an empty Series will 

905 be instantiated. 

906 

907 Returns 

908 ------- 

909 Series 

910 """ 

911 from pandas.core.series import Series 

912 

913 if is_empty_data(data) and dtype is None: 

914 dtype = dtype_if_empty 

915 return Series( 

916 data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath 

917 )