Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/dtypes/cast.py: 7%

812 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Routines for casting. 

3""" 

4 

5from __future__ import annotations 

6 

7from datetime import ( 

8 date, 

9 datetime, 

10 timedelta, 

11) 

12import functools 

13from typing import ( 

14 TYPE_CHECKING, 

15 Any, 

16 Sized, 

17 TypeVar, 

18 cast, 

19 overload, 

20) 

21import warnings 

22 

23from dateutil.parser import ParserError 

24import numpy as np 

25 

26from pandas._libs import lib 

27from pandas._libs.tslibs import ( 

28 NaT, 

29 OutOfBoundsDatetime, 

30 OutOfBoundsTimedelta, 

31 Timedelta, 

32 Timestamp, 

33 astype_overflowsafe, 

34) 

35from pandas._libs.tslibs.timedeltas import array_to_timedelta64 

36from pandas._typing import ( 

37 ArrayLike, 

38 Dtype, 

39 DtypeObj, 

40 Scalar, 

41) 

42from pandas.errors import IntCastingNaNError 

43from pandas.util._exceptions import find_stack_level 

44from pandas.util._validators import validate_bool_kwarg 

45 

46from pandas.core.dtypes.astype import astype_nansafe 

47from pandas.core.dtypes.common import ( 

48 DT64NS_DTYPE, 

49 TD64NS_DTYPE, 

50 ensure_int8, 

51 ensure_int16, 

52 ensure_int32, 

53 ensure_int64, 

54 ensure_object, 

55 ensure_str, 

56 is_bool, 

57 is_bool_dtype, 

58 is_complex, 

59 is_complex_dtype, 

60 is_datetime64_dtype, 

61 is_datetime64tz_dtype, 

62 is_dtype_equal, 

63 is_extension_array_dtype, 

64 is_float, 

65 is_float_dtype, 

66 is_integer, 

67 is_integer_dtype, 

68 is_numeric_dtype, 

69 is_object_dtype, 

70 is_scalar, 

71 is_string_dtype, 

72 is_timedelta64_dtype, 

73 is_unsigned_integer_dtype, 

74 pandas_dtype, 

75) 

76from pandas.core.dtypes.dtypes import ( 

77 CategoricalDtype, 

78 DatetimeTZDtype, 

79 ExtensionDtype, 

80 IntervalDtype, 

81 PeriodDtype, 

82) 

83from pandas.core.dtypes.generic import ( 

84 ABCExtensionArray, 

85 ABCIndex, 

86 ABCSeries, 

87) 

88from pandas.core.dtypes.inference import is_list_like 

89from pandas.core.dtypes.missing import ( 

90 array_equivalent, 

91 is_valid_na_for_dtype, 

92 isna, 

93 na_value_for_dtype, 

94 notna, 

95) 

96 

97if TYPE_CHECKING: 

98 

99 from pandas import Index 

100 from pandas.core.arrays import ( 

101 Categorical, 

102 DatetimeArray, 

103 ExtensionArray, 

104 IntervalArray, 

105 PeriodArray, 

106 TimedeltaArray, 

107 ) 

108 

109 

110_int8_max = np.iinfo(np.int8).max 

111_int16_max = np.iinfo(np.int16).max 

112_int32_max = np.iinfo(np.int32).max 

113_int64_max = np.iinfo(np.int64).max 

114 

115_dtype_obj = np.dtype(object) 

116 

117NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray) 

118 

119 

120def maybe_convert_platform( 

121 values: list | tuple | range | np.ndarray | ExtensionArray, 

122) -> ArrayLike: 

123 """try to do platform conversion, allow ndarray or list here""" 

124 arr: ArrayLike 

125 

126 if isinstance(values, (list, tuple, range)): 

127 arr = construct_1d_object_array_from_listlike(values) 

128 else: 

129 # The caller is responsible for ensuring that we have np.ndarray 

130 # or ExtensionArray here. 

131 arr = values 

132 

133 if arr.dtype == _dtype_obj: 

134 arr = cast(np.ndarray, arr) 

135 arr = lib.maybe_convert_objects(arr) 

136 

137 return arr 

138 
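# Editorial sketch (not part of the original source): list input is boxed
# into an object ndarray first, then lib.maybe_convert_objects tightens the
# dtype where it can. Expected behavior, assuming standard numpy/pandas:
#
#     >>> maybe_convert_platform([1, 2, 3]).dtype
#     dtype('int64')
#     >>> maybe_convert_platform(np.array(["a", 1], dtype=object)).dtype
#     dtype('O')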

139 

140def is_nested_object(obj) -> bool: 

141 """ 

142 return a boolean if we have a nested object, e.g. a Series with 1 or 

143 more Series elements 

144 

145 This may not necessarily be performant. 

146 

147 """ 

148 return bool( 

149 isinstance(obj, ABCSeries) 

150 and is_object_dtype(obj.dtype) 

151 and any(isinstance(v, ABCSeries) for v in obj._values) 

152 ) 

153 

154 

155def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: 

156 """ 

157 Cast scalar to Timestamp or Timedelta if scalar is datetime-like 

158 and dtype is not object. 

159 

160 Parameters 

161 ---------- 

162 value : scalar 

163 dtype : Dtype, optional 

164 

165 Returns 

166 ------- 

167 scalar 

168 """ 

169 if dtype == _dtype_obj: 

170 pass 

171 elif isinstance(value, (np.datetime64, datetime)): 

172 value = Timestamp(value) 

173 elif isinstance(value, (np.timedelta64, timedelta)): 

174 value = Timedelta(value) 

175 

176 return value 

177 
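# Editorial sketch (not part of the original source): boxing is skipped
# when the target dtype is object, so raw numpy scalars survive there.
#
#     >>> maybe_box_datetimelike(np.datetime64("2021-01-01"))
#     Timestamp('2021-01-01 00:00:00')
#     >>> maybe_box_datetimelike(np.datetime64("2021-01-01"), dtype=object)
#     numpy.datetime64('2021-01-01')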

178 

179def maybe_box_native(value: Scalar) -> Scalar: 

180 """ 

181 If passed a scalar cast the scalar to a python native type. 

182 

183 Parameters 

184 ---------- 

185 value : scalar or Series 

186 

187 Returns 

188 ------- 

189 scalar or Series 

190 """ 

191 if is_float(value): 

192 # error: Argument 1 to "float" has incompatible type 

193 # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; 

194 # expected "Union[SupportsFloat, _SupportsIndex, str]" 

195 value = float(value) # type: ignore[arg-type] 

196 elif is_integer(value): 

197 # error: Argument 1 to "int" has incompatible type 

198 # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]"; 

199 # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]" 

200 value = int(value) # type: ignore[arg-type] 

201 elif is_bool(value): 

202 value = bool(value) 

203 elif isinstance(value, (np.datetime64, np.timedelta64)): 

204 value = maybe_box_datetimelike(value) 

205 return value 

206 
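# Editorial sketch (not part of the original source): numpy scalars come
# back as Python natives, datetimelike scalars as Timestamp/Timedelta.
#
#     >>> type(maybe_box_native(np.int64(3)))
#     <class 'int'>
#     >>> maybe_box_native(np.timedelta64(1, "s"))
#     Timedelta('0 days 00:00:01')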

207 

208def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: 

209 """ 

210 Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting 

211 into a numpy array. Failing to unbox would risk dropping nanoseconds. 

212 

213 Notes 

214 ----- 

215 Caller is responsible for checking dtype.kind in ["m", "M"] 

216 """ 

217 if is_valid_na_for_dtype(value, dtype): 

218 # GH#36541: can't fill array directly with pd.NaT 

219 # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT) 

220 # ValueError: cannot convert float NaN to integer 

221 value = dtype.type("NaT", "ns") 

222 elif isinstance(value, Timestamp): 

223 if value.tz is None: 

224 value = value.to_datetime64() 

225 elif not isinstance(dtype, DatetimeTZDtype): 

226 raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype") 

227 elif isinstance(value, Timedelta): 

228 value = value.to_timedelta64() 

229 

230 _disallow_mismatched_datetimelike(value, dtype) 

231 return value 

232 

233 

234def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): 

235 """ 

236 numpy allows np.array(dt64values, dtype="timedelta64[ns]") and 

237 vice-versa, but we do not want to allow this, so we need to 

238 check explicitly 

239 """ 

240 vdtype = getattr(value, "dtype", None) 

241 if vdtype is None: 

242 return 

243 elif (vdtype.kind == "m" and dtype.kind == "M") or ( 

244 vdtype.kind == "M" and dtype.kind == "m" 

245 ): 

246 raise TypeError(f"Cannot cast {repr(value)} to {dtype}") 

247 

248 

249@overload 

250def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray: 

251 ... 

252 

253 

254@overload 

255def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike: 

256 ... 

257 

258 

259def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: 

260 """ 

261 try to cast to the specified dtype (e.g. convert back to bool/int, 

262 or do an astype of float64->float32) 

263 """ 

264 do_round = False 

265 

266 if isinstance(dtype, str): 

267 if dtype == "infer": 

268 inferred_type = lib.infer_dtype(result, skipna=False) 

269 if inferred_type == "boolean": 

270 dtype = "bool" 

271 elif inferred_type == "integer": 

272 dtype = "int64" 

273 elif inferred_type == "datetime64": 

274 dtype = "datetime64[ns]" 

275 elif inferred_type in ["timedelta", "timedelta64"]: 

276 dtype = "timedelta64[ns]" 

277 

278 # try to upcast here 

279 elif inferred_type == "floating": 

280 dtype = "int64" 

281 if issubclass(result.dtype.type, np.number): 

282 do_round = True 

283 

284 else: 

285 # TODO: complex? what if result is already non-object? 

286 dtype = "object" 

287 

288 dtype = np.dtype(dtype) 

289 

290 if not isinstance(dtype, np.dtype): 

291 # enforce our signature annotation 

292 raise TypeError(dtype) # pragma: no cover 

293 

294 converted = maybe_downcast_numeric(result, dtype, do_round) 

295 if converted is not result: 

296 return converted 

297 

298 # a datetimelike 

299 # GH12821, iNaT is cast to float 

300 if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: 

301 result = result.astype(dtype) 

302 

303 elif dtype.kind == "m" and result.dtype == _dtype_obj: 

304 # test_where_downcast_to_td64 

305 result = cast(np.ndarray, result) 

306 result = array_to_timedelta64(result) 

307 

308 elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj: 

309 return np.asarray(maybe_cast_to_datetime(result, dtype=dtype)) 

310 

311 return result 

312 

313 

314@overload 

315def maybe_downcast_numeric( 

316 result: np.ndarray, dtype: np.dtype, do_round: bool = False 

317) -> np.ndarray: 

318 ... 

319 

320 

321@overload 

322def maybe_downcast_numeric( 

323 result: ExtensionArray, dtype: DtypeObj, do_round: bool = False 

324) -> ArrayLike: 

325 ... 

326 

327 

328def maybe_downcast_numeric( 

329 result: ArrayLike, dtype: DtypeObj, do_round: bool = False 

330) -> ArrayLike: 

331 """ 

332 Subset of maybe_downcast_to_dtype restricted to numeric dtypes. 

333 

334 Parameters 

335 ---------- 

336 result : ndarray or ExtensionArray 

337 dtype : np.dtype or ExtensionDtype 

338 do_round : bool 

339 

340 Returns 

341 ------- 

342 ndarray or ExtensionArray 

343 """ 

344 if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype): 

345 # e.g. SparseDtype has no itemsize attr 

346 return result 

347 

348 def trans(x): 

349 if do_round: 

350 return x.round() 

351 return x 

352 

353 if dtype.kind == result.dtype.kind: 

354 # don't allow upcasts here (except if empty) 

355 if result.dtype.itemsize <= dtype.itemsize and result.size: 

356 return result 

357 

358 if is_bool_dtype(dtype) or is_integer_dtype(dtype): 

359 

360 if not result.size: 

361 # if we don't have any elements, just astype it 

362 return trans(result).astype(dtype) 

363 

364 # do a test on the first element, if it fails then we are done 

365 r = result.ravel() 

366 arr = np.array([r[0]]) 

367 

368 if isna(arr).any(): 

369 # if we have any nulls, then we are done 

370 return result 

371 

372 elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): 

373 # a comparable, e.g. a Decimal may slip in here 

374 return result 

375 

376 if ( 

377 issubclass(result.dtype.type, (np.object_, np.number)) 

378 and notna(result).all() 

379 ): 

380 new_result = trans(result).astype(dtype) 

381 if new_result.dtype.kind == "O" or result.dtype.kind == "O": 

382 # np.allclose may raise TypeError on object-dtype 

383 if (new_result == result).all(): 

384 return new_result 

385 else: 

386 if np.allclose(new_result, result, rtol=0): 

387 return new_result 

388 

389 elif ( 

390 issubclass(dtype.type, np.floating) 

391 and not is_bool_dtype(result.dtype) 

392 and not is_string_dtype(result.dtype) 

393 ): 

394 new_result = result.astype(dtype) 

395 

396 # Adjust tolerances based on floating point size 

397 size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16} 

398 

399 atol = size_tols.get(new_result.dtype.itemsize, 0.0) 

400 

401 # Check downcast float values are still equal within 7 digits when 

402 # converting from float64 to float32 

403 if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol): 

404 return new_result 

405 

406 elif dtype.kind == result.dtype.kind == "c": 

407 new_result = result.astype(dtype) 

408 

409 if array_equivalent(new_result, result): 

410 # TODO: use tolerance like we do for float? 

411 return new_result 

412 

413 return result 

414 
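# Editorial sketch (not part of the original source): the downcast only
# sticks when values survive a round-trip back to the original.
#
#     >>> maybe_downcast_numeric(np.array([1.0, 2.0]), np.dtype("int64"))
#     array([1, 2])
#     >>> maybe_downcast_numeric(np.array([1.5, 2.0]), np.dtype("int64"))
#     array([1.5, 2. ])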

415 

416def maybe_cast_pointwise_result( 

417 result: ArrayLike, 

418 dtype: DtypeObj, 

419 numeric_only: bool = False, 

420 same_dtype: bool = True, 

421) -> ArrayLike: 

422 """ 

423 Try casting result of a pointwise operation back to the original dtype if 

424 appropriate. 

425 

426 Parameters 

427 ---------- 

428 result : array-like 

429 Result to cast. 

430 dtype : np.dtype or ExtensionDtype 

431 Dtype of the input Series from which result was calculated. 

432 numeric_only : bool, default False 

433 Whether to cast only numerics or datetimes as well. 

434 same_dtype : bool, default True 

435 Specify dtype when calling _from_sequence 

436 

437 Returns 

438 ------- 

439 result : array-like 

440 result, possibly cast to the dtype. 

441 """ 

442 

443 assert not is_scalar(result) 

444 

445 if isinstance(dtype, ExtensionDtype): 

446 if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)): 

447 # TODO: avoid this special-casing 

448 # We have to special case categorical so as not to upcast 

449 # things like counts back to categorical 

450 

451 cls = dtype.construct_array_type() 

452 if same_dtype: 

453 result = maybe_cast_to_extension_array(cls, result, dtype=dtype) 

454 else: 

455 result = maybe_cast_to_extension_array(cls, result) 

456 

457 elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only: 

458 result = maybe_downcast_to_dtype(result, dtype) 

459 

460 return result 

461 

462 

463def maybe_cast_to_extension_array( 

464 cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None 

465) -> ArrayLike: 

466 """ 

467 Call to `_from_sequence` that returns the object unchanged on Exception. 

468 

469 Parameters 

470 ---------- 

471 cls : class, subclass of ExtensionArray 

472 obj : arraylike 

473 Values to pass to cls._from_sequence 

474 dtype : ExtensionDtype, optional 

475 

476 Returns 

477 ------- 

478 ExtensionArray or obj 

479 """ 

480 from pandas.core.arrays.string_ import BaseStringArray 

481 

482 assert isinstance(cls, type), f"must pass a type: {cls}" 

483 assertion_msg = f"must pass a subclass of ExtensionArray: {cls}" 

484 assert issubclass(cls, ABCExtensionArray), assertion_msg 

485 

486 # Everything can be converted to StringArrays, but we may not want to convert 

487 if issubclass(cls, BaseStringArray) and lib.infer_dtype(obj) != "string": 

488 return obj 

489 

490 try: 

491 result = cls._from_sequence(obj, dtype=dtype) 

492 except Exception: 

493 # We can't predict what downstream EA constructors may raise 

494 result = obj 

495 return result 

496 

497 

498@overload 

499def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: 

500 ... 

501 

502 

503@overload 

504def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype: 

505 ... 

506 

507 

508def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj: 

509 """ 

510 If we have a dtype that cannot hold NA values, find the best match that can. 

511 """ 

512 if isinstance(dtype, ExtensionDtype): 

513 if dtype._can_hold_na: 

514 return dtype 

515 elif isinstance(dtype, IntervalDtype): 

516 # TODO(GH#45349): don't special-case IntervalDtype, allow 

517 # overriding instead of returning object below. 

518 return IntervalDtype(np.float64, closed=dtype.closed) 

519 return _dtype_obj 

520 elif dtype.kind == "b": 

521 return _dtype_obj 

522 elif dtype.kind in ["i", "u"]: 

523 return np.dtype(np.float64) 

524 return dtype 

525 
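# Editorial sketch (not part of the original source) of the promotions
# above: ints go to float64 (NaN-capable), bool goes to object, and
# NA-capable dtypes pass through unchanged.
#
#     >>> ensure_dtype_can_hold_na(np.dtype("int64"))
#     dtype('float64')
#     >>> ensure_dtype_can_hold_na(np.dtype("bool"))
#     dtype('O')
#     >>> ensure_dtype_can_hold_na(np.dtype("float32"))
#     dtype('float32')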

526 

527def maybe_promote(dtype: np.dtype, fill_value=np.nan): 

528 """ 

529 Find the minimal dtype that can hold both the given dtype and fill_value. 

530 

531 Parameters 

532 ---------- 

533 dtype : np.dtype 

534 fill_value : scalar, default np.nan 

535 

536 Returns 

537 ------- 

538 dtype 

539 Upcasted from dtype argument if necessary. 

540 fill_value 

541 Upcasted from fill_value argument if necessary. 

542 

543 Raises 

544 ------ 

545 ValueError 

546 If fill_value is a non-scalar and dtype is not object. 

547 """ 

548 # TODO(2.0): need to directly use the non-cached version as long as we 

549 # possibly raise a deprecation warning for datetime dtype 

550 if dtype.kind == "M": 

551 return _maybe_promote(dtype, fill_value) 

552 # for performance, we are using a cached version of the actual implementation 

553 # of the function in _maybe_promote. However, this doesn't always work (in case 

554 # of non-hashable arguments), so we fallback to the actual implementation if needed 

555 try: 

556 # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type 

557 # "Type[Any]"; expected "Hashable" [arg-type] 

558 return _maybe_promote_cached( 

559 dtype, fill_value, type(fill_value) # type: ignore[arg-type] 

560 ) 

561 except TypeError: 

562 # if fill_value is not hashable (required for caching) 

563 return _maybe_promote(dtype, fill_value) 

564 

565 

566@functools.lru_cache(maxsize=128) 

567def _maybe_promote_cached(dtype, fill_value, fill_value_type): 

568 # The cached version of _maybe_promote below 

569 # This also uses fill_value_type as an (unused) argument for the 

570 # cache lookup -> to differentiate 1 and True 

571 return _maybe_promote(dtype, fill_value) 

572 

573 

574def _maybe_promote(dtype: np.dtype, fill_value=np.nan): 

575 # The actual implementation of the function, use `maybe_promote` above for 

576 # a cached version. 

577 if not is_scalar(fill_value): 

578 # with object dtype there is nothing to promote, and the user can 

579 # pass pretty much any weird fill_value they like 

580 if not is_object_dtype(dtype): 

581 # a non-object dtype cannot hold an arbitrary non-scalar 

582 # fill_value, so refuse rather than coerce silently 

583 raise ValueError("fill_value must be a scalar") 

584 dtype = _dtype_obj 

585 return dtype, fill_value 

586 

587 kinds = ["i", "u", "f", "c", "m", "M"] 

588 if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds: 

589 dtype = ensure_dtype_can_hold_na(dtype) 

590 fv = na_value_for_dtype(dtype) 

591 return dtype, fv 

592 

593 elif isinstance(dtype, CategoricalDtype): 

594 if fill_value in dtype.categories or isna(fill_value): 

595 return dtype, fill_value 

596 else: 

597 return object, ensure_object(fill_value) 

598 

599 elif isna(fill_value): 

600 dtype = _dtype_obj 

601 if fill_value is None: 

602 # but we retain e.g. pd.NA 

603 fill_value = np.nan 

604 return dtype, fill_value 

605 

606 # returns tuple of (dtype, fill_value) 

607 if issubclass(dtype.type, np.datetime64): 

608 inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) 

609 if inferred == dtype: 

610 return dtype, fv 

611 

612 # TODO(2.0): once this deprecation is enforced, this whole case 

613 # becomes equivalent to: 

614 # dta = DatetimeArray._from_sequence([], dtype="M8[ns]") 

615 # try: 

616 # fv = dta._validate_setitem_value(fill_value) 

617 # return dta.dtype, fv 

618 # except (ValueError, TypeError): 

619 # return _dtype_obj, fill_value 

620 if isinstance(fill_value, date) and not isinstance(fill_value, datetime): 

621 # deprecate casting of date object to match infer_dtype_from_scalar 

622 # and DatetimeArray._validate_setitem_value 

623 try: 

624 fv = Timestamp(fill_value).to_datetime64() 

625 except OutOfBoundsDatetime: 

626 pass 

627 else: 

628 warnings.warn( 

629 "Using a `date` object for fill_value with `datetime64[ns]` " 

630 "dtype is deprecated. In a future version, this will be cast " 

631 "to object dtype. Pass `fill_value=Timestamp(date_obj)` instead.", 

632 FutureWarning, 

633 stacklevel=find_stack_level(), 

634 ) 

635 return dtype, fv 

636 elif isinstance(fill_value, str): 

637 try: 

638 # explicitly wrap in str to convert np.str_ 

639 fv = Timestamp(str(fill_value)) 

640 except (ValueError, TypeError): 

641 pass 

642 else: 

643 if isna(fv) or fv.tz is None: 

644 return dtype, fv.asm8 

645 

646 return np.dtype("object"), fill_value 

647 

648 elif issubclass(dtype.type, np.timedelta64): 

649 inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) 

650 if inferred == dtype: 

651 return dtype, fv 

652 

653 return np.dtype("object"), fill_value 

654 

655 elif is_float(fill_value): 

656 if issubclass(dtype.type, np.bool_): 

657 dtype = np.dtype(np.object_) 

658 

659 elif issubclass(dtype.type, np.integer): 

660 dtype = np.dtype(np.float64) 

661 

662 elif dtype.kind == "f": 

663 mst = np.min_scalar_type(fill_value) 

664 if mst > dtype: 

665 # e.g. mst is np.float64 and dtype is np.float32 

666 dtype = mst 

667 

668 elif dtype.kind == "c": 

669 mst = np.min_scalar_type(fill_value) 

670 dtype = np.promote_types(dtype, mst) 

671 

672 elif is_bool(fill_value): 

673 if not issubclass(dtype.type, np.bool_): 

674 dtype = np.dtype(np.object_) 

675 

676 elif is_integer(fill_value): 

677 if issubclass(dtype.type, np.bool_): 

678 dtype = np.dtype(np.object_) 

679 

680 elif issubclass(dtype.type, np.integer): 

681 if not np.can_cast(fill_value, dtype): 

682 # upcast to prevent overflow 

683 mst = np.min_scalar_type(fill_value) 

684 dtype = np.promote_types(dtype, mst) 

685 if dtype.kind == "f": 

686 # Case where we disagree with numpy 

687 dtype = np.dtype(np.object_) 

688 

689 elif is_complex(fill_value): 

690 if issubclass(dtype.type, np.bool_): 

691 dtype = np.dtype(np.object_) 

692 

693 elif issubclass(dtype.type, (np.integer, np.floating)): 

694 mst = np.min_scalar_type(fill_value) 

695 dtype = np.promote_types(dtype, mst) 

696 

697 elif dtype.kind == "c": 

698 mst = np.min_scalar_type(fill_value) 

699 if mst > dtype: 

700 # e.g. mst is np.complex128 and dtype is np.complex64 

701 dtype = mst 

702 

703 else: 

704 dtype = np.dtype(np.object_) 

705 

706 # in case we have a string that looked like a number 

707 if issubclass(dtype.type, (bytes, str)): 

708 dtype = np.dtype(np.object_) 

709 

710 fill_value = _ensure_dtype_type(fill_value, dtype) 

711 return dtype, fill_value 

712 
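# Editorial sketch (not part of the original source): maybe_promote pairs
# the (possibly upcast) dtype with a compatible fill_value so callers can
# allocate and fill in one step.
#
#     >>> maybe_promote(np.dtype("int64"), np.nan)
#     (dtype('float64'), nan)
#     >>> maybe_promote(np.dtype("float64"), "foo")
#     (dtype('O'), 'foo')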

713 

714def _ensure_dtype_type(value, dtype: np.dtype): 

715 """ 

716 Ensure that the given value is an instance of the given dtype. 

717 

718 e.g. if our dtype is np.complex64, we should have an instance of that 

719 as opposed to a python complex object. 

720 

721 Parameters 

722 ---------- 

723 value : object 

724 dtype : np.dtype 

725 

726 Returns 

727 ------- 

728 object 

729 """ 

730 # Start with exceptions in which we do _not_ cast to numpy types 

731 

732 if dtype == _dtype_obj: 

733 return value 

734 

735 # Note: before we get here we have already excluded isna(value) 

736 return dtype.type(value) 

737 

738 

739def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: 

740 """ 

741 Interpret the dtype from a scalar or array. 

742 

743 Parameters 

744 ---------- 

745 val : object 

746 pandas_dtype : bool, default False 

747 whether to infer dtype including pandas extension types. 

748 If False, a scalar/array belonging to a pandas extension type is 

749 inferred as object 

750 """ 

751 if not is_list_like(val): 

752 return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) 

753 return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) 

754 

755 

756def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: 

757 """ 

758 Interpret the dtype from a scalar. 

759 

760 Parameters 

761 ---------- 

762 pandas_dtype : bool, default False 

763 whether to infer dtype including pandas extension types. 

764 If False, a scalar belonging to a pandas extension type is inferred 

765 as object 

766 """ 

767 dtype: DtypeObj = _dtype_obj 

768 

769 # a 1-element ndarray 

770 if isinstance(val, np.ndarray): 

771 if val.ndim != 0: 

772 msg = "invalid ndarray passed to infer_dtype_from_scalar" 

773 raise ValueError(msg) 

774 

775 dtype = val.dtype 

776 val = lib.item_from_zerodim(val) 

777 

778 elif isinstance(val, str): 

779 

780 # If we create an empty array using a string to infer 

781 # the dtype, NumPy will only allocate one character per entry 

782 # so this is kind of bad. Alternately we could use np.repeat 

783 # instead of np.empty (but then you still don't want things 

784 # coming out as np.str_!) 

785 

786 dtype = _dtype_obj 

787 

788 elif isinstance(val, (np.datetime64, datetime)): 

789 try: 

790 val = Timestamp(val) 

791 except OutOfBoundsDatetime: 

792 return _dtype_obj, val 

793 

794 # error: Non-overlapping identity check (left operand type: "Timestamp", 

795 # right operand type: "NaTType") 

796 if val is NaT or val.tz is None: # type: ignore[comparison-overlap] 

797 dtype = np.dtype("M8[ns]") 

798 val = val.to_datetime64() 

799 else: 

800 if pandas_dtype: 

801 dtype = DatetimeTZDtype(unit="ns", tz=val.tz) 

802 else: 

803 # return datetimetz as object 

804 return _dtype_obj, val 

805 

806 elif isinstance(val, (np.timedelta64, timedelta)): 

807 try: 

808 val = Timedelta(val) 

809 except (OutOfBoundsTimedelta, OverflowError): 

810 dtype = _dtype_obj 

811 else: 

812 dtype = np.dtype("m8[ns]") 

813 val = np.timedelta64(val.value, "ns") 

814 

815 elif is_bool(val): 

816 dtype = np.dtype(np.bool_) 

817 

818 elif is_integer(val): 

819 if isinstance(val, np.integer): 

820 dtype = np.dtype(type(val)) 

821 else: 

822 dtype = np.dtype(np.int64) 

823 

824 try: 

825 np.array(val, dtype=dtype) 

826 except OverflowError: 

827 dtype = np.array(val).dtype 

828 

829 elif is_float(val): 

830 if isinstance(val, np.floating): 

831 dtype = np.dtype(type(val)) 

832 else: 

833 dtype = np.dtype(np.float64) 

834 

835 elif is_complex(val): 

836 dtype = np.dtype(np.complex_) 

837 

838 elif pandas_dtype: 

839 if lib.is_period(val): 

840 dtype = PeriodDtype(freq=val.freq) 

841 elif lib.is_interval(val): 

842 subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] 

843 dtype = IntervalDtype(subtype=subtype, closed=val.closed) 

844 

845 return dtype, val 

846 
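# Editorial sketch (not part of the original source): tz-aware scalars
# only keep a pandas dtype when pandas_dtype=True.
#
#     >>> infer_dtype_from_scalar(1)
#     (dtype('int64'), 1)
#     >>> ts = Timestamp("2021-01-01", tz="UTC")
#     >>> infer_dtype_from_scalar(ts)[0]
#     dtype('O')
#     >>> infer_dtype_from_scalar(ts, pandas_dtype=True)[0]
#     datetime64[ns, UTC]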

847 

848def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: 

849 """ 

850 Convert datetimelike-keyed dicts to a Timestamp-keyed dict. 

851 

852 Parameters 

853 ---------- 

854 d: dict-like object 

855 

856 Returns 

857 ------- 

858 dict 

859 """ 

860 return {maybe_box_datetimelike(key): value for key, value in d.items()} 

861 
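# Editorial sketch (not part of the original source):
#
#     >>> dict_compat({np.datetime64("2021-01-01"): 1})
#     {Timestamp('2021-01-01 00:00:00'): 1}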

862 

863def infer_dtype_from_array( 

864 arr, pandas_dtype: bool = False 

865) -> tuple[DtypeObj, ArrayLike]: 

866 """ 

867 Infer the dtype from an array. 

868 

869 Parameters 

870 ---------- 

871 arr : array 

872 pandas_dtype : bool, default False 

873 whether to infer dtype including pandas extension types. 

874 If False, an array belonging to a pandas extension type 

875 is inferred as object 

876 

877 Returns 

878 ------- 

879 tuple (numpy-compat/pandas-compat dtype, array) 

880 

881 Notes 

882 ----- 

883 If pandas_dtype=False, these infer to numpy dtypes 

884 exactly, with the exception that mixed / object dtypes 

885 are not coerced by stringifying or conversion. 

886 

887 If pandas_dtype=True, datetime64tz-aware/categorical 

888 types will retain their character. 

889 

890 Examples 

891 -------- 

892 >>> np.asarray([1, '1']) 

893 array(['1', '1'], dtype='<U21') 

894 

895 >>> infer_dtype_from_array([1, '1']) 

896 (dtype('O'), [1, '1']) 

897 """ 

898 if isinstance(arr, np.ndarray): 

899 return arr.dtype, arr 

900 

901 if not is_list_like(arr): 

902 raise TypeError("'arr' must be list-like") 

903 

904 if pandas_dtype and is_extension_array_dtype(arr): 

905 return arr.dtype, arr 

906 

907 elif isinstance(arr, ABCSeries): 

908 return arr.dtype, np.asarray(arr) 

909 

910 # don't force numpy coerce with nan's 

911 inferred = lib.infer_dtype(arr, skipna=False) 

912 if inferred in ["string", "bytes", "mixed", "mixed-integer"]: 

913 return (np.dtype(np.object_), arr) 

914 

915 arr = np.asarray(arr) 

916 return arr.dtype, arr 

917 

918 

919def _maybe_infer_dtype_type(element): 

920 """ 

921 Try to infer an object's dtype, for use in arithmetic ops. 

922 

923 Uses `element.dtype` if that's available. 

924 Objects implementing the iterator protocol are cast to a NumPy array, 

925 and from there the array's type is used. 

926 

927 Parameters 

928 ---------- 

929 element : object 

930 Possibly has a `.dtype` attribute, and possibly the iterator 

931 protocol. 

932 

933 Returns 

934 ------- 

935 tipo : type 

936 

937 Examples 

938 -------- 

939 >>> from collections import namedtuple 

940 >>> Foo = namedtuple("Foo", "dtype") 

941 >>> _maybe_infer_dtype_type(Foo(np.dtype("i8"))) 

942 dtype('int64') 

943 """ 

944 tipo = None 

945 if hasattr(element, "dtype"): 

946 tipo = element.dtype 

947 elif is_list_like(element): 

948 element = np.asarray(element) 

949 tipo = element.dtype 

950 return tipo 

951 

952 

953def maybe_upcast( 

954 values: NumpyArrayT, 

955 fill_value: Scalar = np.nan, 

956 copy: bool = False, 

957) -> tuple[NumpyArrayT, Scalar]: 

958 """ 

959 Provide explicit type promotion and coercion. 

960 

961 Parameters 

962 ---------- 

963 values : np.ndarray 

964 The array that we may want to upcast. 

965 fill_value : what we want to fill with 

966 copy : bool, default False 

967 If True always make a copy even if no upcast is required. 

968 

969 Returns 

970 ------- 

971 values: np.ndarray 

972 the original array, possibly upcast 

973 fill_value: 

974 the fill value, possibly upcast 

975 """ 

976 new_dtype, fill_value = maybe_promote(values.dtype, fill_value) 

977 # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy) 

978 upcast_values = values.astype(new_dtype, copy=copy) 

979 

980 # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]], 

981 # Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]", 

982 # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period, 

983 # Timestamp, Timedelta, Any]]]") 

984 return upcast_values, fill_value # type: ignore[return-value] 

985 
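# Editorial sketch (not part of the original source): promotion and the
# astype happen in one call.
#
#     >>> arr, fv = maybe_upcast(np.array([1, 2, 3]), fill_value=np.nan)
#     >>> arr.dtype, fv
#     (dtype('float64'), nan)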

986 

987def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None: 

988 """ 

989 Change string-like dtypes to object for 

990 ``DataFrame.select_dtypes()``. 

991 """ 

992 # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected 

993 # "Union[dtype[Any], ExtensionDtype, None]" 

994 # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected 

995 # "Union[dtype[Any], ExtensionDtype, None]" 

996 non_string_dtypes = dtype_set - { 

997 np.dtype("S").type, # type: ignore[arg-type] 

998 np.dtype("<U").type, # type: ignore[arg-type] 

999 } 

1000 if non_string_dtypes != dtype_set: 

1001 raise TypeError("string dtypes are not allowed, use 'object' instead") 

1002 

1003 

1004def coerce_indexer_dtype(indexer, categories) -> np.ndarray: 

1005 """coerce the indexer input array to the smallest dtype possible""" 

1006 length = len(categories) 

1007 if length < _int8_max: 

1008 return ensure_int8(indexer) 

1009 elif length < _int16_max: 

1010 return ensure_int16(indexer) 

1011 elif length < _int32_max: 

1012 return ensure_int32(indexer) 

1013 return ensure_int64(indexer) 

1014 
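# Editorial sketch (not part of the original source): with three
# categories the codes fit comfortably in int8.
#
#     >>> codes = np.array([0, 1, 2, 1], dtype=np.int64)
#     >>> coerce_indexer_dtype(codes, ["a", "b", "c"]).dtype
#     dtype('int8')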

1015 

1016def soft_convert_objects( 

1017 values: np.ndarray, 

1018 datetime: bool = True, 

1019 numeric: bool = True, 

1020 timedelta: bool = True, 

1021 period: bool = True, 

1022 copy: bool = True, 

1023) -> ArrayLike: 

1024 """ 

1025 Try to coerce datetime, timedelta, and numeric object-dtype columns 

1026 to inferred dtype. 

1027 

1028 Parameters 

1029 ---------- 

1030 values : np.ndarray[object] 

1031 datetime : bool, default True 

1032 numeric: bool, default True 

1033 timedelta : bool, default True 

1034 period : bool, default True 

1035 copy : bool, default True 

1036 

1037 Returns 

1038 ------- 

1039 np.ndarray or ExtensionArray 

1040 """ 

1041 validate_bool_kwarg(datetime, "datetime") 

1042 validate_bool_kwarg(numeric, "numeric") 

1043 validate_bool_kwarg(timedelta, "timedelta") 

1044 validate_bool_kwarg(copy, "copy") 

1045 

1046 conversion_count = sum((datetime, numeric, timedelta)) 

1047 if conversion_count == 0: 

1048 raise ValueError("At least one of datetime, numeric or timedelta must be True.") 

1049 

1050 # Soft conversions 

1051 if datetime or timedelta: 

1052 # GH 20380: a datetime beyond year 2262 is outside the bounds 

1053 # of nanosecond-resolution 64-bit integers. 

1054 try: 

1055 converted = lib.maybe_convert_objects( 

1056 values, 

1057 convert_datetime=datetime, 

1058 convert_timedelta=timedelta, 

1059 convert_period=period, 

1060 ) 

1061 except (OutOfBoundsDatetime, ValueError): 

1062 return values 

1063 if converted is not values: 

1064 return converted 

1065 

1066 if numeric and is_object_dtype(values.dtype): 

1067 converted, _ = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) 

1068 

1069 # If all NaNs, then do not alter 

1070 values = converted if not isna(converted).all() else values 

1071 values = values.copy() if copy else values 

1072 

1073 return values 

1074 

1075 

1076def convert_dtypes( 

1077 input_array: ArrayLike, 

1078 convert_string: bool = True, 

1079 convert_integer: bool = True, 

1080 convert_boolean: bool = True, 

1081 convert_floating: bool = True, 

1082) -> DtypeObj: 

1083 """ 

1084 Convert objects to best possible type, and optionally, 

1085 to types supporting ``pd.NA``. 

1086 

1087 Parameters 

1088 ---------- 

1089 input_array : ExtensionArray or np.ndarray 

1090 convert_string : bool, default True 

1091 Whether object dtypes should be converted to ``StringDtype()``. 

1092 convert_integer : bool, default True 

1093 Whether, if possible, conversion can be done to integer extension types. 

1094 convert_boolean : bool, default True 

1095 Whether object dtypes should be converted to ``BooleanDtype()``. 

1096 convert_floating : bool, default True 

1097 Whether, if possible, conversion can be done to floating extension types. 

1098 If `convert_integer` is also True, preference will be given to integer 

1099 dtypes if the floats can be faithfully cast to integers. 

1100 

1101 Returns 

1102 ------- 

1103 np.dtype, or ExtensionDtype 

1104 """ 

1105 inferred_dtype: str | DtypeObj 

1106 

1107 if ( 

1108 convert_string or convert_integer or convert_boolean or convert_floating 

1109 ) and isinstance(input_array, np.ndarray): 

1110 

1111 if is_object_dtype(input_array.dtype): 

1112 inferred_dtype = lib.infer_dtype(input_array) 

1113 else: 

1114 inferred_dtype = input_array.dtype 

1115 

1116 if is_string_dtype(inferred_dtype): 

1117 if not convert_string or inferred_dtype == "bytes": 

1118 return input_array.dtype 

1119 else: 

1120 return pandas_dtype("string") 

1121 

1122 if convert_integer: 

1123 target_int_dtype = pandas_dtype("Int64") 

1124 

1125 if is_integer_dtype(input_array.dtype): 

1126 from pandas.core.arrays.integer import INT_STR_TO_DTYPE 

1127 

1128 inferred_dtype = INT_STR_TO_DTYPE.get( 

1129 input_array.dtype.name, target_int_dtype 

1130 ) 

1131 elif is_numeric_dtype(input_array.dtype): 

1132 # TODO: de-dup with maybe_cast_to_integer_array? 

1133 arr = input_array[notna(input_array)] 

1134 if (arr.astype(int) == arr).all(): 

1135 inferred_dtype = target_int_dtype 

1136 else: 

1137 inferred_dtype = input_array.dtype 

1138 

1139 if convert_floating: 

1140 if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( 

1141 input_array.dtype 

1142 ): 

1143 from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE 

1144 

1145 inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get( 

1146 input_array.dtype.name, pandas_dtype("Float64") 

1147 ) 

1148 # if we could also convert to integer, check if all floats 

1149 # are actually integers 

1150 if convert_integer: 

1151 # TODO: de-dup with maybe_cast_to_integer_array? 

1152 arr = input_array[notna(input_array)] 

1153 if (arr.astype(int) == arr).all(): 

1154 inferred_dtype = pandas_dtype("Int64") 

1155 else: 

1156 inferred_dtype = inferred_float_dtype 

1157 else: 

1158 inferred_dtype = inferred_float_dtype 

1159 

1160 if convert_boolean: 

1161 if is_bool_dtype(input_array.dtype): 

1162 inferred_dtype = pandas_dtype("boolean") 

1163 elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean": 

1164 inferred_dtype = pandas_dtype("boolean") 

1165 

1166 if isinstance(inferred_dtype, str): 

1167 # If we couldn't do anything else, then we retain the dtype 

1168 inferred_dtype = input_array.dtype 

1169 

1170 else: 

1171 return input_array.dtype 

1172 

1173 # error: Incompatible return value type (got "Union[str, Union[dtype[Any], 

1174 # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]") 

1175 return inferred_dtype # type: ignore[return-value] 

1176 
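# Editorial sketch (not part of the original source): whole-number floats
# prefer the nullable integer dtype when convert_integer=True.
#
#     >>> convert_dtypes(np.array([1.0, 2.0]))
#     Int64Dtype()
#     >>> convert_dtypes(np.array([1.0, 2.5]))
#     Float64Dtype()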

1177 

1178def maybe_infer_to_datetimelike( 

1179 value: np.ndarray, 

1180) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray: 

1181 """ 

1182 we might have an array (or single object) that is datetime-like, 

1183 and no dtype was passed; don't change the value unless we find a 

1184 datetime/timedelta set 

1185 

1186 this is pretty strict in that a datetime/timedelta is REQUIRED 

1187 in addition to possible nulls/string-likes 

1188 

1189 Parameters 

1190 ---------- 

1191 value : np.ndarray[object] 

1192 

1193 Returns 

1194 ------- 

1195 np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray 

1196 

1197 """ 

1198 if not isinstance(value, np.ndarray) or value.dtype != object: 

1199 # Caller is responsible for passing only ndarray[object] 

1200 raise TypeError(type(value)) # pragma: no cover 

1201 

1202 v = np.array(value, copy=False) 

1203 

1204 shape = v.shape 

1205 if v.ndim != 1: 

1206 v = v.ravel() 

1207 

1208 if not len(v): 

1209 return value 

1210 

1211 def try_datetime(v: np.ndarray) -> ArrayLike: 

1212 # Coerce to datetime64, datetime64tz, or in corner cases 

1213 # object[datetimes] 

1214 from pandas.core.arrays.datetimes import sequence_to_datetimes 

1215 

1216 try: 

1217 # GH#19671 we pass require_iso8601 to be relatively strict 

1218 # when parsing strings. 

1219 dta = sequence_to_datetimes(v, require_iso8601=True) 

1220 except (ValueError, TypeError): 

1221 # e.g. <class 'numpy.timedelta64'> is not convertible to datetime 

1222 return v.reshape(shape) 

1223 else: 

1224 # GH#19761 we may have mixed timezones, in which case 'dta' is 

1225 # an ndarray[object]. Only 1 test 

1226 # relies on this behavior, see GH#40111 

1227 return dta.reshape(shape) 

1228 

1229 def try_timedelta(v: np.ndarray) -> np.ndarray: 

1230 # safe coerce to timedelta64 

1231 

1232 # will try first with a string & object conversion 

1233 try: 

1234 # bc we know v.dtype == object, this is equivalent to 

1235 # `np.asarray(to_timedelta(v))`, but using a lower-level API that 

1236 # does not require a circular import. 

1237 td_values = array_to_timedelta64(v).view("m8[ns]") 

1238 except (ValueError, OverflowError): 

1239 return v.reshape(shape) 

1240 else: 

1241 return td_values.reshape(shape) 

1242 

1243 inferred_type, seen_str = lib.infer_datetimelike_array(ensure_object(v)) 

1244 if inferred_type in ["period", "interval"]: 

1245 # Incompatible return value type (got "Union[ExtensionArray, ndarray]", 

1246 # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray, 

1247 # IntervalArray]") 

1248 return lib.maybe_convert_objects( # type: ignore[return-value] 

1249 v, convert_period=True, convert_interval=True 

1250 ) 

1251 

1252 if inferred_type == "datetime": 

1253 # error: Incompatible types in assignment (expression has type "ExtensionArray", 

1254 # variable has type "Union[ndarray, List[Any]]") 

1255 value = try_datetime(v) # type: ignore[assignment] 

1256 elif inferred_type == "timedelta": 

1257 value = try_timedelta(v) 

1258 elif inferred_type == "nat": 

1259 

1260 # if all NaT, return as datetime 

1261 if isna(v).all(): 

1262 # error: Incompatible types in assignment (expression has type 

1263 # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") 

1264 value = try_datetime(v) # type: ignore[assignment] 

1265 else: 

1266 

1267 # We have at least a NaT and a string 

1268 # try timedelta first to avoid spurious datetime conversions 

1269 # e.g. '00:00:01' is a timedelta but technically is also a datetime 

1270 value = try_timedelta(v) 

1271 if lib.infer_dtype(value, skipna=False) in ["mixed"]: 

1272 # cannot skip missing values, as NaT implies that the string 

1273 # is actually a datetime 

1274 

1275 # error: Incompatible types in assignment (expression has type 

1276 # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") 

1277 value = try_datetime(v) # type: ignore[assignment] 

1278 

1279 if value.dtype.kind in ["m", "M"] and seen_str: 

1280 # TODO(2.0): enforcing this deprecation should close GH#40111 

1281 warnings.warn( 

1282 f"Inferring {value.dtype} from data containing strings is deprecated " 

1283 "and will be removed in a future version. To retain the old behavior " 

1284 f"explicitly pass Series(data, dtype={value.dtype})", 

1285 FutureWarning, 

1286 stacklevel=find_stack_level(), 

1287 ) 

1288 return value 

1289 

1290 

1291def maybe_cast_to_datetime( 

1292 value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None 

1293) -> ExtensionArray | np.ndarray: 

1294 """ 

1295 try to cast the array/value to a datetimelike dtype, converting float 

1296 nan to iNaT 

1297 

1298 We allow a list *only* when dtype is not None. 

1299 """ 

1300 from pandas.core.arrays.datetimes import sequence_to_datetimes 

1301 from pandas.core.arrays.timedeltas import TimedeltaArray 

1302 

1303 if not is_list_like(value): 

1304 raise TypeError("value must be listlike") 

1305 

1306 if is_timedelta64_dtype(dtype): 

1307 # TODO: _from_sequence would raise ValueError in cases where 

1308 # _ensure_nanosecond_dtype raises TypeError 

1309 dtype = cast(np.dtype, dtype) 

1310 dtype = _ensure_nanosecond_dtype(dtype) 

1311 res = TimedeltaArray._from_sequence(value, dtype=dtype) 

1312 return res 

1313 

1314 if dtype is not None: 

1315 is_datetime64 = is_datetime64_dtype(dtype) 

1316 is_datetime64tz = is_datetime64tz_dtype(dtype) 

1317 

1318 vdtype = getattr(value, "dtype", None) 

1319 

1320 if is_datetime64 or is_datetime64tz: 

1321 dtype = _ensure_nanosecond_dtype(dtype) 

1322 

1323 value = np.array(value, copy=False) 

1324 

1325 # we have an array of datetime or timedeltas & nulls 

1326 if value.size or not is_dtype_equal(value.dtype, dtype): 

1327 _disallow_mismatched_datetimelike(value, dtype) 

1328 

1329 try: 

1330 if is_datetime64: 

1331 dta = sequence_to_datetimes(value) 

1332 # GH 25843: Remove tz information since the dtype 

1333 # didn't specify one 

1334 

1335 if dta.tz is not None: 

1336 warnings.warn( 

1337 "Data is timezone-aware. Converting " 

1338 "timezone-aware data to timezone-naive by " 

1339 "passing dtype='datetime64[ns]' to " 

1340 "DataFrame or Series is deprecated and will " 

1341 "raise in a future version. Use " 

1342 "`pd.Series(values).dt.tz_localize(None)` " 

1343 "instead.", 

1344 FutureWarning, 

1345 stacklevel=find_stack_level(), 

1346 ) 

1347 # equiv: dta.view(dtype) 

1348 # Note: NOT equivalent to dta.astype(dtype) 

1349 dta = dta.tz_localize(None) 

1350 

1351 value = dta 

1352 elif is_datetime64tz: 

1353 dtype = cast(DatetimeTZDtype, dtype) 

1354 # The string check can be removed once issue #13712 

1355 # is solved. String data that is passed with a 

1356 # datetime64tz is assumed to be naive which should 

1357 # be localized to the timezone. 

1358 is_dt_string = is_string_dtype(value.dtype) 

1359 dta = sequence_to_datetimes(value) 

1360 if dta.tz is not None: 

1361 value = dta.astype(dtype, copy=False) 

1362 elif is_dt_string: 

1363 # Strings here are naive, so directly localize 

1364 # equiv: dta.astype(dtype) # though deprecated 

1365 

1366 value = dta.tz_localize(dtype.tz) 

1367 else: 

1368 # Numeric values are UTC at this point, 

1369 # so localize and convert 

1370 # equiv: Series(dta).astype(dtype) # though deprecated 

1371 if getattr(vdtype, "kind", None) == "M": 

1372 # GH#24559, GH#33401 deprecate behavior inconsistent 

1373 # with DatetimeArray/DatetimeIndex 

1374 warnings.warn( 

1375 "In a future version, constructing a Series " 

1376 "from datetime64[ns] data and a " 

1377 "DatetimeTZDtype will interpret the data " 

1378 "as wall-times instead of " 

1379 "UTC times, matching the behavior of " 

1380 "DatetimeIndex. To treat the data as UTC " 

1381 "times, use pd.Series(data).dt" 

1382 ".tz_localize('UTC').tz_convert(dtype.tz) " 

1383 "or pd.Series(data.view('int64'), dtype=dtype)", 

1384 FutureWarning, 

1385 stacklevel=find_stack_level(), 

1386 ) 

1387 

1388 value = dta.tz_localize("UTC").tz_convert(dtype.tz) 

1389 except OutOfBoundsDatetime: 

1390 raise 

1391 except ParserError: 

1392 # Note: this is dateutil's ParserError, not ours. 

1393 pass 

1394 

1395 elif getattr(vdtype, "kind", None) in ["m", "M"]: 

1396 # we are already datetimelike and want to coerce to non-datetimelike; 

1397 # astype_nansafe will raise for anything other than object, then upcast. 

1398 # see test_datetimelike_values_with_object_dtype 

1399 # error: Argument 2 to "astype_nansafe" has incompatible type 

1400 # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" 

1401 return astype_nansafe(value, dtype) # type: ignore[arg-type] 

1402 

1403 elif isinstance(value, np.ndarray): 

1404 if value.dtype.kind in ["M", "m"]: 

1405 # catch a datetime/timedelta that is not of ns variety 

1406 # and no coercion specified 

1407 value = sanitize_to_nanoseconds(value) 

1408 

1409 elif value.dtype == _dtype_obj: 

1410 value = maybe_infer_to_datetimelike(value) 

1411 

1412 elif isinstance(value, list): 

1413 # we only get here with dtype=None, which we do not allow 

1414 raise ValueError( 

1415 "maybe_cast_to_datetime allows a list *only* if dtype is not None" 

1416 ) 

1417 

1418 # at this point we have converted or raised in all cases where we had a list 

1419 return cast(ArrayLike, value) 

1420 

1421 

1422def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray: 

1423 """ 

1424 Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond. 

1425 """ 

1426 dtype = values.dtype 

1427 if dtype.kind == "M" and dtype != DT64NS_DTYPE: 

1428 values = astype_overflowsafe(values, dtype=DT64NS_DTYPE) 

1429 

1430 elif dtype.kind == "m" and dtype != TD64NS_DTYPE: 

1431 values = astype_overflowsafe(values, dtype=TD64NS_DTYPE) 

1432 

1433 elif copy: 

1434 values = values.copy() 

1435 

1436 return values 

1437 
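# Editorial sketch (not part of the original source): non-nanosecond
# units are normalized to [ns].
#
#     >>> sanitize_to_nanoseconds(np.array(["2021-01-01"], dtype="M8[s]")).dtype
#     dtype('<M8[ns]')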

1438 

1439def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: 

1440 """ 

1441 Convert dtypes with granularity less than nanosecond to nanosecond 

1442 

1443 >>> _ensure_nanosecond_dtype(np.dtype("M8[s]")) 

1444 dtype('<M8[ns]') 

1445 

1446 >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]")) 

1447 Traceback (most recent call last): 

1448 ... 

1449 TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]] 

1450 """ 

1451 msg = ( 

1452 f"The '{dtype.name}' dtype has no unit. " 

1453 f"Please pass in '{dtype.name}[ns]' instead." 

1454 ) 

1455 

1456 # unpack e.g. SparseDtype 

1457 dtype = getattr(dtype, "subtype", dtype) 

1458 

1459 if not isinstance(dtype, np.dtype): 

1460 # i.e. datetime64tz 

1461 pass 

1462 

1463 elif dtype.kind == "M" and dtype != DT64NS_DTYPE: 

1464 # pandas supports dtype whose granularity is less than [ns] 

1465 # e.g., [ps], [fs], [as] 

1466 if dtype <= np.dtype("M8[ns]"): 

1467 if dtype.name == "datetime64": 

1468 raise ValueError(msg) 

1469 dtype = DT64NS_DTYPE 

1470 else: 

1471 raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]") 

1472 

1473 elif dtype.kind == "m" and dtype != TD64NS_DTYPE: 

1474 # pandas supports dtype whose granularity is less than [ns] 

1475 # e.g., [ps], [fs], [as] 

1476 if dtype <= np.dtype("m8[ns]"): 

1477 if dtype.name == "timedelta64": 

1478 raise ValueError(msg) 

1479 dtype = TD64NS_DTYPE 

1480 else: 

1481 raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") 

1482 return dtype 

1483 

1484 

1485# TODO: other value-dependent functions to standardize here include 

1486# dtypes.concat.cast_to_common_type and Index._find_common_type_compat 

1487def find_result_type(left: ArrayLike, right: Any) -> DtypeObj: 

1488 """ 

1489 Find the type/dtype for the result of an operation between these objects. 

1490 

1491 This is similar to find_common_type, but looks at the objects instead 

1492 of just their dtypes. This can be useful in particular when one of the 

1493 objects does not have a `dtype`. 

1494 

1495 Parameters 

1496 ---------- 

1497 left : np.ndarray or ExtensionArray 

1498 right : Any 

1499 

1500 Returns 

1501 ------- 

1502 np.dtype or ExtensionDtype 

1503 

1504 See Also 

1505 -------- 

1506 find_common_type 

1507 numpy.result_type 

1508 """ 

1509 new_dtype: DtypeObj 

1510 

1511 if ( 

1512 isinstance(left, np.ndarray) 

1513 and left.dtype.kind in ["i", "u", "c"] 

1514 and (lib.is_integer(right) or lib.is_float(right)) 

1515 ): 

1516 # e.g. with int8 dtype and right=512, we want to end up with 

1517 # np.int16, whereas infer_dtype_from(512) gives np.int64, 

1518 # which will make us upcast too far. 

1519 if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f": 

1520 right = int(right) 

1521 

1522 new_dtype = np.result_type(left, right) 

1523 

1524 elif is_valid_na_for_dtype(right, left.dtype): 

1525 # e.g. IntervalDtype[int] and None/np.nan 

1526 new_dtype = ensure_dtype_can_hold_na(left.dtype) 

1527 

1528 else: 

1529 dtype, _ = infer_dtype_from(right, pandas_dtype=True) 

1530 

1531 new_dtype = find_common_type([left.dtype, dtype]) 

1532 

1533 return new_dtype 

1534 
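# Editorial sketch (not part of the original source), assuming numpy's
# value-based casting of this era: the scalar's magnitude, not its
# default int64 dtype, drives the upcast.
#
#     >>> find_result_type(np.array([1, 2], dtype=np.int8), 512)
#     dtype('int16')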

1535 

1536def common_dtype_categorical_compat( 

1537 objs: list[Index | ArrayLike], dtype: DtypeObj 

1538) -> DtypeObj: 

1539 """ 

1540 Update the result of find_common_type to account for NAs in a Categorical. 

1541 

1542 Parameters 

1543 ---------- 

1544 objs : list[np.ndarray | ExtensionArray | Index] 

1545 dtype : np.dtype or ExtensionDtype 

1546 

1547 Returns 

1548 ------- 

1549 np.dtype or ExtensionDtype 

1550 """ 

1551 # GH#38240 

1552 

1553 # TODO: more generally, could do `not can_hold_na(dtype)` 

1554 if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]: 

1555 

1556 for obj in objs: 

1557 # We don't want to accidentally allow e.g. "categorical" str here 

1558 obj_dtype = getattr(obj, "dtype", None) 

1559 if isinstance(obj_dtype, CategoricalDtype): 

1560 if isinstance(obj, ABCIndex): 

1561 # This check may already be cached 

1562 hasnas = obj.hasnans 

1563 else: 

1564 # Categorical 

1565 hasnas = cast("Categorical", obj)._hasna 

1566 

1567 if hasnas: 

1568 # see test_union_int_categorical_with_nan 

1569 dtype = np.dtype(np.float64) 

1570 break 

1571 return dtype 

1572 

1573 

1574@overload 

1575def find_common_type(types: list[np.dtype]) -> np.dtype: 

1576 ... 

1577 

1578 

1579@overload 

1580def find_common_type(types: list[ExtensionDtype]) -> DtypeObj: 

1581 ... 

1582 

1583 

1584@overload 

1585def find_common_type(types: list[DtypeObj]) -> DtypeObj: 

1586 ... 

1587 

1588 

1589def find_common_type(types): 

1590 """ 

1591 Find a common data type among the given dtypes. 

1592 

1593 Parameters 

1594 ---------- 

1595 types : list of dtypes 

1596 

1597 Returns 

1598 ------- 

1599 pandas extension or numpy dtype 

1600 

1601 See Also 

1602 -------- 

1603 numpy.find_common_type 

1604 

1605 """ 

1606 if not types: 

1607 raise ValueError("no types given") 

1608 

1609 first = types[0] 

1610 

1611 # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) 

1612 # => object 

1613 if lib.dtypes_all_equal(list(types)): 

1614 return first 

1615 

1616 # get unique types (dict.fromkeys is used as order-preserving set()) 

1617 types = list(dict.fromkeys(types).keys()) 

1618 

1619 if any(isinstance(t, ExtensionDtype) for t in types): 

1620 for t in types: 

1621 if isinstance(t, ExtensionDtype): 

1622 res = t._get_common_dtype(types) 

1623 if res is not None: 

1624 return res 

1625 return np.dtype("object") 

1626 

1627 # take lowest unit 

1628 if all(is_datetime64_dtype(t) for t in types): 

1629 return np.dtype("datetime64[ns]") 

1630 if all(is_timedelta64_dtype(t) for t in types): 

1631 return np.dtype("timedelta64[ns]") 

1632 

1633 # don't mix bool / int or float or complex 

1634 # this is different from numpy, which casts bool with float/int as int 

1635 has_bools = any(is_bool_dtype(t) for t in types) 

1636 if has_bools: 

1637 for t in types: 

1638 if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): 

1639 return np.dtype("object") 

1640 

1641 return np.find_common_type(types, []) 

1642 
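# Editorial sketch (not part of the original source): unlike plain numpy
# promotion, bool never silently merges with numeric dtypes here.
#
#     >>> find_common_type([np.dtype("int64"), np.dtype("float32")])
#     dtype('float64')
#     >>> find_common_type([np.dtype("bool"), np.dtype("int64")])
#     dtype('O')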

1643 

1644def construct_2d_arraylike_from_scalar( 

1645 value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool 

1646) -> np.ndarray: 

1647 

1648 shape = (length, width) 

1649 

1650 if dtype.kind in ["m", "M"]: 

1651 value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) 

1652 elif dtype == _dtype_obj: 

1653 if isinstance(value, (np.timedelta64, np.datetime64)): 

1654 # calling np.array below would cast to pytimedelta/pydatetime 

1655 out = np.empty(shape, dtype=object) 

1656 out.fill(value) 

1657 return out 

1658 

1659 # Attempt to coerce to a numpy array 

1660 try: 

1661 arr = np.array(value, dtype=dtype, copy=copy) 

1662 except (ValueError, TypeError) as err: 

1663 raise TypeError( 

1664 f"DataFrame constructor called with incompatible data and dtype: {err}" 

1665 ) from err 

1666 

1667 if arr.ndim != 0: 

1668 raise ValueError("DataFrame constructor not properly called!") 

1669 

1670 return np.full(shape, arr) 

1671 

1672 

1673def construct_1d_arraylike_from_scalar( 

1674 value: Scalar, length: int, dtype: DtypeObj | None 

1675) -> ArrayLike: 

1676 """ 

1677 create a np.ndarray / pandas type of specified shape and dtype 

1678 filled with value 

1679 

1680 Parameters 

1681 ---------- 

1682 value : scalar value 

1683 length : int 

1684 dtype : pandas_dtype or np.dtype 

1685 

1686 Returns 

1687 ------- 

1688 np.ndarray / pandas type of length, filled with value 

1689 

1690 """ 

1691 

1692 if dtype is None: 

1693 try: 

1694 dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) 

1695 except OutOfBoundsDatetime: 

1696 dtype = _dtype_obj 

1697 

1698 if isinstance(dtype, ExtensionDtype): 

1699 cls = dtype.construct_array_type() 

1700 seq = [] if length == 0 else [value] 

1701 subarr = cls._from_sequence(seq, dtype=dtype).repeat(length) 

1702 

1703 else: 

1704 

1705 if length and is_integer_dtype(dtype) and isna(value): 

1706 # coerce if we have nan for an integer dtype 

1707 dtype = np.dtype("float64") 

1708 elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): 

1709 # coerce to object dtype so that numpy stores our string as a 

1710 # scalar value instead of truncating it to the fixed-width itemsize 

1711 dtype = np.dtype("object") 

1712 if not isna(value): 

1713 value = ensure_str(value) 

1714 elif dtype.kind in ["M", "m"]: 

1715 value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) 

1716 

1717 subarr = np.empty(length, dtype=dtype) 

1718 if length: 

1719 # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes 

1720 subarr.fill(value) 

1721 

1722 return subarr 

1723 

1724 

1725def _maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): 

1726 """ 

1727 Wrap _maybe_unbox_datetimelike with a check for a timezone-aware Timestamp 

1728 along with a timezone-naive datetime64 dtype, which is deprecated. 

1729 """ 

1730 # Caller is responsible for checking dtype.kind in ["m", "M"] 

1731 

1732 if isinstance(value, datetime): 

1733 # we dont want to box dt64, in particular datetime64("NaT") 

1734 value = maybe_box_datetimelike(value, dtype) 

1735 

1736 try: 

1737 value = _maybe_unbox_datetimelike(value, dtype) 

1738 except TypeError: 

1739 if ( 

1740 isinstance(value, Timestamp) 

1741 and value.tzinfo is not None 

1742 and isinstance(dtype, np.dtype) 

1743 and dtype.kind == "M" 

1744 ): 

1745 warnings.warn( 

1746 "Data is timezone-aware. Converting " 

1747 "timezone-aware data to timezone-naive by " 

1748 "passing dtype='datetime64[ns]' to " 

1749 "DataFrame or Series is deprecated and will " 

1750 "raise in a future version. Use " 

1751 "`pd.Series(values).dt.tz_localize(None)` " 

1752 "instead.", 

1753 FutureWarning, 

1754 stacklevel=find_stack_level(), 

1755 ) 

1756 new_value = value.tz_localize(None) 

1757 return _maybe_unbox_datetimelike(new_value, dtype) 

1758 else: 

1759 raise 

1760 return value 

1761 

1762 
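# --- illustrative sketch (not part of pandas; added for exposition) ---------
# Passing a tz-aware Timestamp with a tz-naive datetime64[ns] dtype goes
# through the except-branch above: a FutureWarning is emitted and the
# timezone is dropped before unboxing. A hedged demo:
def _demo_tz_deprecation() -> None:
    # hypothetical demo helper, not in pandas
    ts = Timestamp("2021-01-01", tz="UTC")
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        val = _maybe_unbox_datetimelike_tz_deprecation(ts, np.dtype("M8[ns]"))
    assert any(issubclass(w.category, FutureWarning) for w in caught)
    assert isinstance(val, np.datetime64)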

1763def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: 

1764 """ 

1765 Transform any list-like object into a 1-dimensional numpy array of object 

1766 dtype. 

1767 

1768 Parameters 

1769 ---------- 

1770 values : any iterable which has a len() 

1771 

1772 Raises 

1773 ------ 

1774 TypeError 

1775 * If `values` does not have a len() 

1776 

1777 Returns 

1778 ------- 

1779 1-dimensional numpy array of dtype object 

1780 """ 

1781 # numpy will try to interpret nested lists as further dimensions, hence 

1782 # making a 1D array that contains list-likes is a bit tricky: 

1783 result = np.empty(len(values), dtype="object") 

1784 result[:] = values 

1785 return result 

1786 

1787 
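# --- illustrative sketch (not part of pandas; added for exposition) ---------
# Assigning through result[:] = values is what keeps nested lists as elements
# instead of letting numpy interpret them as a second dimension:
def _demo_object_array_from_listlike() -> None:
    # hypothetical demo helper, not in pandas
    result = construct_1d_object_array_from_listlike([[1, 2], [3]])
    assert result.shape == (2,)
    assert result[0] == [1, 2]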

1788def maybe_cast_to_integer_array( 

1789 arr: list | np.ndarray, dtype: np.dtype, copy: bool = False 

1790) -> np.ndarray: 

1791 """ 

1792 Cast the data to the given integer dtype, raising when the data is 

1793 incompatible with integer/unsigned integer dtypes. 

1794 

1795 Parameters 

1796 ---------- 

1797 arr : np.ndarray or list 

1798 The array to cast. 

1799 dtype : np.dtype 

1800 The integer dtype to cast the array to. 

1801 copy : bool, default False 

1802 Whether to make a copy of the array before returning. 

1803 

1804 Returns 

1805 ------- 

1806 ndarray 

1807 Array of integer or unsigned integer dtype. 

1808 

1809 Raises 

1810 ------ 

1811 OverflowError : the dtype is incompatible with the data 

1812 ValueError : loss of precision has occurred during casting 

1813 

1814 Examples 

1815 -------- 

1816 If you try to coerce negative values to unsigned integers, it raises: 

1817 

1818 >>> pd.Series([-1], dtype="uint64") 

1819 Traceback (most recent call last): 

1820 ... 

1821 OverflowError: Trying to coerce negative values to unsigned integers 

1822 

1823 Also, if you try to coerce float values to integers, it raises: 

1824 

1825 >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64")) 

1826 Traceback (most recent call last): 

1827 ... 

1828 ValueError: Trying to coerce float values to integers 

1829 """ 

1830 assert is_integer_dtype(dtype) 

1831 

1832 try: 

1833 if not isinstance(arr, np.ndarray): 

1834 casted = np.array(arr, dtype=dtype, copy=copy) 

1835 else: 

1836 casted = arr.astype(dtype, copy=copy) 

1837 except OverflowError as err: 

1838 raise OverflowError( 

1839 "The elements provided in the data cannot all be " 

1840 f"casted to the dtype {dtype}" 

1841 ) from err 

1842 

1843 if np.array_equal(arr, casted): 

1844 return casted 

1845 

1846 # We do this casting to allow for proper 

1847 # data and dtype checking. 

1848 # 

1849 # We didn't do this earlier because NumPy 

1850 # doesn't handle `uint64` correctly. 

1851 arr = np.asarray(arr) 

1852 

1853 if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): 

1854 raise OverflowError("Trying to coerce negative values to unsigned integers") 

1855 

1856 if is_float_dtype(arr.dtype): 

1857 if not np.isfinite(arr).all(): 

1858 raise IntCastingNaNError( 

1859 "Cannot convert non-finite values (NA or inf) to integer" 

1860 ) 

1861 raise ValueError("Trying to coerce float values to integers") 

1862 if is_object_dtype(arr.dtype): 

1863 raise ValueError("Trying to coerce float values to integers") 

1864 

1865 if casted.dtype < arr.dtype: 

1866 # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows 

1867 warnings.warn( 

1868 f"Values are too large to be losslessly cast to {dtype}. " 

1869 "In a future version this will raise OverflowError. To retain the " 

1870 f"old behavior, use pd.Series(values).astype({dtype})", 

1871 FutureWarning, 

1872 stacklevel=find_stack_level(), 

1873 ) 

1874 return casted 

1875 

1876 if arr.dtype.kind in ["m", "M"]: 

1877 # test_constructor_maskedarray_nonfloat 

1878 warnings.warn( 

1879 f"Constructing Series or DataFrame from {arr.dtype} values and " 

1880 f"dtype={dtype} is deprecated and will raise in a future version. " 

1881 "Use values.view(dtype) instead.", 

1882 FutureWarning, 

1883 stacklevel=find_stack_level(), 

1884 ) 

1885 return casted 

1886 

1887 # No known cases that get here, but raising explicitly to cover our bases. 

1888 raise ValueError(f"values cannot be losslessly cast to {dtype}") 

1889 

1890 
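# --- illustrative sketch (not part of pandas; added for exposition) ---------
# maybe_cast_to_integer_array returns the cast array when the round-trip is
# lossless and raises otherwise, per the rules above. A hedged demo:
def _demo_cast_to_integer_array() -> None:
    # hypothetical demo helper, not in pandas
    out = maybe_cast_to_integer_array([1, 2, 3], np.dtype("int64"))
    assert (out == np.array([1, 2, 3])).all()
    try:
        maybe_cast_to_integer_array([1.5], np.dtype("int64"))
    except ValueError:
        pass  # non-integer floats would lose precision and are rejected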

1891def can_hold_element(arr: ArrayLike, element: Any) -> bool: 

1892 """ 

1893 Can we do an inplace setitem with this element in an array with this dtype? 

1894 

1895 Parameters 

1896 ---------- 

1897 arr : np.ndarray or ExtensionArray 

1898 element : Any 

1899 

1900 Returns 

1901 ------- 

1902 bool 

1903 """ 

1904 dtype = arr.dtype 

1905 if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]: 

1906 if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)): 

1907 # np.dtype here catches datetime64[ns] and timedelta64[ns]; we assume 

1908 # in this case that we have DatetimeArray/TimedeltaArray 

1909 arr = cast( 

1910 "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr 

1911 ) 

1912 try: 

1913 arr._validate_setitem_value(element) 

1914 return True 

1915 except (ValueError, TypeError): 

1916 # TODO(2.0): stop catching ValueError for tzaware, see 

1917 # _catch_deprecated_value_error 

1918 return False 

1919 

1920 # This is technically incorrect, but maintains the behavior of 

1921 # ExtensionBlock._can_hold_element 

1922 return True 

1923 

1924 try: 

1925 np_can_hold_element(dtype, element) 

1926 return True 

1927 except (TypeError, LossySetitemError): 

1928 return False 

1929 

1930 
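# --- illustrative sketch (not part of pandas; added for exposition) ---------
# can_hold_element answers the "would this setitem be lossless?" question
# without mutating the array. A hedged demo against an int8 ndarray:
def _demo_can_hold_element() -> None:
    # hypothetical demo helper, not in pandas
    arr = np.array([1, 2, 3], dtype=np.int8)
    assert can_hold_element(arr, 1)         # fits in int8
    assert not can_hold_element(arr, 1000)  # overflows int8
    assert not can_hold_element(arr, 0.5)   # non-integer float is lossy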

1931def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: 

1932 """ 

1933 Raise if we cannot losslessly set this element into an ndarray with this dtype. 

1934 

1935 This is specifically about places where we disagree with numpy, i.e. there 

1936 are cases where numpy will raise on the setitem that we do not check 

1937 for here, e.g. setting the str "X" into a numeric ndarray. 

1938 

1939 Returns 

1940 ------- 

1941 Any 

1942 The element, potentially cast to the dtype. 

1943 

1944 Raises 

1945 ------ 

1946 LossySetitemError : If we cannot losslessly store this element with this dtype. 

1947 """ 

1948 if dtype == _dtype_obj: 

1949 return element 

1950 

1951 tipo = _maybe_infer_dtype_type(element) 

1952 

1953 if dtype.kind in ["i", "u"]: 

1954 if isinstance(element, range): 

1955 if _dtype_can_hold_range(element, dtype): 

1956 return element 

1957 raise LossySetitemError 

1958 

1959 elif is_integer(element) or (is_float(element) and element.is_integer()): 

1960 # e.g. test_setitem_series_int8: if we have a Python int 1, 

1961 # tipo may be np.int32, despite the fact that the value fits 

1962 # in smaller int dtypes. 

1963 info = np.iinfo(dtype) 

1964 if info.min <= element <= info.max: 

1965 return dtype.type(element) 

1966 raise LossySetitemError 

1967 

1968 if tipo is not None: 

1969 if tipo.kind not in ["i", "u"]: 

1970 if isinstance(element, np.ndarray) and element.dtype.kind == "f": 

1971 # If all can be losslessly cast to integers, then we can hold them 

1972 with np.errstate(invalid="ignore"): 

1973 # We check afterwards whether the cast was lossless, so no need to show 

1974 # the warning 

1975 casted = element.astype(dtype) 

1976 comp = casted == element 

1977 if comp.all(): 

1978 # Return the casted values bc they can be passed to 

1979 # np.putmask, whereas the raw values cannot. 

1980 # see TestSetitemFloatNDarrayIntoIntegerSeries 

1981 return casted 

1982 raise LossySetitemError 

1983 

1984 # Anything other than integer we cannot hold 

1985 raise LossySetitemError 

1986 elif ( 

1987 dtype.kind == "u" 

1988 and isinstance(element, np.ndarray) 

1989 and element.dtype.kind == "i" 

1990 ): 

1991 # see test_where_uint64 

1992 casted = element.astype(dtype) 

1993 if (casted == element).all(): 

1994 # TODO: faster to check (element >=0).all()? potential 

1995 # itemsize issues there? 

1996 return casted 

1997 raise LossySetitemError 

1998 elif dtype.itemsize < tipo.itemsize: 

1999 raise LossySetitemError 

2000 elif not isinstance(tipo, np.dtype): 

2001 # i.e. nullable IntegerDtype; we can put this into an ndarray 

2002 # losslessly iff it has no NAs 

2003 if element._hasna: 

2004 raise LossySetitemError 

2005 return element 

2006 

2007 return element 

2008 

2009 raise LossySetitemError 

2010 

2011 elif dtype.kind == "f": 

2012 if lib.is_integer(element) or lib.is_float(element): 

2013 casted = dtype.type(element) 

2014 if np.isnan(casted) or casted == element: 

2015 return casted 

2016 # otherwise e.g. overflow see TestCoercionFloat32 

2017 raise LossySetitemError 

2018 

2019 if tipo is not None: 

2020 # TODO: itemsize check? 

2021 if tipo.kind not in ["f", "i", "u"]: 

2022 # Anything other than float/integer we cannot hold 

2023 raise LossySetitemError 

2024 elif not isinstance(tipo, np.dtype): 

2025 # i.e. nullable IntegerDtype or FloatingDtype; 

2026 # we can put this into an ndarray losslessly iff it has no NAs 

2027 if element._hasna: 

2028 raise LossySetitemError 

2029 return element 

2030 elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind: 

2031 if isinstance(element, np.ndarray): 

2032 # e.g. TestDataFrameIndexingWhere::test_where_alignment 

2033 casted = element.astype(dtype) 

2034 # TODO(np>=1.20): we can just use np.array_equal with equal_nan 

2035 if array_equivalent(casted, element): 

2036 return casted 

2037 raise LossySetitemError 

2038 

2039 return element 

2040 

2041 raise LossySetitemError 

2042 

2043 elif dtype.kind == "c": 

2044 if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): 

2045 if np.isnan(element): 

2046 # see test_where_complex GH#6345 

2047 return dtype.type(element) 

2048 

2049 casted = dtype.type(element) 

2050 if casted == element: 

2051 return casted 

2052 # otherwise e.g. overflow see test_32878_complex_itemsize 

2053 raise LossySetitemError 

2054 

2055 if tipo is not None: 

2056 if tipo.kind in ["c", "f", "i", "u"]: 

2057 return element 

2058 raise LossySetitemError 

2059 raise LossySetitemError 

2060 

2061 elif dtype.kind == "b": 

2062 if tipo is not None: 

2063 if tipo.kind == "b": 

2064 if not isinstance(tipo, np.dtype): 

2065 # i.e. we have a BooleanArray 

2066 if element._hasna: 

2067 # i.e. there are pd.NA elements 

2068 raise LossySetitemError 

2069 return element 

2070 raise LossySetitemError 

2071 if lib.is_bool(element): 

2072 return element 

2073 raise LossySetitemError 

2074 

2075 elif dtype.kind == "S": 

2076 # TODO: tests.frame.methods.test_replace tests get here; 

2077 # need more targeted tests. xref phofl has a PR about this 

2078 if tipo is not None: 

2079 if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: 

2080 return element 

2081 raise LossySetitemError 

2082 if isinstance(element, bytes) and len(element) <= dtype.itemsize: 

2083 return element 

2084 raise LossySetitemError 

2085 

2086 raise NotImplementedError(dtype) 

2087 

2088 
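# --- illustrative sketch (not part of pandas; added for exposition) ---------
# On success np_can_hold_element returns the element already cast to the
# target dtype (ready to pass to np.putmask); on failure it raises
# LossySetitemError. A hedged demo:
def _demo_np_can_hold_element() -> None:
    # hypothetical demo helper, not in pandas
    casted = np_can_hold_element(np.dtype("int16"), 7)
    assert isinstance(casted, np.int16) and casted == 7
    try:
        np_can_hold_element(np.dtype("uint8"), -1)
    except LossySetitemError:
        pass  # negative values do not fit an unsigned dtype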

2089def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: 

2090 """ 

2091 _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), 

2092 but in many cases a range can be held by a smaller integer dtype. 

2093 Check if this is one of those cases. 

2094 """ 

2095 if not len(rng): 

2096 return True 

2097 return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) 

2098 

2099 
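# --- illustrative sketch (not part of pandas; added for exposition) ---------
# Because a range is monotonic, checking the two endpoints suffices. Note
# that np.can_cast on Python int values relies on NumPy 1.x value-based
# casting, which holds for the NumPy this module targets. A hedged demo:
def _demo_dtype_can_hold_range() -> None:
    # hypothetical demo helper, not in pandas
    assert _dtype_can_hold_range(range(0, 100), np.dtype("int8"))
    assert not _dtype_can_hold_range(range(0, 1000), np.dtype("int8"))
    assert _dtype_can_hold_range(range(0), np.dtype("int8"))  # empty fits anything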

2100class LossySetitemError(Exception): 

2101 """ 

2102 Raised when a __setitem__ into an np.ndarray would not be lossless. 

2103 """ 

2104 

2105 pass