Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py: 16%

608 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from datetime import ( 

4 datetime, 

5 time, 

6 timedelta, 

7 tzinfo, 

8) 

9from typing import ( 

10 TYPE_CHECKING, 

11 Literal, 

12 cast, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 lib, 

20 tslib, 

21) 

22from pandas._libs.tslibs import ( 

23 BaseOffset, 

24 NaT, 

25 NaTType, 

26 Resolution, 

27 Timestamp, 

28 astype_overflowsafe, 

29 fields, 

30 get_resolution, 

31 get_unit_from_dtype, 

32 ints_to_pydatetime, 

33 is_date_array_normalized, 

34 is_supported_unit, 

35 is_unitless, 

36 normalize_i8_timestamps, 

37 timezones, 

38 to_offset, 

39 tz_convert_from_utc, 

40 tzconversion, 

41) 

42from pandas._typing import npt 

43from pandas.errors import ( 

44 OutOfBoundsDatetime, 

45 PerformanceWarning, 

46) 

47from pandas.util._exceptions import find_stack_level 

48from pandas.util._validators import validate_inclusive 

49 

50from pandas.core.dtypes.astype import astype_dt64_to_dt64tz 

51from pandas.core.dtypes.common import ( 

52 DT64NS_DTYPE, 

53 INT64_DTYPE, 

54 is_bool_dtype, 

55 is_datetime64_any_dtype, 

56 is_datetime64_dtype, 

57 is_datetime64_ns_dtype, 

58 is_datetime64tz_dtype, 

59 is_dtype_equal, 

60 is_extension_array_dtype, 

61 is_float_dtype, 

62 is_object_dtype, 

63 is_period_dtype, 

64 is_sparse, 

65 is_string_dtype, 

66 is_timedelta64_dtype, 

67 pandas_dtype, 

68) 

69from pandas.core.dtypes.dtypes import DatetimeTZDtype 

70from pandas.core.dtypes.missing import isna 

71 

72from pandas.core.arrays import datetimelike as dtl 

73from pandas.core.arrays._ranges import generate_regular_range 

74import pandas.core.common as com 

75 

76from pandas.tseries.frequencies import get_period_alias 

77from pandas.tseries.offsets import ( 

78 BDay, 

79 Day, 

80 Tick, 

81) 

82 

if TYPE_CHECKING:

84 

85 from pandas import DataFrame 

86 from pandas.core.arrays import ( 

87 PeriodArray, 

88 TimedeltaArray, 

89 ) 

90 

91_midnight = time(0, 0) 

92 

93 

def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
    """
    Return the dtype matching a timezone and resolution unit.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone, or None for a tz-naive dtype.
    unit : str, default "ns"
        Datetime resolution unit.

    Returns
    -------
    np.dtype or DatetimeTZDtype
        ``np.dtype("M8[unit]")`` when tz is None, otherwise a
        DatetimeTZDtype carrying the timezone and unit.
    """
    if tz is not None:
        return DatetimeTZDtype(tz=tz, unit=unit)
    return np.dtype(f"M8[{unit}]")

111 

112 

113def _field_accessor(name: str, field: str, docstring=None): 

114 def f(self): 

115 values = self._local_timestamps() 

116 

117 if field in self._bool_ops: 

118 result: np.ndarray 

119 

120 if field.endswith(("start", "end")): 

121 freq = self.freq 

122 month_kw = 12 

123 if freq: 

124 kwds = freq.kwds 

125 month_kw = kwds.get("startingMonth", kwds.get("month", 12)) 

126 

127 result = fields.get_start_end_field( 

128 values, field, self.freqstr, month_kw, reso=self._reso 

129 ) 

130 else: 

131 result = fields.get_date_field(values, field, reso=self._reso) 

132 

133 # these return a boolean by-definition 

134 return result 

135 

136 if field in self._object_ops: 

137 result = fields.get_date_name_field(values, field, reso=self._reso) 

138 result = self._maybe_mask_results(result, fill_value=None) 

139 

140 else: 

141 result = fields.get_date_field(values, field, reso=self._reso) 

142 result = self._maybe_mask_results( 

143 result, fill_value=None, convert="float64" 

144 ) 

145 

146 return result 

147 

148 f.__name__ = name 

149 f.__doc__ = docstring 

150 return property(f) 

151 

152 

153class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): 

154 """ 

155 Pandas ExtensionArray for tz-naive or tz-aware datetime data. 

156 

157 .. warning:: 

158 

159 DatetimeArray is currently experimental, and its API may change 

160 without warning. In particular, :attr:`DatetimeArray.dtype` is 

161 expected to change to always be an instance of an ``ExtensionDtype`` 

162 subclass. 

163 

164 Parameters 

165 ---------- 

166 values : Series, Index, DatetimeArray, ndarray 

167 The datetime data. 

168 

169 For DatetimeArray `values` (or a Series or Index boxing one), 

170 `dtype` and `freq` will be extracted from `values`. 

171 

172 dtype : numpy.dtype or DatetimeTZDtype 

173 Note that the only NumPy dtype allowed is 'datetime64[ns]'. 

174 freq : str or Offset, optional 

175 The frequency. 

176 copy : bool, default False 

177 Whether to copy the underlying array of values. 

178 

179 Attributes 

180 ---------- 

181 None 

182 

183 Methods 

184 ------- 

185 None 

186 """ 

187 

    _typ = "datetimearray"
    # scalar written into the backing ndarray for missing entries
    _internal_fill_value = np.datetime64("NaT", "ns")
    # scalar types recognized as datetime-like by shared machinery
    _recognized_scalars = (datetime, np.datetime64)
    _is_recognized_dtype = is_datetime64_any_dtype
    _infer_matches = ("datetime", "datetime64", "date")

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # Scalar box class for elements of this array.
        return Timestamp

    # define my properties & methods for delegation
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    _object_ops: list[str] = ["freq", "tz"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "week",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    _other_ops: list[str] = ["date", "time", "timetz"]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    _dtype: np.dtype | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__

256 

257 @classmethod 

258 def _validate_dtype(cls, values, dtype): 

259 # used in TimeLikeOps.__init__ 

260 _validate_dt64_dtype(values.dtype) 

261 dtype = _validate_dt64_dtype(dtype) 

262 return dtype 

263 

    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype=DT64NS_DTYPE,
    ) -> DatetimeArray:
        # Fastpath constructor: wrap already-validated datetime64 data without
        # copying or re-validating beyond cheap assertions. `freq` is attached
        # as-is and is NOT validated against the data here.
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            # tz-naive: dtype must match the ndarray's dtype exactly
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._reso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result

285 

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        # Strict ExtensionArray constructor: delegates to the lenient
        # _from_sequence_not_strict with default parsing options.
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)

289 

    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        dtype=None,
        copy: bool = False,
        tz=None,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous="raise",
    ):
        """
        Convert an arbitrary sequence to a DatetimeArray, inferring or
        validating frequency and timezone along the way.
        """
        # Distinguish an explicit freq=None (which must suppress any inferred
        # frequency below) from freq simply not having been passed.
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            dtype=dtype,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
        )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        dtype = tz_to_dtype(tz)
        result = cls._simple_new(subarr, freq=freq, dtype=dtype)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result

334 

    @classmethod
    def _generate_range(
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize=False,
        ambiguous="raise",
        nonexistent="raise",
        inclusive="both",
    ):
        """
        Generate a range of timestamps (backing date_range).

        Exactly three of start/end/periods/freq must be specified. With a
        freq, points are generated by offset arithmetic; without one, they
        are linearly spaced between start and end. Endpoints are trimmed
        afterwards according to ``inclusive``.
        """
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )
        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # fixed-size steps: fast vectorized path
                i8values = generate_regular_range(start, end, periods, freq)
            else:
                # calendar-dependent offsets: generate scalar-by-scalar
                xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
                i8values = np.array([x.value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:

                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end.value - start.value, periods, dtype="int64")
                + start.value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # trim endpoints per `inclusive`
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start).value
            end_i8 = Timestamp(end).value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view("datetime64[ns]")
        dtype = tz_to_dtype(tz)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)

445 

446 # ----------------------------------------------------------------- 

447 # DatetimeLike Interface 

448 

449 def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: 

450 if not isinstance(value, self._scalar_type) and value is not NaT: 

451 raise ValueError("'value' should be a Timestamp.") 

452 self._check_compatible_with(value, setitem=setitem) 

453 return value.asm8 

454 

455 def _scalar_from_string(self, value) -> Timestamp | NaTType: 

456 return Timestamp(value, tz=self.tz) 

457 

    def _check_compatible_with(self, other, setitem: bool = False):
        """
        Verify ``other`` is timezone-compatible with self; NaT always passes.

        Raises
        ------
        ValueError
            On setitem with mismatched timezones (after first emitting the
            deprecation warning about the future casting behavior).
        """
        if other is NaT:
            # NaT is compatible with both tz-aware and tz-naive
            return
        self._assert_tzawareness_compat(other)
        if setitem:
            # Stricter check for setitem vs comparison methods
            if self.tz is not None and not timezones.tz_compare(self.tz, other.tz):
                # TODO(2.0): remove this check. GH#37605
                warnings.warn(
                    "Setitem-like behavior with mismatched timezones is deprecated "
                    "and will change in a future version. Instead of raising "
                    "(or for Index, Series, and DataFrame methods, coercing to "
                    "object dtype), the value being set (or passed as a "
                    "fill_value, or inserted) will be cast to the existing "
                    "DatetimeArray/DatetimeIndex/Series/DataFrame column's "
                    "timezone. To retain the old behavior, explicitly cast to "
                    "object dtype before the operation.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")

479 

480 # ----------------------------------------------------------------- 

481 # Descriptive Properties 

482 

    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        """Box a raw np.datetime64 into a Timestamp with our tz and reso."""
        # GH#42228
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz)
        # Non-overlapping identity check (left operand type: "Timestamp",
        # right operand type: "NaTType")
        if ts is not NaT:  # type: ignore[comparison-overlap]
            # GH#41586
            # do this instead of passing to the constructor to avoid FutureWarning
            ts._set_freq(self.freq)
        return ts

494 

    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        return self._dtype

519 

    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595: the timezone lives on the dtype (DatetimeTZDtype);
        # a plain np.dtype has no "tz" attribute, hence the getattr default.
        return getattr(self.dtype, "tz", None)

532 

533 @tz.setter 

534 def tz(self, value): 

535 # GH 3746: Prevent localizing or converting the index by setting tz 

536 raise AttributeError( 

537 "Cannot directly set timezone. Use tz_localize() " 

538 "or tz_convert() as appropriate" 

539 ) 

540 

    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute.
        """
        return self.tz

547 

    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time").
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._reso)

554 

    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Finest Resolution needed to represent all values without loss.
        return get_resolution(self.asi8, self.tz, reso=self._reso)

558 

559 # ---------------------------------------------------------------- 

560 # Array-Like / EA-Interface Methods 

561 

562 def __array__(self, dtype=None) -> np.ndarray: 

563 if dtype is None and self.tz: 

564 # The default for tz-aware is object, to preserve tz info 

565 dtype = object 

566 

567 return super().__array__(dtype=dtype) 

568 

    def __iter__(self):
        """
        Return an iterator over the boxed values

        Yields
        ------
        tstamp : Timestamp
        """
        if self.ndim > 1:
            # 2D case: yield sub-arrays row by row
            for i in range(len(self)):
                yield self[i]
        else:
            # convert in chunks of 10k for efficiency
            data = self.asi8
            length = len(self)
            chunksize = 10000
            chunks = (length // chunksize) + 1

            for i in range(chunks):
                start_i = i * chunksize
                # clamp the final chunk to the array length
                end_i = min((i + 1) * chunksize, length)
                converted = ints_to_pydatetime(
                    data[start_i:end_i],
                    tz=self.tz,
                    freq=self.freq,
                    box="timestamp",
                    reso=self._reso,
                )
                yield from converted

598 

    def astype(self, dtype, copy: bool = True):
        """
        Cast to the requested dtype.

        Handles same-dtype copies, datetime64 unit conversions (naive and
        tz-aware), the deprecated unitless-datetime64 path, and period
        dtypes; everything else is delegated to the mixin.
        """
        # We handle
        #   --> datetime
        #   --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif is_datetime64_ns_dtype(dtype):
            return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

        elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
            # tzaware unit conversion e.g. datetime64[s, UTC]
            np_dtype = np.dtype(dtype.str)
            res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
            return type(self)._simple_new(res_values, dtype=dtype)
            # TODO: preserve freq?

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype
            warnings.warn(
                "Passing unit-less datetime64 dtype to .astype is deprecated "
                "and will raise in a future version. Pass 'datetime64[ns]' instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # unit conversion e.g. datetime64[s]
            return self._ndarray.astype(dtype)

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)

651 

652 # ----------------------------------------------------------------- 

653 # Rendering Methods 

654 

    def _format_native_types(
        self, *, na_rep="NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """Render values as an object ndarray of formatted strings."""
        # local import to avoid a circular dependency with the formatting code
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._reso
        )

665 

666 # ----------------------------------------------------------------- 

667 # Comparison Methods 

668 

669 def _has_same_tz(self, other) -> bool: 

670 

671 # vzone shouldn't be None if value is non-datetime like 

672 if isinstance(other, np.datetime64): 

673 # convert to Timestamp as np.datetime64 doesn't have tz attr 

674 other = Timestamp(other) 

675 

676 if not hasattr(other, "tzinfo"): 

677 return False 

678 other_tz = other.tzinfo 

679 return timezones.tz_compare(self.tzinfo, other_tz) 

680 

    def _assert_tzawareness_compat(self, other) -> None:
        """
        Raise TypeError when exactly one of self/other is tz-aware.

        NaT is treated as compatible with both aware and naive values.
        """
        # adapted from _Timestamp._assert_tzawareness_compat
        other_tz = getattr(other, "tzinfo", None)
        other_dtype = getattr(other, "dtype", None)

        if is_datetime64tz_dtype(other_dtype):
            # Get tzinfo from Series dtype
            other_tz = other.dtype.tz
        if other is NaT:
            # pd.NaT quacks both aware and naive
            pass
        elif self.tz is None:
            if other_tz is not None:
                raise TypeError(
                    "Cannot compare tz-naive and tz-aware datetime-like objects."
                )
        elif other_tz is None:
            # NOTE(review): this message differs from the branch above only by
            # a trailing period — presumably unintentional; confirm before
            # unifying since callers may match on the exact text.
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects"
            )

701 

702 # ----------------------------------------------------------------- 

703 # Arithmetic Methods 

704 

    def _add_offset(self, offset) -> DatetimeArray:
        """
        Add a non-Tick DateOffset to each element.

        Vectorized via ``offset._apply_array`` on the wall-clock (tz-naive)
        values; falls back to a slow object-dtype path, with a
        PerformanceWarning, when the offset is not vectorized.
        """
        # Tick offsets are handled by timedelta-like addition elsewhere
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # apply the offset in local wall time, then re-localize below
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                # FIXME: tz_localize with non-nano
                result = result.tz_localize(self.tz)

        return result

735 

736 # ----------------------------------------------------------------- 

737 # Timezone Conversion and Localization Methods 

738 

739 def _local_timestamps(self) -> npt.NDArray[np.int64]: 

740 """ 

741 Convert to an i8 (unix-like nanosecond timestamp) representation 

742 while keeping the local timezone and not using UTC. 

743 This is used to calculate time-of-day information as if the timestamps 

744 were timezone-naive. 

745 """ 

746 if self.tz is None or timezones.is_utc(self.tz): 

747 # Avoid the copy that would be made in tzconversion 

748 return self.asi8 

749 return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso) 

750 

    def tz_convert(self, tz) -> DatetimeArray:
        """
        Convert tz-aware Datetime Array/Index from one time zone to another.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile or None
            Time zone for time. Corresponding timestamps would be converted
            to this time zone of the Datetime Array/Index. A `tz` of None will
            convert to UTC and remove the timezone information.

        Returns
        -------
        Array or Index

        Raises
        ------
        TypeError
            If Datetime Array/Index is tz-naive.

        See Also
        --------
        DatetimeIndex.tz : A timezone that has a variable offset from UTC.
        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
            given time zone, or remove timezone from a tz-aware DatetimeIndex.

        Examples
        --------
        With the `tz` parameter, we can change the DatetimeIndex
        to other time zones:

        >>> dti = pd.date_range(start='2014-08-01 09:00',
        ...                     freq='H', periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert('US/Central')
        DatetimeIndex(['2014-08-01 02:00:00-05:00',
                       '2014-08-01 03:00:00-05:00',
                       '2014-08-01 04:00:00-05:00'],
                      dtype='datetime64[ns, US/Central]', freq='H')

        With the ``tz=None``, we can remove the timezone (after converting
        to UTC if necessary):

        >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
        ...                     periods=3, tz='Europe/Berlin')

        >>> dti
        DatetimeIndex(['2014-08-01 09:00:00+02:00',
                       '2014-08-01 10:00:00+02:00',
                       '2014-08-01 11:00:00+02:00'],
                      dtype='datetime64[ns, Europe/Berlin]', freq='H')

        >>> dti.tz_convert(None)
        DatetimeIndex(['2014-08-01 07:00:00',
                       '2014-08-01 08:00:00',
                       '2014-08-01 09:00:00'],
                      dtype='datetime64[ns]', freq='H')
        """
        tz = timezones.maybe_get_tz(tz)

        if self.tz is None:
            # tz naive, use tz_localize
            raise TypeError(
                "Cannot convert tz-naive timestamps, use tz_localize to localize"
            )

        # No conversion since timestamps are all UTC to begin with: only the
        # dtype's tz changes; the underlying i8 values stay untouched.
        dtype = tz_to_dtype(tz, unit=self._unit)
        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)

826 

    @dtl.ravel_compat
    def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray:
        """
        Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

        This method takes a time zone (tz) naive Datetime Array/Index object
        and makes this time zone aware. It does not move the time to another
        time zone.

        This method can also be used to do the inverse -- to create a time
        zone unaware object from an aware object. To that end, pass `tz=None`.

        Parameters
        ----------
        tz : str, pytz.timezone, dateutil.tz.tzfile or None
            Time zone to convert timestamps to. Passing ``None`` will
            remove the time zone information preserving local time.
        ambiguous : 'infer', 'NaT', bool array, default 'raise'
            When clocks moved backward due to DST, ambiguous times may arise.
            For example in Central European Time (UTC+01), when going from
            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
            `ambiguous` parameter dictates how ambiguous times should be
            handled.

            - 'infer' will attempt to infer fall dst-transition hours based on
              order
            - bool-ndarray where True signifies a DST time, False signifies a
              non-DST time (note that this flag is only applicable for
              ambiguous times)
            - 'NaT' will return NaT where there are ambiguous times
            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
              times.

        nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
default 'raise'
            A nonexistent time does not exist in a particular timezone
            where clocks moved forward due to DST.

            - 'shift_forward' will shift the nonexistent time forward to the
              closest existing time
            - 'shift_backward' will shift the nonexistent time backward to the
              closest existing time
            - 'NaT' will return NaT where there are nonexistent times
            - timedelta objects will shift nonexistent times by the timedelta
            - 'raise' will raise a NonExistentTimeError if there are
              nonexistent times.

        Returns
        -------
        Same type as self
            Array/Index converted to the specified time zone.

        Raises
        ------
        TypeError
            If the Datetime Array/Index is tz-aware and tz is not None.

        See Also
        --------
        DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
            one time zone to another.

        Examples
        --------
        >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
        >>> tz_naive
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq='D')

        Localize DatetimeIndex in US/Eastern time zone:

        >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
        >>> tz_aware
        DatetimeIndex(['2018-03-01 09:00:00-05:00',
                       '2018-03-02 09:00:00-05:00',
                       '2018-03-03 09:00:00-05:00'],
                      dtype='datetime64[ns, US/Eastern]', freq=None)

        With the ``tz=None``, we can remove the time zone information
        while keeping the local time (not converted to UTC):

        >>> tz_aware.tz_localize(None)
        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                       '2018-03-03 09:00:00'],
                      dtype='datetime64[ns]', freq=None)

        Be careful with DST changes. When there is sequential data, pandas can
        infer the DST time:

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 02:00:00',
        ...                               '2018-10-28 02:30:00',
        ...                               '2018-10-28 03:00:00',
        ...                               '2018-10-28 03:30:00']))
        >>> s.dt.tz_localize('CET', ambiguous='infer')
        0   2018-10-28 01:30:00+02:00
        1   2018-10-28 02:00:00+02:00
        2   2018-10-28 02:30:00+02:00
        3   2018-10-28 02:00:00+01:00
        4   2018-10-28 02:30:00+01:00
        5   2018-10-28 03:00:00+01:00
        6   2018-10-28 03:30:00+01:00
        dtype: datetime64[ns, CET]

        In some cases, inferring the DST is impossible. In such cases, you can
        pass an ndarray to the ambiguous parameter to set the DST explicitly

        >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
        ...                               '2018-10-28 02:36:00',
        ...                               '2018-10-28 03:46:00']))
        >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
        0   2018-10-28 01:20:00+02:00
        1   2018-10-28 02:36:00+02:00
        2   2018-10-28 03:46:00+01:00
        dtype: datetime64[ns, CET]

        If the DST transition causes nonexistent times, you can shift these
        dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backward'`.

        >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
        ...                               '2015-03-29 03:30:00']))
        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
        0   2015-03-29 03:00:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
        0   2015-03-29 01:59:59.999999999+01:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]

        >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
        0   2015-03-29 03:30:00+02:00
        1   2015-03-29 03:30:00+02:00
        dtype: datetime64[ns, Europe/Warsaw]
        """
        nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
        if nonexistent not in nonexistent_options and not isinstance(
            nonexistent, timedelta
        ):
            raise ValueError(
                "The nonexistent argument must be one of 'raise', "
                "'NaT', 'shift_forward', 'shift_backward' or "
                "a timedelta object"
            )

        if self.tz is not None:
            if tz is None:
                # de-localize: recover wall-clock values, drop the tz
                new_dates = tz_convert_from_utc(self.asi8, self.tz)
            else:
                raise TypeError("Already tz-aware, use tz_convert to convert.")
        else:
            tz = timezones.maybe_get_tz(tz)
            # Convert to UTC

            new_dates = tzconversion.tz_localize_to_utc(
                self.asi8,
                tz,
                ambiguous=ambiguous,
                nonexistent=nonexistent,
                reso=self._reso,
            )
        new_dates = new_dates.view(f"M8[{self._unit}]")
        dtype = tz_to_dtype(tz, unit=self._unit)

        freq = None
        if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
            # we can preserve freq
            # TODO: Also for fixed-offsets
            freq = self.freq
        elif tz is None and self.tz is None:
            # no-op
            freq = self.freq
        return self._simple_new(new_dates, dtype=dtype, freq=freq)

1006 

1007 # ---------------------------------------------------------------- 

1008 # Conversion Methods - Vectorized analogues of Timestamp methods 

1009 

def to_pydatetime(self) -> npt.NDArray[np.object_]:
    """
    Return an ndarray of datetime.datetime objects.

    Returns
    -------
    datetimes : ndarray[object]
    """
    # Box each i8 timestamp as a datetime.datetime, honoring tz and unit.
    i8vals = self.asi8
    return ints_to_pydatetime(i8vals, tz=self.tz, reso=self._reso)

1019 

def normalize(self) -> DatetimeArray:
    """
    Convert times to midnight.

    The time component of the date-time is converted to midnight i.e.
    00:00:00. This is useful in cases, when the time does not matter.
    Length is unaltered. The timezones are unaffected.

    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on Datetime Array/Index.

    Returns
    -------
    DatetimeArray, DatetimeIndex or Series
        The same type as the original data. Series will have the same
        name and index. DatetimeIndex will have the same name.

    See Also
    --------
    floor : Floor the datetimes to the specified freq.
    ceil : Ceil the datetimes to the specified freq.
    round : Round the datetimes to the specified freq.

    Examples
    --------
    >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
    ...                     periods=3, tz='Asia/Calcutta')
    >>> idx
    DatetimeIndex(['2014-08-01 10:00:00+05:30',
                   '2014-08-01 11:00:00+05:30',
                   '2014-08-01 12:00:00+05:30'],
                    dtype='datetime64[ns, Asia/Calcutta]', freq='H')
    >>> idx.normalize()
    DatetimeIndex(['2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30'],
                   dtype='datetime64[ns, Asia/Calcutta]', freq=None)
    """
    # Truncate wall-clock times to midnight at the i8 level, then
    # reinterpret the result with our original dt64 dtype.
    truncated_i8 = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso)
    as_dt64 = truncated_i8.view(self._ndarray.dtype)

    result = type(self)._simple_new(as_dt64, dtype=as_dt64.dtype)
    # Normalizing generally destroys the original freq; re-infer it.
    result = result._with_freq("infer")
    if self.tz is not None:
        # Re-attach the original timezone (the i8 work above was tz-naive).
        result = result.tz_localize(self.tz)
    return result

1066 

def to_period(self, freq=None) -> PeriodArray:
    """
    Cast to PeriodArray/Index at a particular frequency.

    Converts DatetimeArray/Index to PeriodArray/Index.

    Parameters
    ----------
    freq : str or Offset, optional
        One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
        or an Offset object. Will be inferred by default.

    Returns
    -------
    PeriodArray/Index

    Raises
    ------
    ValueError
        When converting a DatetimeArray/Index with non-regular values,
        so that a frequency cannot be inferred.

    See Also
    --------
    PeriodIndex: Immutable ndarray holding ordinal values.
    DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.

    Examples
    --------
    >>> df = pd.DataFrame({"y": [1, 2, 3]},
    ...                   index=pd.to_datetime(["2000-03-31 00:00:00",
    ...                                         "2000-05-31 00:00:00",
    ...                                         "2000-08-31 00:00:00"]))
    >>> df.index.to_period("M")
    PeriodIndex(['2000-03', '2000-05', '2000-08'],
                dtype='period[M]')

    Infer the daily frequency

    >>> idx = pd.date_range("2017-01-01", periods=2)
    >>> idx.to_period()
    PeriodIndex(['2017-01-01', '2017-01-02'],
                dtype='period[D]')
    """
    from pandas.core.arrays import PeriodArray

    if self.tz is not None:
        # Periods are tz-naive; let the user know the tz is being discarded.
        warnings.warn(
            "Converting to PeriodArray/Index representation "
            "will drop timezone information.",
            UserWarning,
            stacklevel=find_stack_level(),
        )

    if freq is None:
        # Prefer the explicit freq; otherwise fall back to inference.
        freq = self.freqstr or self.inferred_freq

        if freq is None:
            raise ValueError(
                "You must pass a freq argument as current index has none."
            )

        # Translate the offset alias to a period alias where one exists.
        # https://github.com/pandas-dev/pandas/issues/33358
        period_alias = get_period_alias(freq)
        if period_alias is not None:
            freq = period_alias

    return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)

1138 

def to_perioddelta(self, freq) -> TimedeltaArray:
    """
    Calculate deltas between self values and self converted to Periods at a freq.

    Used for vectorized offsets.

    Parameters
    ----------
    freq : Period frequency

    Returns
    -------
    TimedeltaArray/Index
    """
    # Deprecation GH#34853
    warnings.warn(
        "to_perioddelta is deprecated and will be removed in a "
        "future version. "
        "Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead.",
        FutureWarning,
        # stacklevel chosen to be correct for when called from DatetimeIndex
        stacklevel=find_stack_level(),
    )
    from pandas.core.arrays.timedeltas import TimedeltaArray

    if self._ndarray.dtype != "M8[ns]":
        raise NotImplementedError("Only supported for nanosecond resolution.")

    # Offset of each timestamp from the start of its period, as i8 nanos,
    # reinterpreted as timedelta64[ns].
    delta_i8 = self.asi8 - self.to_period(freq).to_timestamp().asi8
    return TimedeltaArray(delta_i8.view("m8[ns]"))

1170 

1171 # ----------------------------------------------------------------- 

1172 # Properties - Vectorized Timestamp Properties/Methods 

1173 

def month_name(self, locale=None) -> npt.NDArray[np.object_]:
    """
    Return the month names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the month name.
        Default is English locale.

    Returns
    -------
    Series or Index
        Series or Index of month names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
    >>> s
    0   2018-01-31
    1   2018-02-28
    2   2018-03-31
    dtype: datetime64[ns]
    >>> s.dt.month_name()
    0     January
    1    February
    2       March
    dtype: object

    >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
                  dtype='datetime64[ns]', freq='M')
    >>> idx.month_name()
    Index(['January', 'February', 'March'], dtype='object')
    """
    # Use wall-clock (local) i8 values so tz-aware data names the local month.
    local_i8 = self._local_timestamps()

    names = fields.get_date_name_field(
        local_i8, "month_name", locale=locale, reso=self._reso
    )
    # Replace entries at NaT positions with None.
    names = self._maybe_mask_results(names, fill_value=None)
    return names

1217 

def day_name(self, locale=None) -> npt.NDArray[np.object_]:
    """
    Return the day names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the day name.
        Default is English locale.

    Returns
    -------
    Series or Index
        Series or Index of day names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
    >>> s
    0   2018-01-01
    1   2018-01-02
    2   2018-01-03
    dtype: datetime64[ns]
    >>> s.dt.day_name()
    0       Monday
    1      Tuesday
    2    Wednesday
    dtype: object

    >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.day_name()
    Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
    """
    # Use wall-clock (local) i8 values so tz-aware data names the local day.
    local_i8 = self._local_timestamps()

    names = fields.get_date_name_field(
        local_i8, "day_name", locale=locale, reso=self._reso
    )
    # Replace entries at NaT positions with None.
    names = self._maybe_mask_results(names, fill_value=None)
    return names

1261 

@property
def time(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of :class:`datetime.time` objects.

    The time part of the Timestamps.
    """
    # For a non-UTC timezone, work from the wall-clock i8 representation
    # so the extracted times are local, not UTC.
    local_i8 = self._local_timestamps()

    return ints_to_pydatetime(local_i8, box="time", reso=self._reso)

1275 

@property
def timetz(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of :class:`datetime.time` objects with timezones.

    The time part of the Timestamps.
    """
    # Passing tz makes each boxed time carry its tzinfo.
    i8vals = self.asi8
    return ints_to_pydatetime(i8vals, self.tz, box="time", reso=self._reso)

1284 

@property
def date(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of python :class:`datetime.date` objects.

    Namely, the date part of Timestamps without time and
    timezone information.
    """
    # For a non-UTC timezone, work from the wall-clock i8 representation
    # so the extracted dates are local, not UTC.
    local_i8 = self._local_timestamps()

    return ints_to_pydatetime(local_i8, box="date", reso=self._reso)

1299 

def isocalendar(self) -> DataFrame:
    """
    Calculate year, week, and day according to the ISO 8601 standard.

    .. versionadded:: 1.1.0

    Returns
    -------
    DataFrame
        With columns year, week and day.

    See Also
    --------
    Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
        week number, and weekday for the given Timestamp object.
    datetime.date.isocalendar : Return a named tuple object with
        three components: year, week and weekday.

    Examples
    --------
    >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
    >>> idx.isocalendar()
                year  week  day
    2019-12-29  2019    52    7
    2019-12-30  2020     1    1
    2019-12-31  2020     1    2
    2020-01-01  2020     1    3
    >>> idx.isocalendar().week
    2019-12-29    52
    2019-12-30     1
    2019-12-31     1
    2020-01-01     1
    Freq: D, Name: week, dtype: UInt32
    """
    from pandas import DataFrame

    # Compute the ISO triple from wall-clock (local) i8 values.
    local_i8 = self._local_timestamps()
    struct_arr = fields.build_isocalendar_sarray(local_i8, reso=self._reso)
    result = DataFrame(struct_arr, columns=["year", "week", "day"], dtype="UInt32")
    if self._hasna:
        # Blank out rows corresponding to NaT entries.
        result.iloc[self._isnan] = None
    return result

1344 

@property
def weekofyear(self):
    """
    The week ordinal of the year.

    .. deprecated:: 1.1.0

        weekofyear and week have been deprecated.
        Please use DatetimeIndex.isocalendar().week instead.
    """
    warnings.warn(
        "weekofyear and week have been deprecated, please use "
        "DatetimeIndex.isocalendar().week instead, which returns "
        "a Series. To exactly reproduce the behavior of week and "
        "weekofyear and return an Index, you may call "
        "pd.Int64Index(idx.isocalendar().week)",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    weeks = self.isocalendar().week
    if weeks.hasnans:
        # NaT entries force a float result so missing weeks become NaN.
        return weeks.to_numpy(dtype="float64", na_value=np.nan)
    return weeks.to_numpy(dtype="int64")

1368 

# Alias; accessing it goes through the deprecated ``weekofyear`` property.
week = weekofyear

# ------------------------------------------------------------------
# Calendar-field accessors.  ``_field_accessor`` (defined at module
# level, outside this view) builds a property that extracts the named
# field from the underlying i8 values; the string literal passed here
# becomes that property's docstring, so it must not be altered.

year = _field_accessor(
    "year",
    "Y",
    """
    The year of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="Y")
    ... )
    >>> datetime_series
    0   2000-12-31
    1   2001-12-31
    2   2002-12-31
    dtype: datetime64[ns]
    >>> datetime_series.dt.year
    0    2000
    1    2001
    2    2002
    dtype: int64
    """,
)
month = _field_accessor(
    "month",
    "M",
    """
    The month as January=1, December=12.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="M")
    ... )
    >>> datetime_series
    0   2000-01-31
    1   2000-02-29
    2   2000-03-31
    dtype: datetime64[ns]
    >>> datetime_series.dt.month
    0    1
    1    2
    2    3
    dtype: int64
    """,
)
day = _field_accessor(
    "day",
    "D",
    """
    The day of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="D")
    ... )
    >>> datetime_series
    0   2000-01-01
    1   2000-01-02
    2   2000-01-03
    dtype: datetime64[ns]
    >>> datetime_series.dt.day
    0    1
    1    2
    2    3
    dtype: int64
    """,
)
hour = _field_accessor(
    "hour",
    "h",
    """
    The hours of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="h")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 01:00:00
    2   2000-01-01 02:00:00
    dtype: datetime64[ns]
    >>> datetime_series.dt.hour
    0    0
    1    1
    2    2
    dtype: int64
    """,
)
minute = _field_accessor(
    "minute",
    "m",
    """
    The minutes of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="T")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:01:00
    2   2000-01-01 00:02:00
    dtype: datetime64[ns]
    >>> datetime_series.dt.minute
    0    0
    1    1
    2    2
    dtype: int64
    """,
)
second = _field_accessor(
    "second",
    "s",
    """
    The seconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="s")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:00:01
    2   2000-01-01 00:00:02
    dtype: datetime64[ns]
    >>> datetime_series.dt.second
    0    0
    1    1
    2    2
    dtype: int64
    """,
)
microsecond = _field_accessor(
    "microsecond",
    "us",
    """
    The microseconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="us")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00.000000
    1   2000-01-01 00:00:00.000001
    2   2000-01-01 00:00:00.000002
    dtype: datetime64[ns]
    >>> datetime_series.dt.microsecond
    0    0
    1    1
    2    2
    dtype: int64
    """,
)
nanosecond = _field_accessor(
    "nanosecond",
    "ns",
    """
    The nanoseconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="ns")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00.000000000
    1   2000-01-01 00:00:00.000000001
    2   2000-01-01 00:00:00.000000002
    dtype: datetime64[ns]
    >>> datetime_series.dt.nanosecond
    0    0
    1    1
    2    2
    dtype: int64
    """,
)
# Shared docstring for day_of_week and its aliases.
_dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int64
    """
day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
# Aliases for day_of_week.
dayofweek = day_of_week
weekday = day_of_week

day_of_year = _field_accessor(
    "dayofyear",
    "doy",
    """
    The ordinal day of the year.
    """,
)
dayofyear = day_of_year
quarter = _field_accessor(
    "quarter",
    "q",
    """
    The quarter of the date.
    """,
)
days_in_month = _field_accessor(
    "days_in_month",
    "dim",
    """
    The number of days in the month.
    """,
)
daysinmonth = days_in_month
# Template docstring shared by is_month_start / is_month_end below.
_is_month_doc = """
    Indicates whether the date is the {first_or_last} day of the month.

    Returns
    -------
    Series or array
        For Series, returns a Series with boolean values.
        For DatetimeIndex, returns a boolean array.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
    >>> s
    0   2018-02-27
    1   2018-02-28
    2   2018-03-01
    dtype: datetime64[ns]
    >>> s.dt.is_month_start
    0    False
    1    False
    2    True
    dtype: bool
    >>> s.dt.is_month_end
    0    False
    1    True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2018-02-27", periods=3)
    >>> idx.is_month_start
    array([False, False, True])
    >>> idx.is_month_end
    array([False, True, False])
    """
is_month_start = _field_accessor(
    "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
)

is_month_end = _field_accessor(
    "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
)

is_quarter_start = _field_accessor(
    "is_quarter_start",
    "is_quarter_start",
    """
    Indicator for whether the date is the first day of a quarter.

    Returns
    -------
    is_quarter_start : Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_end : Similar property for indicating the quarter start.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
    ...                    periods=4)})
    >>> df.assign(quarter=df.dates.dt.quarter,
    ...           is_quarter_start=df.dates.dt.is_quarter_start)
           dates  quarter  is_quarter_start
    0 2017-03-30        1             False
    1 2017-03-31        1             False
    2 2017-04-01        2              True
    3 2017-04-02        2             False

    >>> idx = pd.date_range('2017-03-30', periods=4)
    >>> idx
    DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_quarter_start
    array([False, False,  True, False])
    """,
)
is_quarter_end = _field_accessor(
    "is_quarter_end",
    "is_quarter_end",
    """
    Indicator for whether the date is the last day of a quarter.

    Returns
    -------
    is_quarter_end : Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_start : Similar property indicating the quarter start.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
    ...                    periods=4)})
    >>> df.assign(quarter=df.dates.dt.quarter,
    ...           is_quarter_end=df.dates.dt.is_quarter_end)
           dates  quarter    is_quarter_end
    0 2017-03-30        1             False
    1 2017-03-31        1              True
    2 2017-04-01        2             False
    3 2017-04-02        2             False

    >>> idx = pd.date_range('2017-03-30', periods=4)
    >>> idx
    DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_quarter_end
    array([False,  True, False, False])
    """,
)
is_year_start = _field_accessor(
    "is_year_start",
    "is_year_start",
    """
    Indicate whether the date is the first day of a year.

    Returns
    -------
    Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    is_year_end : Similar property indicating the last day of the year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
    >>> dates
    0   2017-12-30
    1   2017-12-31
    2   2018-01-01
    dtype: datetime64[ns]

    >>> dates.dt.is_year_start
    0    False
    1    False
    2    True
    dtype: bool

    >>> idx = pd.date_range("2017-12-30", periods=3)
    >>> idx
    DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_year_start
    array([False, False,  True])
    """,
)
is_year_end = _field_accessor(
    "is_year_end",
    "is_year_end",
    """
    Indicate whether the date is the last day of the year.

    Returns
    -------
    Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    is_year_start : Similar property indicating the start of the year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
    >>> dates
    0   2017-12-30
    1   2017-12-31
    2   2018-01-01
    dtype: datetime64[ns]

    >>> dates.dt.is_year_end
    0    False
    1     True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2017-12-30", periods=3)
    >>> idx
    DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_year_end
    array([False,  True, False])
    """,
)
is_leap_year = _field_accessor(
    "is_leap_year",
    "is_leap_year",
    """
    Boolean indicator if the date belongs to a leap year.

    A leap year is a year, which has 366 days (instead of 365) including
    29th of February as an intercalary day.
    Leap years are years which are multiples of four with the exception
    of years divisible by 100 but not by 400.

    Returns
    -------
    Series or ndarray
         Booleans indicating if dates belong to a leap year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
    >>> idx
    DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                  dtype='datetime64[ns]', freq='A-DEC')
    >>> idx.is_leap_year
    array([ True, False, False])

    >>> dates_series = pd.Series(idx)
    >>> dates_series
    0   2012-12-31
    1   2013-12-31
    2   2014-12-31
    dtype: datetime64[ns]
    >>> dates_series.dt.is_leap_year
    0     True
    1    False
    2    False
    dtype: bool
    """,
)

1881 

def to_julian_date(self) -> npt.NDArray[np.float64]:
    """
    Convert Datetime Array to float64 ndarray of Julian Dates.
    0 Julian date is noon January 1, 4713 BC.
    https://en.wikipedia.org/wiki/Julian_day
    """

    # Algorithm: http://mysite.verizon.net/aesir_research/date/jdalg2.htm
    yr = np.asarray(self.year)
    mo = np.asarray(self.month)
    dy = np.asarray(self.day)

    # Treat January/February as months 13/14 of the previous year so the
    # month-length polynomial below works uniformly.
    early = mo < 3
    yr[early] -= 1
    mo[early] += 12

    # Fraction of the day elapsed, in days.
    day_fraction = (
        self.hour
        + self.minute / 60
        + self.second / 3600
        + self.microsecond / 3600 / 10**6
        + self.nanosecond / 3600 / 10**9
    ) / 24

    return (
        dy
        + np.fix((153 * mo - 457) / 5)
        + 365 * yr
        + np.floor(yr / 4)
        - np.floor(yr / 100)
        + np.floor(yr / 400)
        + 1_721_118.5
        + day_fraction
    )

1913 

1914 # ----------------------------------------------------------------- 

1915 # Reductions 

1916 

def std(
    self,
    axis=None,
    dtype=None,
    out=None,
    ddof: int = 1,
    keepdims: bool = False,
    skipna: bool = True,
):
    """
    Return sample standard deviation over requested axis.

    Normalized by N-1 by default. This can be changed using the ddof argument

    Parameters
    ----------
    axis : int optional, default None
        Axis for the function to be applied on.
        For `Series` this parameter is unused and defaults to `None`.
    ddof : int, default 1
        Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA, the result will be
        NA.

    Returns
    -------
    Timedelta
    """
    # std is translation-invariant, so std(self) == std(self - Timestamp(0)).
    # Reinterpret the dt64 buffer as td64 (a view, no copy) and delegate
    # to TimedeltaArray.std.
    from pandas.core.arrays import TimedeltaArray

    # td64 dtype with the same resolution as our dt64 dtype.
    td64_name = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
    td64_dtype = np.dtype(td64_name)

    as_td = TimedeltaArray._simple_new(
        self._ndarray.view(td64_dtype), dtype=td64_dtype
    )

    return as_td.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)

1959 

1960 

1961# ------------------------------------------------------------------- 

1962# Constructor Helpers 

1963 

1964 

def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray:
    """
    Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
    """
    # Mixed integer/datetime inputs are accepted here (allow_mixed=True);
    # the heavy lifting lives in _sequence_to_dt64ns.
    values, tz, freq = _sequence_to_dt64ns(
        data,
        allow_mixed=True,
        require_iso8601=require_iso8601,
    )

    return DatetimeArray._simple_new(values, freq=freq, dtype=tz_to_dtype(tz))

1978 

1979 

1980def _sequence_to_dt64ns( 

1981 data, 

1982 dtype=None, 

1983 copy: bool = False, 

1984 tz=None, 

1985 dayfirst: bool = False, 

1986 yearfirst: bool = False, 

1987 ambiguous="raise", 

1988 *, 

1989 allow_mixed: bool = False, 

1990 require_iso8601: bool = False, 

1991): 

1992 """ 

1993 Parameters 

1994 ---------- 

1995 data : list-like 

1996 dtype : dtype, str, or None, default None 

1997 copy : bool, default False 

1998 tz : tzinfo, str, or None, default None 

1999 dayfirst : bool, default False 

2000 yearfirst : bool, default False 

2001 ambiguous : str, bool, or arraylike, default 'raise' 

2002 See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. 

2003 allow_mixed : bool, default False 

2004 Interpret integers as timestamps when datetime objects are also present. 

2005 require_iso8601 : bool, default False 

2006 Only consider ISO-8601 formats when parsing strings. 

2007 

2008 Returns 

2009 ------- 

2010 result : numpy.ndarray 

2011 The sequence converted to a numpy array with dtype ``datetime64[ns]``. 

2012 tz : tzinfo or None 

2013 Either the user-provided tzinfo or one inferred from the data. 

2014 inferred_freq : Tick or None 

2015 The inferred frequency of the sequence. 

2016 

2017 Raises 

2018 ------ 

2019 TypeError : PeriodDType data is passed 

2020 """ 

2021 

2022 inferred_freq = None 

2023 

2024 dtype = _validate_dt64_dtype(dtype) 

2025 tz = timezones.maybe_get_tz(tz) 

2026 

2027 # if dtype has an embedded tz, capture it 

2028 tz = validate_tz_from_dtype(dtype, tz) 

2029 

2030 data, copy = dtl.ensure_arraylike_for_datetimelike( 

2031 data, copy, cls_name="DatetimeArray" 

2032 ) 

2033 

2034 if isinstance(data, DatetimeArray): 

2035 inferred_freq = data.freq 

2036 

2037 # By this point we are assured to have either a numpy array or Index 

2038 data, copy = maybe_convert_dtype(data, copy, tz=tz) 

2039 data_dtype = getattr(data, "dtype", None) 

2040 

2041 if ( 

2042 is_object_dtype(data_dtype) 

2043 or is_string_dtype(data_dtype) 

2044 or is_sparse(data_dtype) 

2045 ): 

2046 # TODO: We do not have tests specific to string-dtypes, 

2047 # also complex or categorical or other extension 

2048 copy = False 

2049 if lib.infer_dtype(data, skipna=False) == "integer": 

2050 data = data.astype(np.int64) 

2051 else: 

2052 # data comes back here as either i8 to denote UTC timestamps 

2053 # or M8[ns] to denote wall times 

2054 data, inferred_tz = objects_to_datetime64ns( 

2055 data, 

2056 dayfirst=dayfirst, 

2057 yearfirst=yearfirst, 

2058 allow_object=False, 

2059 allow_mixed=allow_mixed, 

2060 require_iso8601=require_iso8601, 

2061 ) 

2062 if tz and inferred_tz: 

2063 # two timezones: convert to intended from base UTC repr 

2064 if data.dtype == "i8": 

2065 # GH#42505 

2066 # by convention, these are _already_ UTC, e.g 

2067 return data.view(DT64NS_DTYPE), tz, None 

2068 

2069 if timezones.is_utc(tz): 

2070 # Fastpath, avoid copy made in tzconversion 

2071 utc_vals = data.view("i8") 

2072 else: 

2073 utc_vals = tz_convert_from_utc(data.view("i8"), tz) 

2074 data = utc_vals.view(DT64NS_DTYPE) 

2075 elif inferred_tz: 

2076 tz = inferred_tz 

2077 

2078 data_dtype = data.dtype 

2079 

2080 # `data` may have originally been a Categorical[datetime64[ns, tz]], 

2081 # so we need to handle these types. 

2082 if is_datetime64tz_dtype(data_dtype): 

2083 # DatetimeArray -> ndarray 

2084 tz = _maybe_infer_tz(tz, data.tz) 

2085 result = data._ndarray 

2086 

2087 elif is_datetime64_dtype(data_dtype): 

2088 # tz-naive DatetimeArray or ndarray[datetime64] 

2089 data = getattr(data, "_ndarray", data) 

2090 if data.dtype != DT64NS_DTYPE: 

2091 data = astype_overflowsafe(data, dtype=DT64NS_DTYPE) 

2092 copy = False 

2093 

2094 if tz is not None: 

2095 # Convert tz-naive to UTC 

2096 tz = timezones.maybe_get_tz(tz) 

2097 # TODO: if tz is UTC, are there situations where we *don't* want a 

2098 # copy? tz_localize_to_utc always makes one. 

2099 data = tzconversion.tz_localize_to_utc( 

2100 data.view("i8"), tz, ambiguous=ambiguous 

2101 ) 

2102 data = data.view(DT64NS_DTYPE) 

2103 

2104 assert data.dtype == DT64NS_DTYPE, data.dtype 

2105 result = data 

2106 

2107 else: 

2108 # must be integer dtype otherwise 

2109 # assume this data are epoch timestamps 

2110 if tz: 

2111 tz = timezones.maybe_get_tz(tz) 

2112 

2113 if data.dtype != INT64_DTYPE: 

2114 data = data.astype(np.int64, copy=False) 

2115 result = data.view(DT64NS_DTYPE) 

2116 

2117 if copy: 

2118 result = result.copy() 

2119 

2120 assert isinstance(result, np.ndarray), type(result) 

2121 assert result.dtype == "M8[ns]", result.dtype 

2122 

2123 # We have to call this again after possibly inferring a tz above 

2124 validate_tz_from_dtype(dtype, tz) 

2125 

2126 return result, tz, inferred_freq 

2127 

2128 

def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc=False,
    errors="raise",
    require_iso8601: bool = False,
    allow_object: bool = False,
    allow_mixed: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert timezone-aware timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    require_iso8601 : bool, default False
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.
    allow_mixed : bool, default False
        Interpret integers as timestamps when datetime objects are also present.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # coerce str-dtype (or anything else) to an object ndarray up front
    data = np.array(data, copy=False, dtype=np.object_)

    # remember memory layout so we can restore the original shape afterwards
    order: Literal["F", "C"] = "F" if data.flags.f_contiguous else "C"
    try:
        flat, tz_parsed = tslib.array_to_datetime(
            data.ravel("K"),
            errors=errors,
            utc=utc,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            require_iso8601=require_iso8601,
            allow_mixed=allow_mixed,
        )
    except OverflowError as err:
        # Exception is raised when a part of date is greater than 32 bit signed int
        raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err

    result = flat.reshape(data.shape, order=order)

    if tz_parsed is not None:
        # Shortcut: the datetime64 numpy array is already in UTC, so
        # i8 values denote unix timestamps.
        return result.view("i8"), tz_parsed

    if is_datetime64_dtype(result):
        # M8[ns] denotes wall-times; since tz is None the distinction is thin
        return result, tz_parsed

    if is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        # array is allowed. When called via `pd.DatetimeIndex`, we can
        # only accept datetime64 dtype, so raise TypeError if object-dtype
        # is returned, as that indicates the values can be recognized as
        # datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)

    # GH#23675 this TypeError should never be hit, whereas the TypeError
    # in the object-dtype branch above is reachable.
    raise TypeError(result)  # pragma: no cover

2213 

2214 

def maybe_convert_dtype(data, copy: bool, tz: "tzinfo | None" = None):
    """
    Convert data based on dtype conventions, issuing deprecation warnings
    or errors where appropriate.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    dtype = getattr(data, "dtype", None)
    if dtype is None:
        # e.g. collections.deque — nothing to convert
        return data, copy

    if is_float_dtype(dtype):
        # Note: we must cast to datetime64[ns] here in order to treat these
        # as wall-times instead of UTC timestamps.
        data = data.astype(DT64NS_DTYPE)
        copy = False
        if (
            tz is not None
            and len(data) > 0
            and not timezones.is_utc(timezones.maybe_get_tz(tz))
        ):
            # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
            warnings.warn(
                "The behavior of DatetimeArray._from_sequence with a timezone-aware "
                "dtype and floating-dtype data is deprecated. In a future version, "
                "this data will be interpreted as nanosecond UTC timestamps "
                "instead of wall-times, matching the behavior with integer dtypes. "
                "To retain the old behavior, explicitly cast to 'datetime64[ns]' "
                "before passing the data to pandas. To get the future behavior, "
                "first cast to 'int64'.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
    elif is_timedelta64_dtype(dtype) or is_bool_dtype(dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
    elif is_period_dtype(dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )
    elif is_extension_array_dtype(dtype) and not is_datetime64tz_dtype(dtype):
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy

2278 

2279 

2280# ------------------------------------------------------------------- 

2281# Validation and Inference 

2282 

2283 

2284def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None: 

2285 """ 

2286 If a timezone is inferred from data, check that it is compatible with 

2287 the user-provided timezone, if any. 

2288 

2289 Parameters 

2290 ---------- 

2291 tz : tzinfo or None 

2292 inferred_tz : tzinfo or None 

2293 

2294 Returns 

2295 ------- 

2296 tz : tzinfo or None 

2297 

2298 Raises 

2299 ------ 

2300 TypeError : if both timezones are present but do not match 

2301 """ 

2302 if tz is None: 

2303 tz = inferred_tz 

2304 elif inferred_tz is None: 

2305 pass 

2306 elif not timezones.tz_compare(tz, inferred_tz): 

2307 raise TypeError( 

2308 f"data is already tz-aware {inferred_tz}, unable to " 

2309 f"set specified tz: {tz}" 

2310 ) 

2311 return tz 

2312 

2313 

2314def _validate_dt64_dtype(dtype): 

2315 """ 

2316 Check that a dtype, if passed, represents either a numpy datetime64[ns] 

2317 dtype or a pandas DatetimeTZDtype. 

2318 

2319 Parameters 

2320 ---------- 

2321 dtype : object 

2322 

2323 Returns 

2324 ------- 

2325 dtype : None, numpy.dtype, or DatetimeTZDtype 

2326 

2327 Raises 

2328 ------ 

2329 ValueError : invalid dtype 

2330 

2331 Notes 

2332 ----- 

2333 Unlike validate_tz_from_dtype, this does _not_ allow non-existent 

2334 tz errors to go through 

2335 """ 

2336 if dtype is not None: 

2337 dtype = pandas_dtype(dtype) 

2338 if is_dtype_equal(dtype, np.dtype("M8")): 

2339 # no precision, disallowed GH#24806 

2340 msg = ( 

2341 "Passing in 'datetime64' dtype with no precision is not allowed. " 

2342 "Please pass in 'datetime64[ns]' instead." 

2343 ) 

2344 raise ValueError(msg) 

2345 

2346 if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance( 

2347 dtype, (np.dtype, DatetimeTZDtype) 

2348 ): 

2349 raise ValueError( 

2350 f"Unexpected value for 'dtype': '{dtype}'. " 

2351 "Must be 'datetime64[ns]' or DatetimeTZDtype'." 

2352 ) 

2353 

2354 if getattr(dtype, "tz", None): 

2355 # https://github.com/pandas-dev/pandas/issues/18595 

2356 # Ensure that we have a standard timezone for pytz objects. 

2357 # Without this, things like adding an array of timedeltas and 

2358 # a tz-aware Timestamp (with a tz specific to its datetime) will 

2359 # be incorrect(ish?) for the array as a whole 

2360 dtype = cast(DatetimeTZDtype, dtype) 

2361 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) 

2362 

2363 return dtype 

2364 

2365 

def validate_tz_from_dtype(dtype, tz: "tzinfo | None") -> "tzinfo | None":
    """
    If the given dtype is a DatetimeTZDtype, extract the implied
    tzinfo object from it and check that it does not conflict with the given
    tz.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is not None:
        if isinstance(dtype, str):
            try:
                dtype = DatetimeTZDtype.construct_from_string(dtype)
            except TypeError:
                # Things like `datetime64[ns]`, which is OK for the
                # constructors, but also nonsense, which should be validated
                # but not by us. We *do* allow non-existent tz errors to
                # go through
                pass
        dtz = getattr(dtype, "tz", None)
        if dtz is not None:
            if tz is not None and not timezones.tz_compare(tz, dtz):
                raise ValueError("cannot supply both a tz and a dtype with a tz")
            tz = dtz

        if tz is not None and is_datetime64_dtype(dtype):
            # The user passed a tz together with a tz-naive dtype
            # (i.e. datetime64[ns]).  In that case `dtz` is None, so
            # tz_compare(tz, None) is False and we always raise.
            # (The redundant `tz is not None` re-check that used to guard
            # this condition has been removed; it duplicated the guard on
            # the enclosing `if`.)
            if not timezones.tz_compare(tz, dtz):
                raise ValueError(
                    "cannot supply both a tz and a "
                    "timezone-naive dtype (i.e. datetime64[ns])"
                )

    return tz

2411 

2412 

2413def _infer_tz_from_endpoints( 

2414 start: Timestamp, end: Timestamp, tz: tzinfo | None 

2415) -> tzinfo | None: 

2416 """ 

2417 If a timezone is not explicitly given via `tz`, see if one can 

2418 be inferred from the `start` and `end` endpoints. If more than one 

2419 of these inputs provides a timezone, require that they all agree. 

2420 

2421 Parameters 

2422 ---------- 

2423 start : Timestamp 

2424 end : Timestamp 

2425 tz : tzinfo or None 

2426 

2427 Returns 

2428 ------- 

2429 tz : tzinfo or None 

2430 

2431 Raises 

2432 ------ 

2433 TypeError : if start and end timezones do not agree 

2434 """ 

2435 try: 

2436 inferred_tz = timezones.infer_tzinfo(start, end) 

2437 except AssertionError as err: 

2438 # infer_tzinfo raises AssertionError if passed mismatched timezones 

2439 raise TypeError( 

2440 "Start and end cannot both be tz-aware with different timezones" 

2441 ) from err 

2442 

2443 inferred_tz = timezones.maybe_get_tz(inferred_tz) 

2444 tz = timezones.maybe_get_tz(tz) 

2445 

2446 if tz is not None and inferred_tz is not None: 

2447 if not timezones.tz_compare(inferred_tz, tz): 

2448 raise AssertionError("Inferred time zone not equal to passed time zone") 

2449 

2450 elif inferred_tz is not None: 

2451 tz = inferred_tz 

2452 

2453 return tz 

2454 

2455 

2456def _maybe_normalize_endpoints( 

2457 start: Timestamp | None, end: Timestamp | None, normalize: bool 

2458): 

2459 _normalized = True 

2460 

2461 if start is not None: 

2462 if normalize: 

2463 start = start.normalize() 

2464 _normalized = True 

2465 else: 

2466 _normalized = _normalized and start.time() == _midnight 

2467 

2468 if end is not None: 

2469 if normalize: 

2470 end = end.normalize() 

2471 _normalized = True 

2472 else: 

2473 _normalized = _normalized and end.time() == _midnight 

2474 

2475 return start, end, _normalized 

2476 

2477 

2478def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent): 

2479 """ 

2480 Localize a start or end Timestamp to the timezone of the corresponding 

2481 start or end Timestamp 

2482 

2483 Parameters 

2484 ---------- 

2485 ts : start or end Timestamp to potentially localize 

2486 is_none : argument that should be None 

2487 is_not_none : argument that should not be None 

2488 freq : Tick, DateOffset, or None 

2489 tz : str, timezone object or None 

2490 ambiguous: str, localization behavior for ambiguous times 

2491 nonexistent: str, localization behavior for nonexistent times 

2492 

2493 Returns 

2494 ------- 

2495 ts : Timestamp 

2496 """ 

2497 # Make sure start and end are timezone localized if: 

2498 # 1) freq = a Timedelta-like frequency (Tick) 

2499 # 2) freq = None i.e. generating a linspaced range 

2500 if is_none is None and is_not_none is not None: 

2501 # Note: We can't ambiguous='infer' a singular ambiguous time; however, 

2502 # we have historically defaulted ambiguous=False 

2503 ambiguous = ambiguous if ambiguous != "infer" else False 

2504 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None} 

2505 if isinstance(freq, Tick) or freq is None: 

2506 localize_args["tz"] = tz 

2507 ts = ts.tz_localize(**localize_args) 

2508 return ts 

2509 

2510 

def generate_range(start=None, end=None, periods=None, offset=BDay()):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : datetime, (default None)
    end : datetime, (default None)
    periods : int, (default None)
    offset : DateOffset, (default BDay())

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Coerce endpoints to Timestamp, treating NaT (e.g. from None) as "unset"
    start = Timestamp(start)
    start = None if start is NaT else start
    end = Timestamp(end)
    end = None if end is NaT else end

    # Snap an off-offset endpoint onto the offset grid.  NB: only one of the
    # two endpoints is adjusted (elif), matching long-standing behavior.
    if start and not offset.is_on_offset(start):
        start = offset.rollforward(start)
    elif end and not offset.is_on_offset(end):
        end = offset.rollback(end)

    # Both endpoints given but in the wrong order for a forward offset:
    # produce an empty range.
    if periods is None and end < start and offset.n >= 0:
        end = None
        periods = 0

    # Derive the missing endpoint from periods.
    if end is None:
        end = start + (periods - 1) * offset
    if start is None:
        start = end - (periods - 1) * offset

    ascending = offset.n >= 0
    current = start
    while (current <= end) if ascending else (current >= end):
        yield current

        if current == end:
            # GH#24252 avoid overflows by not performing the addition
            # in offset.apply unless we have to
            break

        # faster than current + offset
        stepped = offset._apply(current)
        if ascending and stepped <= current:
            raise ValueError(f"Offset {offset} did not increment date")
        if not ascending and stepped >= current:
            raise ValueError(f"Offset {offset} did not decrement date")
        current = stepped